iwla

iwla Commit Details

Date: 2015-02-18 20:32:04 (6 years 5 months ago)
Author: Grégory Soutadé
Branch: dev, master
Commit: cfbd35d818bd495076ce11b416dd84bbb2396803
Parents: efb5ddf7617130842001bcefc9762a395d806508
Message: Merge one hit only parsers in feeds parsers detection

Changes:
M plugins/display/feeds.py (2 diffs)
M plugins/post_analysis/feeds.py (2 diffs)

File differences

plugins/display/feeds.py
6767
6868
6969
70
70
7171
7272
7373
......
8181
8282
8383
84
84
85
86
87
8588
8689
8790
# All in a page
if self.create_all_feeds_page:
title = createCurTitle(self.iwla, u'All Feeds parsers')
title = createCurTitle(self.iwla, self.iwla._(u'All Feeds parsers'))
filename = 'all_feeds.html'
path = self.iwla.getCurDisplayPath(filename)
display_visitor_ip = self.iwla.getConfValue('display_visitor_ip', False)
if display_visitor_ip and\
super_hit.get('dns_name_replaced', False):
address = '%s [%s]' % (address, super_hit['remote_ip'])
table.appendRow([address, super_hit['viewed_pages'], super_hit['viewed_hits']])
if super_hit['robot']:
table.appendRow([address, super_hit['not_viewed_pages'], super_hit['not_viewed_hits']])
else:
table.appendRow([address, super_hit['viewed_pages'], super_hit['viewed_hits']])
page.appendBlock(table)
display.addPage(page)
plugins/post_analysis/feeds.py
2727
2828
2929
30
31
3032
3133
3234
......
6466
6567
6668
69
70
71
72
73
74
75
76
77
6778
6879
80
6981
70
82
83
84
85
86
87
88
7189
7290
7391
7492
93
7594
7695
77
78
96
97
7998
80
81
99
Post analysis hook
Find feeds parsers (first hit in feeds conf value and no viewed pages if it's a robot)
If there is only one hit per day to a feed, merge feeds parsers with the same user agent
as it must be the same person with a different IP address.
Plugin requirements :
None
return True
def mergeOneHitOnlyFeedsParsers(self, isFeedParser, one_hit_only, hit):
if isFeedParser and (hit['viewed_hits'] + hit['not_viewed_hits']) == 1:
user_agent = hit['requests'][0]['http_user_agent'].lower()
if one_hit_only.get(user_agent, None) is None:
one_hit_only[user_agent] = (hit)
else:
isFeedParser = False
hit['feed_parser'] = isFeedParser
def hook(self):
hits = self.iwla.getCurrentVisists()
one_hit_only = {}
for hit in hits.values():
if not hit.get('feed_parser', None) is None: continue
isFeedParser = hit.get('feed_parser', None)
if isFeedParser == True:
self.mergeOneHitOnlyFeedsParsers(one_hit_only, hit)
if not isFeedParser is None: continue
isFeedParser = False
uri = hit['requests'][0]['extract_request']['extract_uri'].lower()
for regexp in self.feeds_re:
if regexp.match(uri):
isFeedParser = True
# Robot that views pages -> bot
if hit['robot']:
if hit['viewed_pages']: continue
isFeedParser = True
if hit['viewed_pages']:
isFeedParser = False
break
hit['feed_parser'] = isFeedParser
self.mergeOneHitOnlyFeedsParsers(isFeedParser, one_hit_only, hit)

Archive Download the corresponding diff file

Branches

Tags