iwla

iwla Commit Details

Date: 2016-08-20 13:08:02 (4 years 11 months ago)
Author: Grégory Soutadé
Branch: dev, master
Commit: e805e59c101b61797d472846ff352a3e68a5739b
Parents: 10d087ad70e90ac3e8ebc38b02888b2bf31a3e36
Message: Remove crawler from feed parsers

Changes:
M plugins/display/feeds.py (1 diff)
M plugins/post_analysis/feeds.py (3 diffs)
M plugins/post_analysis/reverse_dns.py (1 diff)

File differences

plugins/display/feeds.py
7676
7777
7878
79
79
80
81
8082
8183
8284
table = display.createBlock(DisplayHTMLBlockTable, self.iwla._(u'All feeds parsers'), [self.iwla._(u'Host'), self.iwla._(u'Pages'), self.iwla._(u'Hits')])
table.setColsCSSClass(['', 'iwla_page', 'iwla_hit'])
for super_hit in hits.values():
if not super_hit['feed_parser']: continue
if not super_hit.get('feed_parser', False): continue
if super_hit['feed_parser'] == IWLAPostAnalysisFeeds.BAD_FEED_PARSER:
continue
nb_feeds_parsers += 1
address = super_hit['remote_addr']
if display_visitor_ip and\
plugins/post_analysis/feeds.py
5555
5656
5757
58
5859
5960
6061
......
7172
7273
7374
75
76
77
7478
7579
7680
......
9498
9599
96100
97
101
102
103
104
105
106
107
108
109
110
111
98112
99113
100114
NOT_A_FEED_PARSER = 0
FEED_PARSER = 1
MERGED_FEED_PARSER = 2
BAD_FEED_PARSER = 3
def __init__(self, iwla):
super(IWLAPostAnalysisFeeds, self).__init__(iwla)
for f in feeds:
self.feeds_re.append(re.compile(r'.*%s.*' % (f)))
self.bad_feeds_re = []
self.bad_feeds_re.append(re.compile(r'.*crawl.*'))
return True
def mergeOneHitOnlyFeedsParsers(self, isFeedParser, one_hit_only, hit):
self.merge_one_hit_only_feeds_parsers:
self.mergeOneHitOnlyFeedsParsers(isFeedParser, one_hit_only, hit)
if not isFeedParser is None: continue
if isFeedParser:
if hit['feed_parser'] == self.BAD_FEED_PARSER: continue
if not hit.get('feed_name_analysed', False) and\
hit.get('dns_name_replaced', False):
hit['feed_name_analysed'] = True
addr = hit.get('remote_addr', None)
for r in self.bad_feeds_re:
if r.match(addr):
hit['feed_parser'] = self.BAD_FEED_PARSER
return
return
isFeedParser = self.NOT_A_FEED_PARSER
uri = hit['requests'][0]['extract_request']['extract_uri'].lower()
plugins/post_analysis/reverse_dns.py
6767
6868
6969
70
70
7171
7272
7373
hits = self.iwla.getCurrentVisits()
for (k, hit) in hits.items():
if hit.get('dns_analysed', False): continue
if not hit['feed_parser'] and\
if not hit.get('feed_parser', False) and\
not self.iwla.isValidVisitor(hit):
continue
try:

Archive Download the corresponding diff file

Branches

Tags