Remove crawler from feed parsers

This commit is contained in:
Grégory Soutadé 2016-08-20 13:08:02 +02:00
parent 10d087ad70
commit e805e59c10
3 changed files with 19 additions and 3 deletions

View File

@ -76,7 +76,9 @@ class IWLADisplayFeeds(IPlugin):
table = display.createBlock(DisplayHTMLBlockTable, self.iwla._(u'All feeds parsers'), [self.iwla._(u'Host'), self.iwla._(u'Pages'), self.iwla._(u'Hits')]) table = display.createBlock(DisplayHTMLBlockTable, self.iwla._(u'All feeds parsers'), [self.iwla._(u'Host'), self.iwla._(u'Pages'), self.iwla._(u'Hits')])
table.setColsCSSClass(['', 'iwla_page', 'iwla_hit']) table.setColsCSSClass(['', 'iwla_page', 'iwla_hit'])
for super_hit in hits.values(): for super_hit in hits.values():
if not super_hit['feed_parser']: continue if not super_hit.get('feed_parser', False): continue
if super_hit['feed_parser'] == IWLAPostAnalysisFeeds.BAD_FEED_PARSER:
continue
nb_feeds_parsers += 1 nb_feeds_parsers += 1
address = super_hit['remote_addr'] address = super_hit['remote_addr']
if display_visitor_ip and\ if display_visitor_ip and\

View File

@ -55,6 +55,7 @@ class IWLAPostAnalysisFeeds(IPlugin):
NOT_A_FEED_PARSER = 0 NOT_A_FEED_PARSER = 0
FEED_PARSER = 1 FEED_PARSER = 1
MERGED_FEED_PARSER = 2 MERGED_FEED_PARSER = 2
BAD_FEED_PARSER = 3
def __init__(self, iwla): def __init__(self, iwla):
super(IWLAPostAnalysisFeeds, self).__init__(iwla) super(IWLAPostAnalysisFeeds, self).__init__(iwla)
@ -71,6 +72,9 @@ class IWLAPostAnalysisFeeds(IPlugin):
for f in feeds: for f in feeds:
self.feeds_re.append(re.compile(r'.*%s.*' % (f))) self.feeds_re.append(re.compile(r'.*%s.*' % (f)))
self.bad_feeds_re = []
self.bad_feeds_re.append(re.compile(r'.*crawl.*'))
return True return True
def mergeOneHitOnlyFeedsParsers(self, isFeedParser, one_hit_only, hit): def mergeOneHitOnlyFeedsParsers(self, isFeedParser, one_hit_only, hit):
@ -94,7 +98,17 @@ class IWLAPostAnalysisFeeds(IPlugin):
self.merge_one_hit_only_feeds_parsers: self.merge_one_hit_only_feeds_parsers:
self.mergeOneHitOnlyFeedsParsers(isFeedParser, one_hit_only, hit) self.mergeOneHitOnlyFeedsParsers(isFeedParser, one_hit_only, hit)
if not isFeedParser is None: continue if isFeedParser:
if hit['feed_parser'] == self.BAD_FEED_PARSER: continue
if not hit.get('feed_name_analysed', False) and\
hit.get('dns_name_replaced', False):
hit['feed_name_analysed'] = True
addr = hit.get('remote_addr', None)
for r in self.bad_feeds_re:
if r.match(addr):
hit['feed_parser'] = self.BAD_FEED_PARSER
return
return
isFeedParser = self.NOT_A_FEED_PARSER isFeedParser = self.NOT_A_FEED_PARSER
uri = hit['requests'][0]['extract_request']['extract_uri'].lower() uri = hit['requests'][0]['extract_request']['extract_uri'].lower()

View File

@ -67,7 +67,7 @@ class IWLAPostAnalysisReverseDNS(IPlugin):
hits = self.iwla.getCurrentVisits() hits = self.iwla.getCurrentVisits()
for (k, hit) in hits.items(): for (k, hit) in hits.items():
if hit.get('dns_analysed', False): continue if hit.get('dns_analysed', False): continue
if not hit['feed_parser'] and\ if not hit.get('feed_parser', False) and\
not self.iwla.isValidVisitor(hit): not self.iwla.isValidVisitor(hit):
continue continue
try: try: