Replace merge_one_hit_only_feeds_parsers with merge_feeds_parsers and set it to False by default

This commit is contained in:
Gregory Soutade 2022-11-12 19:00:14 +01:00
parent c9bc21a506
commit ad01b48898
1 changed file with 23 additions and 15 deletions

View File

@ -27,7 +27,7 @@ from iplugin import IPlugin
Post analysis hook
Find feeds parsers (first hit in feeds conf value and no viewed pages if it's a robot)
If there is only one hit per day to a feed, merge feeds parsers with the same user agent
If merge_feeds_parsers is set to True, merge feeds parsers with the same user agent
as it must be the same person with a different IP address.
Plugin requirements :
@ -36,7 +36,7 @@ Plugin requirements :
Conf values needed :
feeds
feeds_referers*
merge_one_hit_only_feeds_parsers*
merge_feeds_parsers*
Output files :
None
@ -66,7 +66,7 @@ class IWLAPostAnalysisFeeds(IPlugin):
def load(self):
feeds = self.iwla.getConfValue('feeds', [])
feeds_referers = self.iwla.getConfValue('feeds_referers', [])
self.merge_one_hit_only_feeds_parsers = self.iwla.getConfValue('merge_one_hit_only_feeds_parsers', True)
self.merge_feeds_parsers = self.iwla.getConfValue('merge_feeds_parsers', False)
if feeds is None: return False
@ -88,15 +88,21 @@ class IWLAPostAnalysisFeeds(IPlugin):
return True
def mergeOneHitOnlyFeedsParsers(self, isFeedParser, one_hit_only, hit):
if isFeedParser and (hit['viewed_hits'][0] + hit['not_viewed_hits'][0]) == 1:
def mergeFeedsParsers(self, isFeedParser, one_hit_only, hit):
# One hit only match
if isFeedParser: #isFeedParser and (hit['viewed_hits'][0] + hit['not_viewed_hits'][0]) == 1:
user_agent = hit['requests'][0]['http_user_agent'].lower()
# First time, register into dict
if one_hit_only.get(user_agent, None) is None:
# Merged
isFeedParser = self.MERGED_FEED_PARSER
one_hit_only[user_agent] = (hit)
one_hit_only[user_agent] = hit
else:
isFeedParser = self.NOT_A_FEED_PARSER
# Next time
# Current must be ignored
hit['feed_parser'] = self.NOT_A_FEED_PARSER
# Previous matched hit must be set as merged
isFeedParser = self.MERGED_FEED_PARSER
hit = one_hit_only[user_agent]
hit['feed_parser'] = isFeedParser
def hook(self):
@ -105,9 +111,11 @@ class IWLAPostAnalysisFeeds(IPlugin):
for hit in hits.values():
isFeedParser = hit.get('feed_parser', None)
if isFeedParser == self.FEED_PARSER and\
self.merge_one_hit_only_feeds_parsers:
self.mergeOneHitOnlyFeedsParsers(isFeedParser, one_hit_only, hit)
# Register already tagged feed parser in one_hit_only
if self.merge_feeds_parsers and\
not isFeedParser in (None, self.BAD_FEED_PARSER):
self.mergeFeedsParsers(isFeedParser, one_hit_only, hit)
continue
if isFeedParser:
if hit['feed_parser'] == self.BAD_FEED_PARSER: continue
@ -118,8 +126,8 @@ class IWLAPostAnalysisFeeds(IPlugin):
for r in self.bad_feeds_re:
if r.match(addr):
hit['feed_parser'] = self.BAD_FEED_PARSER
return
return
break
continue
request = hit['requests'][0]
isFeedParser = self.NOT_A_FEED_PARSER
@ -148,7 +156,7 @@ class IWLAPostAnalysisFeeds(IPlugin):
isFeedParser = self.FEED_PARSER
break
if self.merge_one_hit_only_feeds_parsers:
self.mergeOneHitOnlyFeedsParsers(isFeedParser, one_hit_only, hit)
if self.merge_feeds_parsers:
self.mergeFeedsParsers(isFeedParser, one_hit_only, hit)
else:
hit['feed_parser'] = isFeedParser