iwla

iwla Commit Details

Date: 2021-04-01 08:22:52 (3 months 28 days ago)
Author: Grégory Soutadé
Branch: dev
Commit: 8697be26ad9573132654785bdea37c8552d98fdc
Parents: 0602d3ce4b0b8299722c50be2628821dfc7b3aaa
Message: Update feed detector: check 'feed', 'rss' or 'atom' string in user agent

Changes:
M plugins/post_analysis/feeds.py (2 diffs)

File differences

plugins/post_analysis/feeds.py
75 75
76 76
77 77
78
79
80
81
82
78 83
79 84
80 85
......
120 125
121 126
122 127
128
129
130
131
132
133
134
135
123 136
124 137
125 138
self.bad_feeds_re = []
self.bad_feeds_re.append(re.compile(r'.*crawl.*'))
self.user_agents_re = []
self.user_agents_re.append(re.compile(r'.*rss.*'))
self.user_agents_re.append(re.compile(r'.*atom.*'))
self.user_agents_re.append(re.compile(r'.*feed.*'))
return True
def mergeOneHitOnlyFeedsParsers(self, isFeedParser, one_hit_only, hit):
if hit['not_viewed_pages'][0]:
isFeedParser = self.NOT_A_FEED_PARSER
break
if isFeedParser == self.NOT_A_FEED_PARSER:
user_agent = hit['requests'][0]['http_user_agent'].lower()
for regexp in self.user_agents_re:
if regexp.match(user_agent):
isFeedParser = self.FEED_PARSER
break
if self.merge_one_hit_only_feeds_parsers:
self.mergeOneHitOnlyFeedsParsers(isFeedParser, one_hit_only, hit)
else:

Archive: Download the corresponding diff file

Branches

Tags