iwla

iwla Commit Details

Date: 2015-01-11 18:06:44 (6 years 6 months ago)
Author: Grégory Soutadé
Branch: dev, master
Commit: 4c74a14037f69c14188d239ee7e90ab9f2429825
Parents: 00ad08a201a2f45666e02af7edaa6a606cc3a4fb
Message: Filter robot with *bot* and *crawl* re

Changes:
M plugins/display/referers.py (1 diff)
M plugins/pre_analysis/robots.py (2 diffs)

File differences

plugins/display/referers.py
190 190
191 191
192 192
193
193
194 194
195 195
196 196
# All key phrases in a file
if self.create_all_key_phrases:
title = createCurTitle(self.iwla, u'All Key Phrases')
title = createCurTitle(self.iwla, self.iwla._(u'All Key Phrases'))
filename = 'key_phrases.html'
path = self.iwla.getCurDisplayPath(filename)
plugins/pre_analysis/robots.py
59 59
60 60
61 61
62
62
63
63 64
64 65
65 66
......
72 73
73 74
74 75
75
76
77
78
79
80
76 81
77 82
78 83
def load(self):
self.awstats_robots = map(lambda (x) : re.compile(('.*%s.*') % (x), re.IGNORECASE), awstats_data.robots)
self.robot_re = re.compile(r'.*bot.*', re.IGNORECASE)
self.crawl_re = re.compile(r'.*crawl.*', re.IGNORECASE)
return True
# Basic rule to detect robots
referers = 0
first_page = super_hit['requests'][0]
if not self.iwla.isValidForCurrentAnalysis(first_page): continue
if self.robot_re.match(first_page['http_user_agent']) or\
self.crawl_re.match(first_page['http_user_agent']):
super_hit['robot'] = 1
continue
for r in self.awstats_robots:
if r.match(first_page['http_user_agent']):

Archive Download the corresponding diff file

Branches

Tags