iwla

iwla Commit Details

Date: 2014-11-20 14:09:01 (6 years 8 months ago)
Author: Grégory Soutadé
Branch: dev, master
Commit: f593cc78d93d02e080dcd3f71b2993eb4f8dc96b
Parents: 7bd5a42962750cee68c672689fc76cf0fd45b75c
Message: Basically seems to work

Changes:
M hooks/pre_analysis/H001_soutade.py (1 diff)
M hooks/pre_analysis/H002_robot.py (1 diff)
M iwla.py (2 diffs)

File differences

hooks/pre_analysis/H001_soutade.py
13 13
14 14
15 15
16
17
16 18
17 19
18 20
for p in super_hit['pages']:
if not p['is_page']: continue
if int(p['status']) != 200: continue
if logo_re.match(p['extract_request']['extract_uri']):
p['is_page'] = False
if super_hit['viewed_pages']:
hooks/pre_analysis/H002_robot.py
19 19
20 20
21 21
22
22
23
24
25
23 26
24 27
25 28
if not super_hit['viewed_hits']:
super_hit['robot'] = 1
continue
elif not super_hit['viewed_pages']:
# Hit only
super_hit['hit_only'] = 1
for hit in super_hit['pages']:
# 3) /robots.txt read
if hit['extract_request']['http_uri'] == '/robots.txt':
iwla.py
161 161
162 162
163 163
164
164 165
165 166
166 167
......
276 277
277 278
278 279
279
280
281
280 282
281 283
282 284
super_hit['last_access'] = meta_visit['last_time']
super_hit['pages'] = [];
super_hit['robot'] = isRobot(hit);
super_hit['hit_only'] = 0;
appendHit(hit)
def isRobot(hit):
print "[%s] =>\t%d/%d" % (k, super_hit['viewed_pages'], super_hit['viewed_hits'])
stats['nb_visitors'] += 1
if not super_hit['hit_only']:
stats['nb_visitors'] += 1
stats['viewed_bandwidth'] += super_hit['bandwith']
stats['viewed_pages'] += super_hit['viewed_pages']
stats['viewed_hits'] += super_hit['viewed_hits']

Archive Download the corresponding diff file

Branches

Tags