iwla

iwla Git Source Tree

Root/plugins/pre_analysis/page_to_hit.py

1import re
2
3from iwla import IWLA
4from iplugin import IPlugin
5
6#
7# Pre analysis hook
8# Change pages into hits and hits into pages in the statistics
9#
10# Plugin requirements :
11# None
12#
13# Conf values needed :
14# page_to_hit_conf*
15# hit_to_page_conf*
16#
17# Output files :
18# None
19#
20# Statistics creation :
21# None
22#
23# Statistics update :
24# visits :
25# remote_addr =>
26# is_page
27#
28# Statistics deletion :
29# None
30#
31
class IWLAPreAnalysisPageToHit(IPlugin):
    """Pre-analysis plugin that reclassifies requests between page and hit.

    A request currently flagged as a page whose URI matches one of the
    ``page_to_hit_conf`` regular expressions is turned into a hit; a
    request currently flagged as a hit whose URI matches one of the
    ``hit_to_page_conf`` regular expressions is turned into a page.  The
    per-visit ``viewed_pages`` / ``viewed_hits`` counters are adjusted
    accordingly.
    """

    def __init__(self, iwla):
        super(IWLAPreAnalysisPageToHit, self).__init__(iwla)
        self.API_VERSION = 1

    def load(self):
        """Read both regexp lists from the configuration and compile them.

        Returns:
            False as soon as one of the two configuration lists is empty
            or missing, True once both lists have been compiled.
        """
        # NOTE(review): the plugin refuses to load unless BOTH lists are
        # non-empty, although the file header marks both values with '*'
        # (presumably "optional" -- confirm).  Behaviour kept as-is.

        # Page to hit
        self.ph_regexps = self.iwla.getConfValue('page_to_hit_conf', [])
        if not self.ph_regexps:
            return False
        # A list comprehension (rather than map + lambda) yields a real,
        # re-iterable list on both Python 2 and Python 3; hook() walks
        # these lists once per request.
        self.ph_regexps = [re.compile(r) for r in self.ph_regexps]

        # Hit to page
        self.hp_regexps = self.iwla.getConfValue('hit_to_page_conf', [])
        if not self.hp_regexps:
            return False
        self.hp_regexps = [re.compile(r) for r in self.hp_regexps]

        return True

    def hook(self):
        """Flip the page/hit flag of matching requests of the current visits.

        Visits flagged as ``robot`` are skipped, as are requests rejected
        by ``isValidForCurrentAnalysis`` or ``hasBeenViewed``.
        """
        hits = self.iwla.getCurrentVisists()

        for (_key, super_hit) in hits.items():
            if super_hit['robot']:
                continue

            for request in super_hit['requests']:
                if not self.iwla.isValidForCurrentAnalysis(request) or \
                        not self.iwla.hasBeenViewed(request):
                    continue

                uri = request['extract_request']['extract_uri']

                if request['is_page']:
                    # Page to hit: one page less, one hit more.
                    regexps, now_page, page_delta = self.ph_regexps, False, -1
                else:
                    # Hit to page: one page more, one hit less.
                    regexps, now_page, page_delta = self.hp_regexps, True, 1

                # The first matching expression is enough; any() stops there,
                # so a request is converted at most once.
                if any(regexp.match(uri) for regexp in regexps):
                    request['is_page'] = now_page
                    super_hit['viewed_pages'] += page_delta
                    super_hit['viewed_hits'] -= page_delta

Archive Download this file

Branches

Tags