iwla

iwla Commit Details

Date: 2014-11-27 12:34:42 (6 years 7 months ago)
Author: Grégory Soutadé
Branch: dev, master
Commit: 9fbc5448bc8262bf41346cb8b2b276dcabe7546a
Parents: dd8349ab082bca7acc7e4d722a0df60b46367f8c
Message: Add conf_requires. Load plugins in order

Changes:
M conf.py (1 diff)
M default_conf.py (1 diff)
M iplugin.py (5 diffs)
M iwla.py (4 diffs)
M plugins/post_analysis/referers.py (2 diffs)
M plugins/post_analysis/reverse_dns.py (1 diff)
M plugins/pre_analysis/page_to_hit.py (3 diffs)

File differences

conf.py
1616
1717
1818
19
19
2020
21
21
2222
2323
24
24
2525
26
26
DISPLAY_ROOT = './output/'
pre_analysis_hooks = ['page_to_hit', 'robots']
post_analysis_hooks = ['referers', 'top_pages']
post_analysis_hooks = ['referers', 'top_pages', 'top_downloads']
# post_analysis_hooks = ['top_visitors', 'reverse_dns']
display_hooks = ['top_visitors', 'all_visits', 'referers', 'top_pages']
display_hooks = ['top_visitors', 'all_visits', 'referers', 'top_pages', 'top_downloads']
reverse_dns_timeout = 0.2
page_to_hit_conf = [r'^.+/logo/$']
page_to_hit_conf = [r'^.+/logo[/]?$', r'^.+/category/.+$', r'^.+/tag/.+$', r'^.+/archive/.+$', r'^.+/ljdc[/]?$']
count_hit_only_visitors = False
count_hit_only_visitors = True
default_conf.py
2020
2121
2222
23
23
2424
2525
2626
27
28
29
post_analysis_hooks = []
display_hooks = []
pages_extensions = ['/', 'html', 'xhtml', 'py', 'pl', 'rb', 'php']
pages_extensions = ['/', 'htm', 'html', 'xhtml', 'py', 'pl', 'rb', 'php']
viewed_http_codes = [200, 304]
count_hit_only_visitors = True
multimedia_files = ['png', 'jpg', 'jpeg', 'gif', 'ico',
'css', 'js']
iplugin.py
77
88
99
10
1011
1112
1213
......
2223
2324
2425
26
27
28
2529
2630
2731
......
3337
3438
3539
36
37
40
41
3842
3943
4044
......
5761
5862
5963
64
65
66
67
68
69
70
71
72
73
74
6075
6176
62
63
64
65
66
67
68
69
70
77
78
79
80
81
7182
72
83
84
85
86
7387
7488
7589
......
7892
7993
8094
81
95
8296
8397
8498
def __init__(self, iwla):
self.iwla = iwla
self.requires = []
self.conf_requires = []
self.API_VERSION = 1
self.ANALYSIS_CLASS = 'HTTP'
def getRequirements(self):
return self.requires
def getConfRequirements(self):
return self.conf_requires
def load(self):
return True
print "==> Preload plugins"
for root in plugins.keys():
for plugin_filename in plugins[root]:
for (root, plugins_filenames) in plugins:
for plugin_filename in plugins_filenames:
plugin_path = root + '.' + plugin_filename
try:
mod = importlib.import_module(plugin_path)
#print 'Load plugin %s' % (plugin_name)
conf_requirements = plugin.getConfRequirements()
requirement_validated = True
for r in conf_requirements:
conf_value = iwla.getConfValue(r, None)
if conf_value is None:
print '\'%s\' conf value required for %s' % (r, plugin_path)
requirement_validated = False
break
if not requirement_validated: continue
requirements = plugin.getRequirements()
if requirements:
requirement_validated = False
for r in requirements:
for (_,p) in cache_plugins.items():
if p.__class__.__name__ == r:
requirement_validated = True
break
if not requirement_validated:
print 'Missing requirements for plugin %s' % (plugin_path)
requirement_validated = False
for r in requirements:
for (_,p) in cache_plugins.items():
if p.__class__.__name__ == r:
requirement_validated = True
break
if not requirement_validated: continue
if not requirement_validated:
print 'Missing requirements \'%s\' for plugin %s' % (r, plugin_path)
break
if requirements and not requirement_validated: continue
if not plugin.load():
print 'Plugin %s load failed' % (plugin_path)
print '\tRegister %s' % (plugin_path)
cache_plugins[plugin_path] = plugin
except Exception as e:
print 'Error loading \'%s\' => %s' % (plugin_path, e)
print 'Error loading %s => %s' % (plugin_path, e)
traceback.print_exc()
return cache_plugins
iwla.py
3535
3636
3737
38
39
40
38
39
40
4141
42
42
4343
4444
4545
......
7777
7878
7979
80
80
8181
8282
8383
......
115115
116116
117117
118
119
120
121
122
123
118
119
120
121
122
123
124
125
126
124127
125128
126129
......
143146
144147
145148
146
147
148
149
149150
150151
151152
self.http_request_extracted = re.compile(r'(?P<http_method>\S+) (?P<http_uri>\S+) (?P<http_version>\S+)')
self.log_re = re.compile(self.log_format_extracted)
self.uri_re = re.compile(r'(?P<extract_uri>[^\?]+)(\?(?P<extract_parameters>.+))?')
self.plugins = {conf.PRE_HOOK_DIRECTORY : conf.pre_analysis_hooks,
conf.POST_HOOK_DIRECTORY : conf.post_analysis_hooks,
conf.DISPLAY_HOOK_DIRECTORY : conf.display_hooks}
self.plugins = [(conf.PRE_HOOK_DIRECTORY , conf.pre_analysis_hooks),
(conf.POST_HOOK_DIRECTORY , conf.post_analysis_hooks),
(conf.DISPLAY_HOOK_DIRECTORY , conf.display_hooks)]
def getConfValue(self, key, default):
def getConfValue(self, key, default=None):
if not key in dir(conf):
return default
else:
def isValidForCurrentAnalysis(self, request):
cur_time = self.meta_infos['start_analysis_time']
return (time.mktime(cur_time) < time.mktime(request['time_decoded']))
return (time.mktime(cur_time) <= time.mktime(request['time_decoded']))
def _clearMeta(self):
self.meta_infos = {
return pickle.load(f)
return None
def _callPlugins(self, root, *args):
print '==> Call plugins (%s)' % root
for p in self.plugins[root]:
print '\t%s' % (p)
mod = self.cache_plugins[root + '.' + p]
mod.hook(*args)
def _callPlugins(self, target_root, *args):
print '==> Call plugins (%s)' % target_root
for (root, plugins) in self.plugins:
if root != target_root: continue
for p in plugins:
mod = self.cache_plugins.get(root + '.' + p, None)
if mod:
print '\t%s' % (p)
mod.hook(*args)
def isPage(self, request):
for e in conf.pages_extensions:
request = hit['extract_request']
if 'extract_uri' in request.keys():
uri = request['extract_uri'] = request['http_uri']
uri = request['extract_uri']
uri = request.get('extract_uri', request['http_uri'])
hit['is_page'] = self.isPage(uri)
plugins/post_analysis/referers.py
1010
1111
1212
13
1314
1415
1516
......
2223
2324
2425
25
26
2627
2728
2829
def __init__(self, iwla):
super(IWLAPostAnalysisReferers, self).__init__(iwla)
self.API_VERSION = 1
self.conf_requires = ['domain_name']
def _getSearchEngine(self, hashid):
for (k, e) in self.search_engines.items():
domain_name = self.iwla.getConfValue('domain_name', '')
if not domain_name:
print 'domain_name required in conf'
print 'domain_name must not be empty !'
return False
self.own_domain_re = re.compile(r'.*%s.*' % (domain_name))
plugins/post_analysis/reverse_dns.py
44
55
66
7
8
79
810
911
1012
1113
12
14
15
1316
1417
1518
from iplugin import IPlugin
class IWLAPostAnalysisReverseDNS(IPlugin):
DEFAULT_DNS_TIMEOUT = 0.5
def __init__(self, iwla):
super(IWLAPostAnalysisReverseDNS, self).__init__(iwla)
self.API_VERSION = 1
def load(self):
timeout = self.iwla.getConfValue('reverse_dns_timeout', 0.5)
timeout = self.iwla.getConfValue('reverse_dns_timeout',
IWLAPostAnalysisReverseDNS.DEFAULT_DNS_TIMEOUT)
socket.setdefaulttimeout(timeout)
return True
plugins/pre_analysis/page_to_hit.py
1010
1111
1212
13
1314
1415
1516
......
2122
2223
2324
24
25
26
2527
2628
2729
......
3133
3234
3335
36
3437
3538
3639
def __init__(self, iwla):
super(IWLAPreAnalysisPageToHit, self).__init__(iwla)
self.API_VERSION = 1
self.conf_requires = ['viewed_http_codes']
def load(self):
# Remove logo from indefero
def hook(self):
hits = self.iwla.getCurrentVisists()
viewed_http_codes = self.iwla.getConfValue('viewed_http_codes', [200, 304])
viewed_http_codes = self.iwla.getConfValue('viewed_http_codes')
for (k, super_hit) in hits.items():
if super_hit['robot']: continue
uri = request['extract_request']['extract_uri']
for regexp in self.regexps:
if regexp.match(uri):
#print '%s is an hit' % uri
request['is_page'] = False
super_hit['viewed_pages'] -= 1
super_hit['viewed_hits'] += 1

Archive Download the corresponding diff file

Branches

Tags