iwla

iwla Commit Details

Date: 2014-11-24 21:37:37 (6 years 8 months ago)
Author: Grégory Soutadé
Branch: dev, master
Commit: 549c0e5d973d1cd1ed2eab70ade49fe59feb742d
Parents: 21a95cc2fab7a3ff5d48ee09ccaef59f8f7e0ff6
Message: Update conf management

Changes:
M conf.py (2 diffs)
M iplugin.py (3 diffs)
M iwla.py (12 diffs)
M plugins/display/top_visitors.py (1 diff)
M plugins/post_analysis/reverse_dns.py (1 diff)
M plugins/post_analysis/top_visitors.py (1 diff)
M plugins/pre_analysis/robots.py (1 diff)
M plugins/pre_analysis/soutade.py (1 diff)

File differences

conf.py
11
22
3
4
3
4
55
66
77
......
1515
1616
1717
18
1819
1920
2021
log_format = '$server_name:$server_port $remote_addr - $remote_user [$time_local] ' +\
'"$request" $status $body_bytes_sent ' +\
'"$http_referer" "$http_user_agent"';
'"$request" $status $body_bytes_sent ' +\
'"$http_referer" "$http_user_agent"';
#09/Nov/2014:06:35:16 +0100
time_format = '%d/%b/%Y:%H:%M:%S +0100'
post_analysis_hooks = ['top_visitors', 'reverse_dns']
display_hooks = ['top_visitors']
reverse_dns_timeout = 0.2
# pre_analysis_hooks = ['H002_soutade.py', 'H001_robot.py']
# post_analysis_hooks = ['top_visitors.py']
# display_hooks = ['top_visitors.py']
iplugin.py
22
33
44
5
6
7
8
9
510
611
7
12
813
14
915
1016
1117
......
1925
2026
2127
28
29
30
31
32
33
2234
2335
2436
......
4658
4759
4860
49
61
5062
5163
5264
import inspect
import traceback
import default_conf as conf
import conf as _
conf.__dict__.update(_.__dict__)
del _
class IPlugin(object):
def __init__(self, iwla):
def __init__(self, iwla, conf):
self.iwla = iwla
self.conf = conf
self.requires = []
self.API_VERSION = 1
self.ANALYSIS_CLASS = 'HTTP'
return True
def getConfValue(self, key, default):
if not key in dir(self.conf):
return default
else:
return self.conf.__dict__[key]
def getRequirements(self):
return self.requires
print 'No plugin defined in %s' % (plugin_path)
continue
plugin = classes[0](iwla)
plugin = classes[0](iwla, conf)
plugin_name = plugin.__class__.__name__
if not plugin.isValid(iwla.ANALYSIS_CLASS, iwla.API_VERSION):
iwla.py
1010
1111
1212
13
14
15
16
17
1318
1419
1520
16
17
18
1921
2022
2123
......
3133
3234
3335
34
36
3537
3638
3739
3840
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
41
42
43
6444
6545
6646
......
9777
9878
9979
100
80
10181
10282
10383
......
10585
10686
10787
108
88
10989
11090
11191
......
130110
131111
132112
133
113
134114
135115
136116
......
162142
163143
164144
165
145
166146
167147
168148
......
211191
212192
213193
214
194
215195
216196
217197
......
261241
262242
263243
264
265
244
245
266246
267247
268248
......
308288
309289
310290
311
291
312292
313293
314294
......
323303
324304
325305
326
306
327307
328308
329309
......
382362
383363
384364
385
365
386366
387367
388368
389
369
390370
391371
392372
393373
394374
395
375
396376
397377
398378
......
408388
409389
410390
411
391
412392
413393
414394
import gzip
import importlib
import default_conf as conf
import conf as _
conf.__dict__.update(_.__dict__)
del _
from iplugin import *
from display import *
from default_conf import *
from conf import *
class IWLA(object):
ANALYSIS_CLASS = 'HTTP'
self.display = DisplayHTMLBuild()
self.valid_visitors = None
self.log_format_extracted = re.sub(r'([^\$\w])', r'\\\g<1>', log_format)
self.log_format_extracted = re.sub(r'([^\$\w])', r'\\\g<1>', conf.log_format)
self.log_format_extracted = re.sub(r'\$(\w+)', '(?P<\g<1>>.+)', self.log_format_extracted)
self.http_request_extracted = re.compile(r'(?P<http_method>\S+) (?P<http_uri>\S+) (?P<http_version>\S+)')
self.log_re = re.compile(self.log_format_extracted)
self.uri_re = re.compile(r'(?P<extract_uri>[^\?]*)[\?(?P<extract_parameters>.*)]?')
self.plugins = {PRE_HOOK_DIRECTORY : pre_analysis_hooks,
POST_HOOK_DIRECTORY : post_analysis_hooks,
DISPLAY_HOOK_DIRECTORY : display_hooks}
def _preloadPlugins(self):
self.cache_plugins = preloadPlugins(self.plugins, self)
return
ret = True
for root in self.plugins.keys():
for plugin_name in self.plugins[root]:
p = root + '.' + plugin_name
try:
self.cache_plugins[p] = importlib.import_module(p)
mod = self.cache_plugins[p]
infos = mod.get_plugins_infos()
if infos['class'] != IWLA.ANALYSIS_CLASS or \
IWLA.API_VERSION < infos['min_version'] or\
(infos['max_version'] != -1 and (IWLA.API_VERSION > infos['max_version'])):
del self.cache_plugins[p]
elif not mod.load():
del self.cache_plugins[p]
except Exception as e:
print 'Error loading \'%s\' => %s' % (p, e)
ret = False
return ret
self.plugins = {conf.PRE_HOOK_DIRECTORY : conf.pre_analysis_hooks,
conf.POST_HOOK_DIRECTORY : conf.post_analysis_hooks,
conf.DISPLAY_HOOK_DIRECTORY : conf.display_hooks}
def _clearVisits(self):
self.current_analysis = {
return self.display
def getDBFilename(self, time):
return (DB_ROOT + '%d/%d_%s') % (time.tm_year, time.tm_mon, DB_FILENAME)
return (conf.DB_ROOT + '%d/%d_%s') % (time.tm_year, time.tm_mon, conf.DB_FILENAME)
def _serialize(self, obj, filename):
base = os.path.dirname(filename)
os.makedirs(base)
# TODO : remove return
return
#return
with open(filename + '.tmp', 'wb+') as f:
pickle.dump(obj, f)
mod.hook(*args)
def isPage(self, request):
for e in pages_extensions:
for e in conf.pages_extensions:
if request.endswith(e):
return True
if status >= 300 and status < 400: return
if super_hit['robot'] or\
not status in viewed_http_codes:
not status in conf.viewed_http_codes:
page_key = 'not_viewed_pages'
hit_key = 'not_viewed_hits'
else:
return True
def _decodeTime(self, hit):
hit['time_decoded'] = time.strptime(hit['time_local'], time_format)
hit['time_decoded'] = time.strptime(hit['time_local'], conf.time_format)
def getDisplayIndex(self):
cur_time = self.meta_infos['last_time']
def _generateDisplay(self):
self._generateDisplayDaysStat()
self._callPlugins(DISPLAY_HOOK_DIRECTORY, self)
self.display.build(DISPLAY_ROOT)
self._callPlugins(conf.DISPLAY_HOOK_DIRECTORY, self)
self.display.build(conf.DISPLAY_ROOT)
def _generateStats(self, visits):
stats = {}
self.current_analysis['month_stats'] = stats
self.valid_visitors = {k: v for (k,v) in visits.items() if not visits[k]['robot']}
self._callPlugins(POST_HOOK_DIRECTORY, self)
self._callPlugins(conf.POST_HOOK_DIRECTORY, self)
path = self.getDBFilename(cur_time)
if os.path.exists(path):
def _generateDayStats(self):
visits = self.current_analysis['visits']
self._callPlugins(PRE_HOOK_DIRECTORY, self)
self._callPlugins(conf.PRE_HOOK_DIRECTORY, self)
stats = self._generateStats(visits)
return True
def start(self):
self._preloadPlugins()
self.cache_plugins = preloadPlugins(self.plugins, self)
print '==> Analysing log'
self.meta_infos = self._deserialize(META_PATH) or self._clearMeta()
self.meta_infos = self._deserialize(conf.META_PATH) or self._clearMeta()
if self.meta_infos['last_time']:
self.current_analysis = self._deserialize(self.getDBFilename(self.meta_infos['last_time'])) or self._clearVisits()
else:
self._clearVisits()
with open(analyzed_filename) as f:
with open(conf.analyzed_filename) as f:
for l in f:
# print "line " + l
if self.analyse_started:
self._generateDayStats()
self._generateMonthStats()
self._serialize(self.meta_infos, META_PATH)
self._serialize(self.meta_infos, conf.META_PATH)
else:
print '==> Analyse not started : nothing to do'
self._generateMonthStats()
plugins/display/top_visitors.py
55
66
77
8
9
8
9
1010
1111
1212
from display import *
class IWLADisplayTopVisitors(IPlugin):
def __init__(self, iwla):
super(IWLADisplayTopVisitors, self).__init__(iwla)
def __init__(self, iwla, conf):
super(IWLADisplayTopVisitors, self).__init__(iwla, conf)
self.API_VERSION = 1
self.requires = ['IWLAPostAnalysisTopVisitors']
plugins/post_analysis/reverse_dns.py
1
2
13
24
35
46
5
6
7
8
79
810
11
12
13
14
15
916
1017
1118
import socket
from iwla import IWLA
from iplugin import IPlugin
class IWLAPostAnalysisReverseDNS(IPlugin):
def __init__(self, iwla):
super(IWLAPostAnalysisReverseDNS, self).__init__(iwla)
def __init__(self, iwla, conf):
super(IWLAPostAnalysisReverseDNS, self).__init__(iwla, conf)
self.API_VERSION = 1
def load(self):
timeout = self.getConfValue('reverse_dns_timeout', 0.5)
socket.setdefaulttimeout(timeout)
return True
def hook(self, iwla):
hits = iwla.getValidVisitors()
for (k, hit) in hits.items():
plugins/post_analysis/top_visitors.py
22
33
44
5
6
5
6
77
88
99
from iplugin import IPlugin
class IWLAPostAnalysisTopVisitors(IPlugin):
def __init__(self, iwla):
super(IWLAPostAnalysisTopVisitors, self).__init__(iwla)
def __init__(self, iwla, conf):
super(IWLAPostAnalysisTopVisitors, self).__init__(iwla, conf)
self.API_VERSION = 1
def hook(self, iwla):
plugins/pre_analysis/robots.py
66
77
88
9
10
9
10
1111
1212
1313
from awstats_robots_data import awstats_robots
class IWLAPreAnalysisRobots(IPlugin):
def __init__(self, iwla):
super(IWLAPreAnalysisRobots, self).__init__(iwla)
def __init__(self, iwla, conf):
super(IWLAPreAnalysisRobots, self).__init__(iwla, conf)
self.API_VERSION = 1
def load(self):
plugins/pre_analysis/soutade.py
77
88
99
10
11
10
11
1212
1313
1414
class IWLAPreAnalysisSoutade(IPlugin):
def __init__(self, iwla):
super(IWLAPreAnalysisSoutade, self).__init__(iwla)
def __init__(self, iwla, conf):
super(IWLAPreAnalysisSoutade, self).__init__(iwla, conf)
self.API_VERSION = 1
def load(self):

Archive Download the corresponding diff file

Branches

Tags