iwla

iwla Commit Details

Date: 2014-11-21 14:46:12 (6 years 8 months ago)
Author: Grégory Soutadé
Branch: dev, master
Commit: c3c201fda1ff70981d8f95f0c1651bc7df475598
Parents: 7dada493abd0068bac180572621b43ddcbfd2f4b
Message:Start using classes

Changes:
M iwla.py (2 diffs)

File differences

iwla.py
1010
1111
1212
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
13
2814
2915
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
107102
108103
109104
......
112107
113108
114109
115
116
110
111
112
113
114
115
117116
118117
119
120
121
118
119
120
121
122
123
122124
123
124
125
126
127
128
125
126
127
128
129129
130
131
132
133
130
134131
135
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
136244
137
138
245
246
247
248
139249
140
141
142
250
251
252
253
143254
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
255
256
257
258
259
260
261
262
205263
206
207
208
209
264
265
266
267
268
210269
211
212
270
213271
214
272
273
274
275
276
215277
216
217
218
278
279
280
281
219282
220
283
221284
222
223
224
225
226
227
228
285
286
229287
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
from display import *
# ---------------------------------------------------------------------------
# Module-level configuration and analysis state (pre-class version).
# NOTE(review): this whole section is duplicated inside the IWLA class later
# in the file; it is the old code being replaced by the "Start using classes"
# migration.
# ---------------------------------------------------------------------------
# Default configuration
DB_ROOT = './output/'
DISPLAY_ROOT = './output/'
# nginx-style access-log format: each $variable is turned into a named
# regex group by the re.sub() calls below.
log_format = '$server_name:$server_port $remote_addr - $remote_user [$time_local] ' +\
'"$request" $status $body_bytes_sent ' +\
'"$http_referer" "$http_user_agent"'
# strptime() format matching $time_local (hard-coded +0100 offset).
time_format = '%d/%b/%Y:%H:%M:%S +0100'
# Plugin name lists, expected to be filled by default_conf / conf below.
pre_analysis_hooks = []
post_analysis_hooks = []
display_hooks = []
from default_conf import *
from conf import *
print '==> Start'
# Global mutable analysis state (Python 2 script style).
meta_visit = {}
analyse_started = False
current_visits = {}
cache_plugins = {}
display = {}
# Escape regex metacharacters in log_format, then turn every $name into a
# (?P<name>.+) capture group.
log_format_extracted = re.sub(r'([^\$\w])', r'\\\g<1>', log_format)
log_format_extracted = re.sub(r'\$(\w+)', '(?P<\g<1>>.+)', log_format_extracted)
http_request_extracted = re.compile(r'(?P<http_method>\S+) (?P<http_uri>\S+) (?P<http_version>\S+)')
log_re = re.compile(log_format_extracted)
# NOTE(review): '[...]' is a character class here, not an optional group.
# The intended pattern is probably r'(?P<extract_uri>[^\?]*)(\?(?P<extract_parameters>.*))?';
# as written the extract_parameters group never exists — confirm upstream.
uri_re = re.compile(r'(?P<extract_uri>[^\?]*)[\?(?P<extract_parameters>.*)]?')
# Requests ending with one of these suffixes count as "pages"; others as hits.
pages_extensions = ['/', 'html', 'xhtml', 'py', 'pl', 'rb', 'php']
viewed_http_codes = [200]
HOOKS_ROOT = './plugins/'
PRE_HOOK_DIRECTORY = HOOKS_ROOT + 'pre_analysis/'
POST_HOOK_DIRECTORY = HOOKS_ROOT + 'post_analysis/'
DISPLAY_HOOK_DIRECTORY = HOOKS_ROOT + 'display/'
META_PATH = DB_ROOT + 'meta.db'
DB_FILENAME = 'iwla.db'
# Maps each hook directory to the list of plugin names to load from it.
plugins = {PRE_HOOK_DIRECTORY : pre_analysis_hooks, POST_HOOK_DIRECTORY : post_analysis_hooks, DISPLAY_HOOK_DIRECTORY : display_hooks}
# Plugins must declare this analysis class and a compatible API version.
ANALYSIS_CLASS = 'HTTP'
API_VERSION = 1
# Load every configured plugin via the 'imp' machinery, validate it against
# get_plugins_infos() (class + API version range), and cache the module in
# cache_plugins keyed by '<root>/<name>'. Returns False if any load failed.
def preloadPlugins():
ret = True
for root in plugins.keys():
for plugin_name in plugins[root]:
p = root + '/' + plugin_name
try:
fp, pathname, description = imp.find_module(plugin_name, [root])
cache_plugins[p] = imp.load_module(plugin_name, fp, pathname, description)
#cache_plugins[p] = imp.load_module(p,None,p,("py","r",imp.PKG_DIRECTORY))
#cache_plugins[p] = imp.load_source(p, p)
mod = cache_plugins[p]
#print dir(mod)
#print "Register %s -> %s" % (p, mod)
infos = mod.get_plugins_infos()
# Drop plugins of the wrong class or outside the supported API
# range (max_version == -1 means "no upper bound"), or whose
# load() hook reports failure.
if infos['class'] != ANALYSIS_CLASS or \
API_VERSION < infos['min_version'] or\
(infos['max_version'] != -1 and (API_VERSION > infos['max_version'])):
del cache_plugins[p]
elif not mod.load():
del cache_plugins[p]
except Exception as e:
print 'Error loading \'%s\' => %s' % (p, e)
ret = False
return ret
def createEmptyVisits():
    """Return a fresh, empty analysis structure (day stats, month stats, visits)."""
    return {
        'days_stats': {},
        'month_stats': {},
        'visits': {},
    }
def createEmptyMeta():
    """Return fresh meta information; 'last_time' marks the newest parsed hit."""
    return {'last_time': None}
def createEmptyDisplay():
    """Return a fresh, empty display structure (filename -> page)."""
    return {}
# Build the per-month DB path: <DB_ROOT>/<year>/<month>_<DB_FILENAME>.
# NOTE(review): the 'time' parameter shadows the 'time' module.
def getDBFilename(time):
return (DB_ROOT + '%d/%d_%s') % (time.tm_year, time.tm_mon, DB_FILENAME)
# Serialize *obj* to *filename* (pre-class version). Currently a deliberate
# stub: it only ensures the target directory exists, then bails out at the
# TODO return below, so nothing is actually written.
def serialize(obj, filename):
base = os.path.dirname(filename)
if not os.path.exists(base):
os.makedirs(base)
# TODO : remove return
return
class IWLA(object):
"""Web-log analyzer: parses an access log and builds per-day/month visit stats."""
# Plugins must report this analysis class and an API version in range to load.
ANALYSIS_CLASS = 'HTTP'
API_VERSION = 1
def __init__(self):
"""Initialise empty analysis state and pre-compile the log-parsing regexes."""
print '==> Start'
self.meta_infos = {}
self.analyse_started = False
self.current_analysis = {}
self.cache_plugins = {}
self.display = {}
# Cache of non-robot visitors; filled by _generateMonthStats().
self.valid_visitors = None
# Escape regex metacharacters, then turn each $name into (?P<name>.+).
# Order matters: the second sub() consumes the first one's output.
self.log_format_extracted = re.sub(r'([^\$\w])', r'\\\g<1>', log_format)
self.log_format_extracted = re.sub(r'\$(\w+)', '(?P<\g<1>>.+)', self.log_format_extracted)
self.http_request_extracted = re.compile(r'(?P<http_method>\S+) (?P<http_uri>\S+) (?P<http_version>\S+)')
self.log_re = re.compile(self.log_format_extracted)
# NOTE(review): '[...]' is a character class, not an optional group; the
# extract_parameters group can never match as written — confirm intent.
self.uri_re = re.compile(r'(?P<extract_uri>[^\?]*)[\?(?P<extract_parameters>.*)]?')
# Maps hook directory -> plugin names (lists come from the conf modules).
self.plugins = {PRE_HOOK_DIRECTORY : pre_analysis_hooks,
POST_HOOK_DIRECTORY : post_analysis_hooks,
DISPLAY_HOOK_DIRECTORY : display_hooks}
def _preloadPlugins(self):
"""Load, validate and cache every configured plugin; False if any failed."""
ret = True
for root in self.plugins.keys():
for plugin_name in self.plugins[root]:
p = root + '/' + plugin_name
try:
fp, pathname, description = imp.find_module(plugin_name, [root])
self.cache_plugins[p] = imp.load_module(plugin_name, fp, pathname, description)
mod = self.cache_plugins[p]
infos = mod.get_plugins_infos()
# Reject wrong-class plugins and API mismatches
# (max_version == -1 means "no upper bound").
if infos['class'] != IWLA.ANALYSIS_CLASS or \
IWLA.API_VERSION < infos['min_version'] or\
(infos['max_version'] != -1 and (IWLA.API_VERSION > infos['max_version'])):
del self.cache_plugins[p]
elif not mod.load():
del self.cache_plugins[p]
except Exception as e:
print 'Error loading \'%s\' => %s' % (p, e)
ret = False
return ret
def _clearVisits(self):
    """Reset the in-memory analysis to an empty state and return it."""
    self.current_analysis = {key: {} for key in ('days_stats', 'month_stats', 'visits')}
    # Invalidate the cached non-robot visitor set.
    self.valid_visitors = None
    return self.current_analysis
# Read-only accessors over the current analysis structure.
def getDaysStats(self):
return self.current_analysis['days_stats']
# NOTE(review): the doubled 'Stats' in the name looks like a typo, but
# renaming would break external callers.
def getMonthStatsStats(self):
return self.current_analysis['month_stats']
# NOTE(review): 'Visists' is a typo kept for interface compatibility.
def getCurrentVisists(self):
return self.current_analysis['visits']
def getValidVisitors(self):
    """Return the non-robot visitors computed by _generateMonthStats().

    Bug fix: this getter returned the raw 'visits' dict (robots included),
    ignoring the self.valid_visitors attribute it is named after. It is
    None until _generateMonthStats() has run.
    """
    return self.valid_visitors
def _clearMeta(self):
    """Reset meta information; 'last_time' tracks the newest hit parsed."""
    self.meta_infos = dict(last_time=None)
    return self.meta_infos
def _clearDisplay(self):
    """Drop all generated display pages and return the empty structure."""
    self.display = dict()
    return self.display
# Per-month DB path: <DB_ROOT>/<year>/<month>_<DB_FILENAME>.
# NOTE(review): the 'time' parameter shadows the 'time' module.
def getDBFilename(self, time):
return (DB_ROOT + '%d/%d_%s') % (time.tm_year, time.tm_mon, DB_FILENAME)
def _serialize(self, obj, filename):
    """Pickle *obj* and store it gzip-compressed at *filename*.

    Creates the parent directory if needed. Writes the pickle to a
    temporary file first, then compresses it into place and removes
    the temporary file.
    """
    base = os.path.dirname(filename)
    if not os.path.exists(base):
        os.makedirs(base)
    with open(filename + '.tmp', 'wb+') as f:
        pickle.dump(obj, f)
        # Bug fixes: the original had a stub 'return' ("TODO : remove
        # return") that made this a no-op, read f at EOF without seeking
        # back, and wrote to 'fzip' which was never defined.
        f.seek(0)
        with gzip.open(filename, 'wb') as fzip:
            fzip.write(f.read())
    os.remove(filename + '.tmp')
# NOTE(review): diff-rendering artifact — the old module-level deserialize()
# and the new IWLA._deserialize() are interleaved here. As shown this span is
# not syntactically valid; untangle it against the real file. Both versions
# appear to return the unpickled gzip content, or None if the file is missing.
def deserialize(filename):
if not os.path.exists(filename):
def _deserialize(self, filename):
if not os.path.exists(filename):
return None
with gzip.open(filename, 'r') as f:
return pickle.load(f)
return None
with gzip.open(filename, 'r') as f:
return pickle.load(f)
return None
def _callPlugins(self, root, *args):
print '==> Call plugins (%s)' % root
for p in self.plugins[root]:
print '\t%s' % (p)
mod = self.cache_plugins[root + '/' + p]
mod.hook(*args)
def callPlugins(root, *args):
print '==> Call plugins (%s)' % root
for p in plugins[root]:
print '\t%s' % (p)
mod = cache_plugins[root + '/' + p]
mod.hook(*args)
# NOTE(review): diff-rendering artifact — the new IWLA.isPage() method and the
# old module-level isPage() are interleaved, and the two trailing
# 'return False' lines belong one to each version. Both classify *request*
# as a page when it ends with one of pages_extensions.
def isPage(self, request):
for e in pages_extensions:
if request.endswith(e):
return True
def isPage(request):
for e in pages_extensions:
if request.endswith(e):
return True
return False
return False
def _appendHit(self, hit):
"""Account one parsed log *hit* against its visitor's counters."""
remote_addr = hit['remote_addr']
# Unknown address: create the visitor record, which replays this hit.
if not remote_addr in self.current_analysis['visits'].keys():
self._createUser(hit)
return
super_hit = self.current_analysis['visits'][remote_addr]
super_hit['requests'].append(hit)
super_hit['bandwidth'] += int(hit['body_bytes_sent'])
super_hit['last_access'] = self.meta_infos['last_time']
request = hit['extract_request']
uri = request['extract_uri']
if 'extract_uri' in request.keys():
uri = request['extract_uri']
else:
uri = request['http_uri']
hit['is_page'] = self.isPage(uri)
# Don't count 3xx status
status = int(hit['status'])
if status >= 300 and status < 400: return
# Robots and non-200 responses go to the "not viewed" counters.
if super_hit['robot'] or\
not status in viewed_http_codes:
page_key = 'not_viewed_pages'
hit_key = 'not_viewed_hits'
else:
page_key = 'viewed_pages'
hit_key = 'viewed_hits'
if hit['is_page']:
super_hit[page_key] += 1
else:
super_hit[hit_key] += 1
def _createUser(self, hit):
super_hit = self.current_analysis['visits'][hit['remote_addr']] = {}
super_hit['remote_addr'] = hit['remote_addr']
super_hit['viewed_pages'] = 0
super_hit['viewed_hits'] = 0
super_hit['not_viewed_pages'] = 0
super_hit['not_viewed_hits'] = 0
super_hit['bandwidth'] = 0
super_hit['last_access'] = self.meta_infos['last_time']
super_hit['requests'] = []
super_hit['robot'] = False
super_hit['hit_only'] = 0
self._appendHit(hit)
def _decodeHTTPRequest(self, hit):
"""Split hit['request'] into method/URI/version and extract URI parts.

Returns False when the request line is missing or unparsable.
"""
if not 'request' in hit.keys(): return False
groups = self.http_request_extracted.match(hit['request'])
if groups:
hit['extract_request'] = groups.groupdict()
uri_groups = self.uri_re.match(hit['extract_request']['http_uri'])
if uri_groups:
d = uri_groups.groupdict()
hit['extract_request']['extract_uri'] = d['extract_uri']
# NOTE(review): with the current uri_re, 'extract_parameters' can
# never be present in the groupdict — see the regex note in __init__.
if 'extract_parameters' in d.keys():
hit['extract_request']['extract_parameters'] = d['extract_parameters']
else:
print "Bad request extraction " + hit['request']
return False
# Also decode the referer URI when it matches.
referer_groups = self.uri_re.match(hit['http_referer'])
if referer_groups:
referer = hit['extract_referer'] = referer_groups.groupdict()
return True
# Parse hit['time_local'] with the configured time_format into a struct_time.
def _decodeTime(self, hit):
hit['time_decoded'] = time.strptime(hit['time_local'], time_format)
def getDisplayIndex(self):
    """Return the already-built index page for the current month, or None."""
    cur_time = self.meta_infos['last_time']
    index_name = '%d/index_%d.html' % (cur_time.tm_year, cur_time.tm_mon)
    return self.display.get(index_name, None)
# Build the per-day statistics table of the month index page.
# NOTE(review): diff-rendering artifact — from the old 'def appendHit(hit):'
# line below, this method's tail (the Total row and appendBlockToPage) is
# interleaved with the start of the old module-level appendHit(); untangle
# against the real file.
def _generateDisplayDaysStat(self):
cur_time = self.meta_infos['last_time']
title = 'Stats %d/%d' % (cur_time.tm_mon, cur_time.tm_year)
filename = '%d/index_%d.html' % (cur_time.tm_year, cur_time.tm_mon)
page = createPage(self.display, filename, title)
days = createTable('By day', ['Day', 'Visits', 'Pages', 'Hits', 'Bandwidth', 'Robot Bandwidth'])
keys = self.current_analysis['days_stats'].keys()
keys.sort()
nb_visits = 0
for k in keys:
stats = self.current_analysis['days_stats'][k]
row = [k, stats['nb_visitors'], stats['viewed_pages'], stats['viewed_hits'], stats['viewed_bandwidth'], stats['not_viewed_bandwidth']]
row = map(lambda(v): str(v), row)
appendRowToTable(days, row)
nb_visits += stats['nb_visitors']
stats = self.current_analysis['month_stats']
nb_days = len(keys)
# 'Average' row: per-day means over the month.
row = [0, nb_visits, stats['viewed_pages'], stats['viewed_hits'], stats['viewed_bandwidth'], stats['not_viewed_bandwidth']]
if nb_days:
row = map(lambda(v): str(int(v/nb_days)), row)
else:
row = map(lambda(v): '0', row)
row[0] = 'Average'
appendRowToTable(days, row)
def appendHit(hit):
remote_addr = hit['remote_addr']
row = ['Total', nb_visits, stats['viewed_pages'], stats['viewed_hits'], stats['viewed_bandwidth'], stats['not_viewed_bandwidth']]
row = map(lambda(v): str(v), row)
appendRowToTable(days, row)
appendBlockToPage(page, days)
if not remote_addr in current_visits['visits'].keys():
createUser(hit)
return
# Build the month index page, let display plugins add their blocks, then
# write everything under DISPLAY_ROOT.
def _generateDisplay(self):
self._generateDisplayDaysStat()
self._callPlugins(DISPLAY_HOOK_DIRECTORY, self.current_analysis, self.display)
buildPages(DISPLAY_ROOT, self.display)
# NOTE(review): diff-rendering artifact — this span is the displaced body of
# the old module-level appendHit() whose 'def' line appears higher up,
# interleaved with _generateDisplayDaysStat. It mirrors IWLA._appendHit but
# uses the module globals current_visits / meta_visit.
super_hit = current_visits['visits'][remote_addr]
super_hit['requests'].append(hit)
super_hit['bandwidth'] += int(hit['body_bytes_sent'])
super_hit['last_access'] = meta_visit['last_time']
request = hit['extract_request']
if 'extract_uri' in request.keys():
uri = request['extract_uri']
else:
uri = request['http_uri']
hit['is_page'] = isPage(uri)
# Don't count 3xx status
status = int(hit['status'])
if status >= 300 and status < 400: return
if super_hit['robot'] or\
not status in viewed_http_codes:
page_key = 'not_viewed_pages'
hit_key = 'not_viewed_hits'
else:
page_key = 'viewed_pages'
hit_key = 'viewed_hits'
if hit['is_page']:
super_hit[page_key] += 1
else:
super_hit[hit_key] += 1
def createUser(hit):
    """Pre-class version of IWLA._createUser: register a new visitor, then replay *hit*."""
    addr = hit['remote_addr']
    current_visits['visits'][addr] = {
        'remote_addr': addr,
        'viewed_pages': 0,
        'viewed_hits': 0,
        'not_viewed_pages': 0,
        'not_viewed_hits': 0,
        'bandwidth': 0,
        'last_access': meta_visit['last_time'],
        'requests': [],
        'robot': False,
        'hit_only': 0,
    }
    # Account the hit that triggered the creation.
    appendHit(hit)
# Pre-class version of IWLA._decodeHTTPRequest.
# NOTE(review): diff-rendering artifact — the referer-extraction tail and the
# final 'return True' of this function appear further down, interleaved with
# _generateStats; this span alone is truncated.
def decodeHTTPRequest(hit):
if not 'request' in hit.keys(): return False
groups = http_request_extracted.match(hit['request'])
if groups:
hit['extract_request'] = groups.groupdict()
uri_groups = uri_re.match(hit['extract_request']['http_uri'])
if uri_groups:
d = uri_groups.groupdict()
hit['extract_request']['extract_uri'] = d['extract_uri']
if 'extract_parameters' in d.keys():
hit['extract_request']['extract_parameters'] = d['extract_parameters']
else:
print "Bad request extraction " + hit['request']
return False
# NOTE(review): heavily garbled diff-rendering artifact. This span interleaves:
#   - the new IWLA._generateStats (aggregates per-visitor counters),
#   - the tail of the old decodeHTTPRequest (referer extraction / return True),
#   - the old decodeTime and getDisplayIndex,
#   - the old generateDisplayDaysStat header,
#   - the start of the new IWLA._generateMonthStats (its body continues
#     further down in this file).
# Not syntactically valid as shown; untangle against the real file.
def _generateStats(self, visits):
stats = {}
stats['viewed_bandwidth'] = 0
stats['not_viewed_bandwidth'] = 0
stats['viewed_pages'] = 0
stats['viewed_hits'] = 0
#stats['requests'] = set()
stats['nb_visitors'] = 0
referer_groups = uri_re.match(hit['http_referer'])
if referer_groups:
referer = hit['extract_referer'] = referer_groups.groupdict()
return True
for k in visits.keys():
super_hit = visits[k]
if super_hit['robot']:
stats['not_viewed_bandwidth'] += super_hit['bandwidth']
continue
def decodeTime(hit):
t = hit['time_local']
#print "[%s] =>\t%d/%d" % (k, super_hit['viewed_pages'], super_hit['viewed_hits'])
hit['time_decoded'] = time.strptime(t, time_format)
if not super_hit['hit_only']:
stats['nb_visitors'] += 1
stats['viewed_bandwidth'] += super_hit['bandwidth']
stats['viewed_pages'] += super_hit['viewed_pages']
stats['viewed_hits'] += super_hit['viewed_hits']
def getDisplayIndex():
cur_time = meta_visit['last_time']
filename = '%d/index_%d.html' % (cur_time.tm_year, cur_time.tm_mon)
# for p in super_hit['requests']:
# if not p['is_page']: continue
# req = p['extract_request']
# stats['requests'].add(req['extract_uri'])
return display.get(filename, None)
return stats
def generateDisplayDaysStat():
cur_time = meta_visit['last_time']
title = 'Stats %d/%d' % (cur_time.tm_mon, cur_time.tm_year)
filename = '%d/index_%d.html' % (cur_time.tm_year, cur_time.tm_mon)
page = createPage(display, filename, title)
days = createTable('By day', ['Day', 'Visits', 'Pages', 'Hits', 'Bandwidth', 'Robot Bandwidth'])
def _generateMonthStats(self):
self._clearDisplay()
keys = current_visits['days_stats'].keys()
keys.sort()
nb_visits = 0
for k in keys:
stats = current_visits['days_stats'][k]
row = [k, stats['nb_visitors'], stats['viewed_pages'], stats['viewed_hits'], stats['viewed_bandwidth'], stats['not_viewed_bandwidth']]
row = map(lambda(v): str(v), row)
appendRowToTable(days, row)
nb_visits += stats['nb_visitors']
stats = current_visits['month_stats']
nb_days = len(keys)
row = [0, nb_visits, stats['viewed_pages'], stats['viewed_hits'], stats['viewed_bandwidth'], stats['not_viewed_bandwidth']]
if nb_days:
row = map(lambda(v): str(int(v/nb_days)), row)
else:
row = map(lambda(v): '0', row)
row[0] = 'Average'
appendRowToTable(days, row)
row = ['Total', nb_visits, stats['viewed_pages'], stats['viewed_hits'], stats['viewed_bandwidth'], stats['not_viewed_bandwidth']]
row = map(lambda(v): str(v), row)
appendRowToTable(days, row)
appendBlockToPage(page, days)
# Pre-class version of IWLA._generateDisplay: build the day-stats page, run
# display plugins, then write pages under DISPLAY_ROOT.
def generateDisplay():
generateDisplayDaysStat()
callPlugins(DISPLAY_HOOK_DIRECTORY, current_visits, display)
buildPages(DISPLAY_ROOT, display)
def generateStats(visits):
    """Aggregate per-visitor counters into one stats dict.

    Robots contribute only to 'not_viewed_bandwidth'. Visitors flagged
    'hit_only' contribute traffic but are not counted in 'nb_visitors'.
    """
    stats = {
        'viewed_bandwidth': 0,
        'not_viewed_bandwidth': 0,
        'viewed_pages': 0,
        'viewed_hits': 0,
        #'requests': set(),
        'nb_visitors': 0,
    }
    for visitor in visits.values():
        if visitor['robot']:
            # Robot traffic is tracked separately and skipped below.
            stats['not_viewed_bandwidth'] += visitor['bandwidth']
            continue
        if not visitor['hit_only']:
            stats['nb_visitors'] += 1
        stats['viewed_bandwidth'] += visitor['bandwidth']
        stats['viewed_pages'] += visitor['viewed_pages']
        stats['viewed_hits'] += visitor['viewed_hits']
    return stats
# Pre-class version of IWLA._generateMonthStats: aggregate the month, run
# post-analysis plugins on non-robot visitors, persist the month DB and
# regenerate the display.
def generateMonthStats():
display = createEmptyDisplay()
visits = current_visits['visits']
stats = generateStats(visits)
cur_time = meta_visit['last_time']
print "== Stats for %d/%d ==" % (cur_time.tm_year, cur_time.tm_mon)
print stats
# Robots are excluded before post-analysis plugins run.
valid_visitors = {k: v for (k,v) in visits.items() if not visits[k]['robot']}
callPlugins(POST_HOOK_DIRECTORY, valid_visitors, stats)
current_visits['month_stats'] = stats
path = getDBFilename(cur_time)
if os.path.exists(path):
os.remove(path)
print "==> Serialize to %s" % path
serialize(current_visits, path)
generateDisplay()
# Pre-class version of IWLA._generateDayStats: compute the current day's
# stats as the delta from the previous recorded day of the month.
def generateDayStats():
visits = current_visits['visits']
callPlugins(PRE_HOOK_DIRECTORY, visits)
stats = generateStats(visits)
cur_time = meta_visit['last_time']
print "== Stats for %d/%d/%d ==" % (cur_time.tm_year, cur_time.tm_mon, cur_time.tm_mday)
# Subtract the most recent earlier day's totals to get today's delta.
if cur_time.tm_mday > 1:
last_day = cur_time.tm_mday - 1
while last_day:
if last_day in current_visits['days_stats'].keys():
break
last_day -= 1
if last_day:
for k in stats.keys():
stats[k] -= current_visits['days_stats'][last_day][k]
# nb_visitors is recomputed: only non-robots whose last access is today.
stats['nb_visitors'] = 0
for k in visits.keys():
if visits[k]['robot']: continue
if visits[k]['last_access'].tm_mday == cur_time.tm_mday:
stats['nb_visitors'] += 1
print stats
current_visits['days_stats'][cur_time.tm_mday] = stats
# Pre-class version of IWLA._newHit: decode one log line's timestamp, roll
# over day/month boundaries, and append the hit to the current analysis.
# Returns False to stop parsing (bad request line), True otherwise.
def newHit(hit):
global current_visits
global analyse_started
decodeTime(hit)
t = hit['time_decoded']
cur_time = meta_visit['last_time']
if cur_time == None:
current_visits = deserialize(getDBFilename(t)) or createEmptyVisits()
analyse_started = True
else:
# Skip hits at or before the last analysed timestamp until new data starts.
if not analyse_started:
if time.mktime(cur_time) >= time.mktime(t):
return
else:
analyse_started = True
if cur_time.tm_mon != t.tm_mon:
generateMonthStats()
current_visits = deserialize(getDBFilename(t)) or createEmptyVisits()
elif cur_time.tm_mday != t.tm_mday:
generateDayStats()
meta_visit['last_time'] = t
if not decodeHTTPRequest(hit): return False
# Normalize '-' placeholders to empty strings.
for k in hit.keys():
if hit[k] == '-': hit[k] = ''
appendHit(hit)
return True
# Pre-class script entry point: load plugins, stream the log file through
# newHit(), then flush day/month stats and persist meta information.
preloadPlugins()
print '==> Analysing log'
meta_visit = deserialize(META_PATH) or createEmptyMeta()
if meta_visit['last_time']:
current_visits = deserialize(getDBFilename(meta_visit['last_time'])) or createEmptyVisits()
else:
current_visits = createEmptyVisits()
f = open(analyzed_filename)
for l in f:
# print "line " + l
groups = log_re.match(l)
if groups:
if not newHit(groups.groupdict()):
break
else:
print "No match " + l
f.close()
if analyse_started:
generateDayStats()
generateMonthStats()
serialize(meta_visit, META_PATH)
else:
print '==> Analyse not started : nothing to do'
generateMonthStats()
# NOTE(review): diff-rendering artifact — this span is the displaced body of
# IWLA._generateMonthStats (its 'def' line sits inside the garbled span
# further up). Also note a real bug below: _callPlugins is passed the bare
# name 'valid_visitors' (NameError in a method); it should be
# self.valid_visitors.
visits = self.current_analysis['visits']
stats = self._generateStats(visits)
cur_time = self.meta_infos['last_time']
print "== Stats for %d/%d ==" % (cur_time.tm_year, cur_time.tm_mon)
print stats
self.valid_visitors = {k: v for (k,v) in visits.items() if not visits[k]['robot']}
self._callPlugins(POST_HOOK_DIRECTORY, valid_visitors, stats)
self.current_analysis['month_stats'] = stats
path = self.getDBFilename(cur_time)
if os.path.exists(path):
os.remove(path)
print "==> Serialize to %s" % path
self._serialize(self.current_analysis, path)
self._generateDisplay()
def _generateDayStats(self):
"""Compute the current day's stats as a delta from the previous day."""
visits = self.current_analysis['visits']
self._callPlugins(PRE_HOOK_DIRECTORY, visits)
stats = self._generateStats(visits)
cur_time = self.meta_infos['last_time']
print "== Stats for %d/%d/%d ==" % (cur_time.tm_year, cur_time.tm_mon, cur_time.tm_mday)
# Subtract the most recent earlier day's totals to get today's delta.
if cur_time.tm_mday > 1:
last_day = cur_time.tm_mday - 1
while last_day:
if last_day in self.current_analysis['days_stats'].keys():
break
last_day -= 1
if last_day:
for k in stats.keys():
stats[k] -= self.current_analysis['days_stats'][last_day][k]
# nb_visitors is recomputed: only non-robots whose last access is today.
stats['nb_visitors'] = 0
for k in visits.keys():
if visits[k]['robot']: continue
if visits[k]['last_access'].tm_mday == cur_time.tm_mday:
stats['nb_visitors'] += 1
print stats
self.current_analysis['days_stats'][cur_time.tm_mday] = stats
def _newHit(self, hit):
"""Process one parsed log line: decode its time, roll over day/month
boundaries, then account the hit. Returns False to abort parsing."""
self._decodeTime(hit)
t = hit['time_decoded']
cur_time = self.meta_infos['last_time']
if cur_time == None:
self.current_analysis = self._deserialize(self.getDBFilename(t)) or self._clearVisits()
self.analyse_started = True
else:
# Skip already-analysed timestamps until strictly newer data appears.
if not self.analyse_started:
if time.mktime(cur_time) >= time.mktime(t):
return
else:
self.analyse_started = True
if cur_time.tm_mon != t.tm_mon:
self._generateMonthStats()
self.current_analysis = self._deserialize(self.getDBFilename(t)) or self._clearVisits()
elif cur_time.tm_mday != t.tm_mday:
self._generateDayStats()
self.meta_infos['last_time'] = t
if not self._decodeHTTPRequest(hit): return False
# Normalize '-' placeholders to empty strings.
for k in hit.keys():
if hit[k] == '-': hit[k] = ''
self._appendHit(hit)
return True
def start(self):
self._preloadPlugins()
print '==> Analysing log'
self.meta_infos = self._deserialize(META_PATH) or self._clearMeta()
if self.meta_infos['last_time']:
self.current_analysis = self._deserialize(self.getDBFilename(self.meta_infos['last_time'])) or self._clearVisits()
else:
self._clearVisits()
with open(analyzed_filename) as f:
for l in f:
# print "line " + l
groups = self.log_re.match(l)
if groups:
if not self._newHit(groups.groupdict()):
break
else:
print "No match for " + l
if self.analyse_started:
self._generateDayStats()
self._generateMonthStats()
self._serialize(meta_infos, META_PATH)
else:
print '==> Analyse not started : nothing to do'
self._generateMonthStats()
# Script entry point: build the analyzer and run it.
iwla = IWLA()
iwla.start()

Archive Download the corresponding diff file

Branches

Tags