iwla

iwla Commit Details

Date: 2014-11-21 16:56:58 (6 years 8 months ago)
Author: Grégory Soutadé
Branch: dev, master
Commit: e51e07f65e983dfaff29450e86e99b8b5bc88306
Parents: c3c201fda1ff70981d8f95f0c1651bc7df475598
Message: Very nice result

Changes:
M conf.py (1 diff)
M display.py (1 diff)
M iwla.py (11 diffs)
M plugins/pre_analysis/H001_robot.py (2 diffs)
M plugins/pre_analysis/H002_soutade.py (2 diffs)

File differences

conf.py
1212
1313
1414
15
15
1616
1717
1818
DISPLAY_ROOT = './output/'
pre_analysis_hooks = ['H002_soutade', 'H001_robot']
post_analysis_hooks = ['top_visitors']
post_analysis_hooks = ['top_visitors', 'reverse_dns']
display_hooks = ['top_visitors']
# pre_analysis_hooks = ['H002_soutade.py', 'H001_robot.py']
display.py
11
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
3122
32
33
23
24
3425
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
def createPage(display, filename, title):
page = {}
page['title'] = title;
page['blocks'] = []
display[filename] = page
return page
def appendBlockToPage(page, block):
page['blocks'].append(block)
def createTable(title, cols):
table = {'type' : 'table', 'title' : title}
table['cols'] = cols
table['rows'] = []
return table
def appendRowToTable(table, row):
table['rows'].append(row)
def buildTable(block, f):
print 'Write table %s' % block['title']
f.write('<table>')
f.write('<tr>')
for title in block['cols']:
f.write('<th>%s</th>' % (title))
f.write('</tr>')
for row in block['rows']:
class DisplayHTMLBlock(object):
def __init__(self, title):
self.title = title
def build(self, f):
pass
class DisplayHTMLBlockTable(DisplayHTMLBlock):
def __init__(self, title, cols):
super(DisplayHTMLBlockTable, self).__init__(title)
self.cols = cols
self.rows = []
def appendRow(self, row):
self.rows.append(row)
def build(self, f):
f.write('<table>')
f.write('<tr>')
for v in row:
f.write('<td>%s</td>' % (v))
for title in self.cols:
f.write('<th>%s</th>' % (title))
f.write('</tr>')
f.write('</table>')
def buildPages(display_root, display):
for filename in display.keys():
page = display[filename]
print "OPEN %s" % (display_root + filename)
with open(display_root + filename, 'w') as f:
f.write('<html><title>%s</title><body>' % (page['title']))
for block in page['blocks']:
print "Bluid block"
print block
print "End block"
if block['type'] == 'html':
f.write(block['value'])
elif block['type'] == 'table':
buildTable(block, f)
f.write('</body></html>')
for row in self.rows:
f.write('<tr>')
for v in row:
f.write('<td>%s</td>' % (v))
f.write('</tr>')
f.write('</table>')
class DisplayHTMLPage(object):
def __init__(self, title, filename):
self.title = title
self.filename = filename
self.blocks = []
def getFilename(self):
return self.filename;
def appendBlock(self, block):
self.blocks.append(block)
def build(self, root):
f = open(root + self.filename, 'w')
f.write('<html><title>%s</title><body>' % (self.title))
for block in self.blocks:
block.build(f)
f.write('</body></html>')
f.close()
class DisplayHTMLBuild(object):
def __init__(self):
self.pages = []
def getPage(self, filename):
for page in self.pages:
if page.getFilename() == filename:
return page
return None
def addPage(self, page):
self.pages.append(page)
def build(self, root):
for page in self.pages:
page.build(root)
iwla.py
2525
2626
2727
28
28
2929
3030
3131
......
4444
4545
4646
47
47
4848
4949
5050
......
7070
7171
7272
73
73
7474
7575
7676
7777
7878
7979
80
80
81
82
83
8184
8285
8386
......
8689
8790
8891
89
92
9093
9194
9295
......
100103
101104
102105
103
104
105
106
107
106
107
108
109
110
108111
109112
110113
......
210213
211214
212215
213
216
214217
215218
216219
217220
218221
219
222
223
220224
221
225
222226
223227
224228
......
227231
228232
229233
230
234
231235
232236
233237
......
240244
241245
242246
243
247
244248
245249
246250
247
248
251
252
253
249254
250255
251256
252
253
257
258
254259
255260
256261
......
293298
294299
295300
296
297
298
299301
300302
303
304
305
301306
302307
303308
......
311316
312317
313318
314
319
315320
316321
317322
......
391396
392397
393398
399
394400
395401
396402
397403
398
404
399405
400406
401407
402408
403
404
409
410
411
self.analyse_started = False
self.current_analysis = {}
self.cache_plugins = {}
self.display = {}
self.display = DisplayHTMLBuild()
self.valid_visitors = None
self.log_format_extracted = re.sub(r'([^\$\w])', r'\\\g<1>', log_format)
p = root + '/' + plugin_name
try:
fp, pathname, description = imp.find_module(plugin_name, [root])
self.cache_plugins[p] = imp.load_module(plugin_name, fp, pathname, description)
self.cache_plugins[p] = imp.load_module(p, fp, pathname, description)
mod = self.cache_plugins[p]
infos = mod.get_plugins_infos()
if infos['class'] != IWLA.ANALYSIS_CLASS or \
def getDaysStats(self):
return self.current_analysis['days_stats']
def getMonthStatsStats(self):
def getMonthStats(self):
return self.current_analysis['month_stats']
def getCurrentVisists(self):
return self.current_analysis['visits']
def getValidVisitors(self):
return self.current_analysis['visits']
return self.valid_visitors
def getDisplay(self):
return self.display
def _clearMeta(self):
self.meta_infos = {
return self.meta_infos
def _clearDisplay(self):
self.display = {}
self.display = DisplayHTMLBuild()
return self.display
def getDBFilename(self, time):
# TODO : remove return
return
with open(filename + '.tmp', 'wb+') as f:
pickle.dump(obj, f)
f.seek(0)
with gzip.open(filename, 'w') as fzip:
fzip.write(f.read())
with open(filename + '.tmp', 'wb+') as f:
pickle.dump(obj, f)
f.seek(0)
with gzip.open(filename, 'w') as fzip:
fzip.write(f.read())
os.remove(filename + '.tmp')
def _deserialize(self, filename):
cur_time = self.meta_infos['last_time']
filename = '%d/index_%d.html' % (cur_time.tm_year, cur_time.tm_mon)
return self.display.get(filename, None)
return self.display.getPage(filename)
def _generateDisplayDaysStat(self):
cur_time = self.meta_infos['last_time']
title = 'Stats %d/%d' % (cur_time.tm_mon, cur_time.tm_year)
filename = '%d/index_%d.html' % (cur_time.tm_year, cur_time.tm_mon)
page = createPage(self.display, filename, title)
print '==> Generate display (%s)' % (filename)
page = DisplayHTMLPage(title, filename)
days = createTable('By day', ['Day', 'Visits', 'Pages', 'Hits', 'Bandwidth', 'Robot Bandwidth'])
days = DisplayHTMLBlockTable('By day', ['Day', 'Visits', 'Pages', 'Hits', 'Bandwidth', 'Not viewed Bandwidth'])
keys = self.current_analysis['days_stats'].keys()
keys.sort()
stats = self.current_analysis['days_stats'][k]
row = [k, stats['nb_visitors'], stats['viewed_pages'], stats['viewed_hits'], stats['viewed_bandwidth'], stats['not_viewed_bandwidth']]
row = map(lambda(v): str(v), row)
appendRowToTable(days, row)
days.appendRow(row)
nb_visits += stats['nb_visitors']
stats = self.current_analysis['month_stats']
row = map(lambda(v): '0', row)
row[0] = 'Average'
appendRowToTable(days, row)
days.appendRow(row)
row = ['Total', nb_visits, stats['viewed_pages'], stats['viewed_hits'], stats['viewed_bandwidth'], stats['not_viewed_bandwidth']]
row = map(lambda(v): str(v), row)
appendRowToTable(days, row)
appendBlockToPage(page, days)
days.appendRow(row)
page.appendBlock(days)
self.display.addPage(page)
def _generateDisplay(self):
self._generateDisplayDaysStat()
self._callPlugins(DISPLAY_HOOK_DIRECTORY, self.current_analysis, self.display)
buildPages(DISPLAY_ROOT, self.display)
self._callPlugins(DISPLAY_HOOK_DIRECTORY, self)
self.display.build(DISPLAY_ROOT)
def _generateStats(self, visits):
stats = {}
print "== Stats for %d/%d ==" % (cur_time.tm_year, cur_time.tm_mon)
print stats
self.valid_visitors = {k: v for (k,v) in visits.items() if not visits[k]['robot']}
self._callPlugins(POST_HOOK_DIRECTORY, valid_visitors, stats)
self.current_analysis['month_stats'] = stats
self.valid_visitors = {k: v for (k,v) in visits.items() if not visits[k]['robot']}
self._callPlugins(POST_HOOK_DIRECTORY, self)
path = self.getDBFilename(cur_time)
if os.path.exists(path):
os.remove(path)
def _generateDayStats(self):
visits = self.current_analysis['visits']
self._callPlugins(PRE_HOOK_DIRECTORY, visits)
self._callPlugins(PRE_HOOK_DIRECTORY, self)
stats = self._generateStats(visits)
break
else:
print "No match for " + l
#break
if self.analyse_started:
self._generateDayStats()
self._generateMonthStats()
self._serialize(meta_infos, META_PATH)
self._serialize(self.meta_infos, META_PATH)
else:
print '==> Analyse not started : nothing to do'
self._generateMonthStats()
iwla = IWLA()
iwla.start()
if __name__ == '__main__':
iwla = IWLA()
iwla.start()
plugins/pre_analysis/H001_robot.py
11
2
23
34
45
......
2122
2223
2324
24
25
26
2527
2628
2729
import re
from iwla import IWLA
from awstats_robots_data import awstats_robots
# Basic rule to detect robots
def hook(hits):
def hook(iwla):
hits = iwla.getCurrentVisists()
for k in hits.keys():
super_hit = hits[k]
plugins/pre_analysis/H002_soutade.py
11
2
23
34
45
......
1920
2021
2122
22
23
24
25
2326
2427
2528
import re
from iwla import IWLA
# Remove logo from indefero
logo_re = re.compile(r'^.+/logo/$')
# Basic rule to detect robots
def hook(hits):
def hook(iwla):
hits = iwla.getCurrentVisists()
for k in hits.keys():
super_hit = hits[k]

Archive Download the corresponding diff file

Branches

Tags