Very nice result

This commit is contained in:
Gregory Soutade 2014-11-21 16:56:58 +01:00
parent c3c201fda1
commit e51e07f65e
5 changed files with 106 additions and 75 deletions

View File

@ -12,7 +12,7 @@ DB_ROOT = './output/'
DISPLAY_ROOT = './output/' DISPLAY_ROOT = './output/'
pre_analysis_hooks = ['H002_soutade', 'H001_robot'] pre_analysis_hooks = ['H002_soutade', 'H001_robot']
post_analysis_hooks = ['top_visitors'] post_analysis_hooks = ['top_visitors', 'reverse_dns']
display_hooks = ['top_visitors'] display_hooks = ['top_visitors']
# pre_analysis_hooks = ['H002_soutade.py', 'H001_robot.py'] # pre_analysis_hooks = ['H002_soutade.py', 'H001_robot.py']

View File

@ -1,51 +1,70 @@
def createPage(display, filename, title): class DisplayHTMLBlock(object):
page = {}
page['title'] = title;
page['blocks'] = []
display[filename] = page
return page def __init__(self, title):
self.title = title
def appendBlockToPage(page, block): def build(self, f):
page['blocks'].append(block) pass
def createTable(title, cols): class DisplayHTMLBlockTable(DisplayHTMLBlock):
table = {'type' : 'table', 'title' : title}
table['cols'] = cols
table['rows'] = []
return table
def appendRowToTable(table, row):
table['rows'].append(row)
def buildTable(block, f):
print 'Write table %s' % block['title']
f.write('<table>')
f.write('<tr>')
for title in block['cols']:
f.write('<th>%s</th>' % (title))
f.write('</tr>')
for row in block['rows']:
f.write('<tr>')
for v in row:
f.write('<td>%s</td>' % (v))
f.write('</tr>')
f.write('</table>')
def buildPages(display_root, display): def __init__(self, title, cols):
for filename in display.keys(): super(DisplayHTMLBlockTable, self).__init__(title)
page = display[filename] self.cols = cols
print "OPEN %s" % (display_root + filename) self.rows = []
with open(display_root + filename, 'w') as f:
f.write('<html><title>%s</title><body>' % (page['title'])) def appendRow(self, row):
for block in page['blocks']: self.rows.append(row)
print "Bluid block"
print block def build(self, f):
print "End block" f.write('<table>')
if block['type'] == 'html': f.write('<tr>')
f.write(block['value']) for title in self.cols:
elif block['type'] == 'table': f.write('<th>%s</th>' % (title))
buildTable(block, f) f.write('</tr>')
f.write('</body></html>') for row in self.rows:
f.write('<tr>')
for v in row:
f.write('<td>%s</td>' % (v))
f.write('</tr>')
f.write('</table>')
class DisplayHTMLPage(object):
    """A single HTML output page made of a title and an ordered list of blocks.

    A block is any object exposing ``build(f)``; it is handed the open
    output file and is expected to write its own HTML into it.
    """

    def __init__(self, title, filename):
        """Create an empty page.

        title    -- text written inside the page's <title> element.
        filename -- output path, appended as-is to the root given to build();
                    it may contain sub-directories (e.g. '2014/index_11.html').
        """
        self.title = title
        self.filename = filename
        self.blocks = []

    def getFilename(self):
        """Return the filename this page was created with."""
        return self.filename

    def appendBlock(self, block):
        """Append ``block`` after the blocks already on the page."""
        self.blocks.append(block)

    def build(self, root):
        """Write the page to ``root + self.filename``.

        The path is a plain string concatenation, so ``root`` must carry its
        own trailing separator. Missing parent directories are created first.
        """
        # Local import: keeps this class usable without touching the
        # module's import section.
        import os

        path = root + self.filename
        parent = os.path.dirname(path)
        # The filename may point into a sub-directory that does not exist
        # yet; open() would fail there without this.
        if parent and not os.path.isdir(parent):
            os.makedirs(parent)

        # 'with' guarantees the file is closed even if a block raises
        # while writing itself.
        with open(path, 'w') as f:
            f.write('<html><title>%s</title><body>' % (self.title))
            for block in self.blocks:
                block.build(f)
            f.write('</body></html>')
class DisplayHTMLBuild(object):
    """Ordered collection of pages that can be looked up by filename and built together."""

    def __init__(self):
        # Pages are kept in insertion order; build() writes them in that order.
        self.pages = []

    def getPage(self, filename):
        """Return the first registered page whose getFilename() equals ``filename``, else None."""
        candidates = (p for p in self.pages if p.getFilename() == filename)
        return next(candidates, None)

    def addPage(self, page):
        """Register ``page`` at the end of the collection."""
        self.pages.append(page)

    def build(self, root):
        """Call ``build(root)`` on every registered page, in registration order."""
        for entry in self.pages:
            entry.build(root)

59
iwla.py
View File

@ -25,7 +25,7 @@ class IWLA(object):
self.analyse_started = False self.analyse_started = False
self.current_analysis = {} self.current_analysis = {}
self.cache_plugins = {} self.cache_plugins = {}
self.display = {} self.display = DisplayHTMLBuild()
self.valid_visitors = None self.valid_visitors = None
self.log_format_extracted = re.sub(r'([^\$\w])', r'\\\g<1>', log_format) self.log_format_extracted = re.sub(r'([^\$\w])', r'\\\g<1>', log_format)
@ -44,7 +44,7 @@ class IWLA(object):
p = root + '/' + plugin_name p = root + '/' + plugin_name
try: try:
fp, pathname, description = imp.find_module(plugin_name, [root]) fp, pathname, description = imp.find_module(plugin_name, [root])
self.cache_plugins[p] = imp.load_module(plugin_name, fp, pathname, description) self.cache_plugins[p] = imp.load_module(p, fp, pathname, description)
mod = self.cache_plugins[p] mod = self.cache_plugins[p]
infos = mod.get_plugins_infos() infos = mod.get_plugins_infos()
if infos['class'] != IWLA.ANALYSIS_CLASS or \ if infos['class'] != IWLA.ANALYSIS_CLASS or \
@ -70,14 +70,17 @@ class IWLA(object):
def getDaysStats(self): def getDaysStats(self):
return self.current_analysis['days_stats'] return self.current_analysis['days_stats']
def getMonthStatsStats(self): def getMonthStats(self):
return self.current_analysis['month_stats'] return self.current_analysis['month_stats']
def getCurrentVisists(self): def getCurrentVisists(self):
return self.current_analysis['visits'] return self.current_analysis['visits']
def getValidVisitors(self): def getValidVisitors(self):
return self.current_analysis['visits'] return self.valid_visitors
def getDisplay(self):
return self.display
def _clearMeta(self): def _clearMeta(self):
self.meta_infos = { self.meta_infos = {
@ -86,7 +89,7 @@ class IWLA(object):
return self.meta_infos return self.meta_infos
def _clearDisplay(self): def _clearDisplay(self):
self.display = {} self.display = DisplayHTMLBuild()
return self.display return self.display
def getDBFilename(self, time): def getDBFilename(self, time):
@ -100,11 +103,11 @@ class IWLA(object):
# TODO : remove return # TODO : remove return
return return
with open(filename + '.tmp', 'wb+') as f: with open(filename + '.tmp', 'wb+') as f:
pickle.dump(obj, f) pickle.dump(obj, f)
f.seek(0) f.seek(0)
with gzip.open(filename, 'w') as fzip: with gzip.open(filename, 'w') as fzip:
fzip.write(f.read()) fzip.write(f.read())
os.remove(filename + '.tmp') os.remove(filename + '.tmp')
def _deserialize(self, filename): def _deserialize(self, filename):
@ -210,15 +213,16 @@ class IWLA(object):
cur_time = self.meta_infos['last_time'] cur_time = self.meta_infos['last_time']
filename = '%d/index_%d.html' % (cur_time.tm_year, cur_time.tm_mon) filename = '%d/index_%d.html' % (cur_time.tm_year, cur_time.tm_mon)
return self.display.get(filename, None) return self.display.getPage(filename)
def _generateDisplayDaysStat(self): def _generateDisplayDaysStat(self):
cur_time = self.meta_infos['last_time'] cur_time = self.meta_infos['last_time']
title = 'Stats %d/%d' % (cur_time.tm_mon, cur_time.tm_year) title = 'Stats %d/%d' % (cur_time.tm_mon, cur_time.tm_year)
filename = '%d/index_%d.html' % (cur_time.tm_year, cur_time.tm_mon) filename = '%d/index_%d.html' % (cur_time.tm_year, cur_time.tm_mon)
page = createPage(self.display, filename, title) print '==> Generate display (%s)' % (filename)
page = DisplayHTMLPage(title, filename)
days = createTable('By day', ['Day', 'Visits', 'Pages', 'Hits', 'Bandwidth', 'Robot Bandwidth']) days = DisplayHTMLBlockTable('By day', ['Day', 'Visits', 'Pages', 'Hits', 'Bandwidth', 'Not viewed Bandwidth'])
keys = self.current_analysis['days_stats'].keys() keys = self.current_analysis['days_stats'].keys()
keys.sort() keys.sort()
@ -227,7 +231,7 @@ class IWLA(object):
stats = self.current_analysis['days_stats'][k] stats = self.current_analysis['days_stats'][k]
row = [k, stats['nb_visitors'], stats['viewed_pages'], stats['viewed_hits'], stats['viewed_bandwidth'], stats['not_viewed_bandwidth']] row = [k, stats['nb_visitors'], stats['viewed_pages'], stats['viewed_hits'], stats['viewed_bandwidth'], stats['not_viewed_bandwidth']]
row = map(lambda(v): str(v), row) row = map(lambda(v): str(v), row)
appendRowToTable(days, row) days.appendRow(row)
nb_visits += stats['nb_visitors'] nb_visits += stats['nb_visitors']
stats = self.current_analysis['month_stats'] stats = self.current_analysis['month_stats']
@ -240,17 +244,18 @@ class IWLA(object):
row = map(lambda(v): '0', row) row = map(lambda(v): '0', row)
row[0] = 'Average' row[0] = 'Average'
appendRowToTable(days, row) days.appendRow(row)
row = ['Total', nb_visits, stats['viewed_pages'], stats['viewed_hits'], stats['viewed_bandwidth'], stats['not_viewed_bandwidth']] row = ['Total', nb_visits, stats['viewed_pages'], stats['viewed_hits'], stats['viewed_bandwidth'], stats['not_viewed_bandwidth']]
row = map(lambda(v): str(v), row) row = map(lambda(v): str(v), row)
appendRowToTable(days, row) days.appendRow(row)
appendBlockToPage(page, days) page.appendBlock(days)
self.display.addPage(page)
def _generateDisplay(self): def _generateDisplay(self):
self._generateDisplayDaysStat() self._generateDisplayDaysStat()
self._callPlugins(DISPLAY_HOOK_DIRECTORY, self.current_analysis, self.display) self._callPlugins(DISPLAY_HOOK_DIRECTORY, self)
buildPages(DISPLAY_ROOT, self.display) self.display.build(DISPLAY_ROOT)
def _generateStats(self, visits): def _generateStats(self, visits):
stats = {} stats = {}
@ -293,11 +298,11 @@ class IWLA(object):
print "== Stats for %d/%d ==" % (cur_time.tm_year, cur_time.tm_mon) print "== Stats for %d/%d ==" % (cur_time.tm_year, cur_time.tm_mon)
print stats print stats
self.valid_visitors = {k: v for (k,v) in visits.items() if not visits[k]['robot']}
self._callPlugins(POST_HOOK_DIRECTORY, valid_visitors, stats)
self.current_analysis['month_stats'] = stats self.current_analysis['month_stats'] = stats
self.valid_visitors = {k: v for (k,v) in visits.items() if not visits[k]['robot']}
self._callPlugins(POST_HOOK_DIRECTORY, self)
path = self.getDBFilename(cur_time) path = self.getDBFilename(cur_time)
if os.path.exists(path): if os.path.exists(path):
os.remove(path) os.remove(path)
@ -311,7 +316,7 @@ class IWLA(object):
def _generateDayStats(self): def _generateDayStats(self):
visits = self.current_analysis['visits'] visits = self.current_analysis['visits']
self._callPlugins(PRE_HOOK_DIRECTORY, visits) self._callPlugins(PRE_HOOK_DIRECTORY, self)
stats = self._generateStats(visits) stats = self._generateStats(visits)
@ -391,14 +396,16 @@ class IWLA(object):
break break
else: else:
print "No match for " + l print "No match for " + l
#break
if self.analyse_started: if self.analyse_started:
self._generateDayStats() self._generateDayStats()
self._generateMonthStats() self._generateMonthStats()
self._serialize(meta_infos, META_PATH) self._serialize(self.meta_infos, META_PATH)
else: else:
print '==> Analyse not started : nothing to do' print '==> Analyse not started : nothing to do'
self._generateMonthStats() self._generateMonthStats()
iwla = IWLA() if __name__ == '__main__':
iwla.start() iwla = IWLA()
iwla.start()

View File

@ -1,4 +1,5 @@
import re import re
from iwla import IWLA
from awstats_robots_data import awstats_robots from awstats_robots_data import awstats_robots
@ -21,7 +22,8 @@ def load():
# Basic rule to detect robots # Basic rule to detect robots
def hook(hits): def hook(iwla):
hits = iwla.getCurrentVisists()
for k in hits.keys(): for k in hits.keys():
super_hit = hits[k] super_hit = hits[k]

View File

@ -1,4 +1,5 @@
import re import re
from iwla import IWLA
# Remove logo from indefero # Remove logo from indefero
logo_re = re.compile(r'^.+/logo/$') logo_re = re.compile(r'^.+/logo/$')
@ -19,7 +20,9 @@ def load():
# Basic rule to detect robots # Basic rule to detect robots
def hook(hits): def hook(iwla):
hits = iwla.getCurrentVisists()
for k in hits.keys(): for k in hits.keys():
super_hit = hits[k] super_hit = hits[k]