Very nice result

This commit is contained in:
Gregory Soutade 2014-11-21 16:56:58 +01:00
parent c3c201fda1
commit e51e07f65e
5 changed files with 106 additions and 75 deletions

View File

@ -12,7 +12,7 @@ DB_ROOT = './output/'
DISPLAY_ROOT = './output/'
pre_analysis_hooks = ['H002_soutade', 'H001_robot']
post_analysis_hooks = ['top_visitors']
post_analysis_hooks = ['top_visitors', 'reverse_dns']
display_hooks = ['top_visitors']
# pre_analysis_hooks = ['H002_soutade.py', 'H001_robot.py']

View File

@ -1,51 +1,70 @@
# createPage: build a page dict holding `title` and an empty 'blocks' list,
# store it in `display` under the key `filename`, and return it.
# NOTE(review): indentation is lost in this view and a class header appears
# between the function body and its `return` -- presumably two definitions
# interleaved here; confirm against the real file.
def createPage(display, filename, title):
page = {}
page['title'] = title;
page['blocks'] = []
display[filename] = page
class DisplayHTMLBlock(object):
return page
# Initializer: remember `title` on the instance.
def __init__(self, title):
self.title = title
def appendBlockToPage(page, block):
    """Add ``block`` to the end of the ``'blocks'`` list of ``page``."""
    blocks = page['blocks']
    blocks.append(block)
# No-op build: accepts `f` and does nothing with it.
def build(self, f):
pass
def createTable(title, cols):
    """Return a new table block dict.

    The dict has type ``'table'``, the given ``title``, the given ``cols``
    object (stored as-is, not copied) and an empty ``'rows'`` list.
    """
    return {
        'type': 'table',
        'title': title,
        'cols': cols,
        'rows': [],
    }
def appendRowToTable(table, row):
    """Add ``row`` to the end of the ``'rows'`` list of ``table``."""
    rows = table['rows']
    rows.append(row)
def buildTable(block, f):
    """Write a table block to ``f`` as an HTML ``<table>``.

    ``block['cols']`` supplies the header cells and ``block['rows']`` the
    data rows; each row is iterated and every value becomes one ``<td>``.
    Values are inserted with ``%s`` and are not HTML-escaped. Each fragment
    is emitted through its own ``f.write`` call.
    """
    # Parenthesised single-argument print outputs the same text as the
    # Python 2 print statement and is also valid on Python 3.
    print('Write table %s' % (block['title'],))
    f.write('<table>')
    f.write('<tr>')
    for heading in block['cols']:
        # Wrap in a one-element tuple: a bare tuple value would otherwise be
        # unpacked as the format arguments and raise or misformat.
        f.write('<th>%s</th>' % (heading,))
    f.write('</tr>')
    for row in block['rows']:
        f.write('<tr>')
        for cell in row:
            f.write('<td>%s</td>' % (cell,))
        f.write('</tr>')
    f.write('</table>')
class DisplayHTMLBlockTable(DisplayHTMLBlock):
def buildPages(display_root, display):
    """Write every page in ``display`` to an HTML file.

    ``display`` maps a filename to a page dict with a ``'title'`` and a list
    of ``'blocks'``. The output path is ``display_root + filename`` (plain
    string concatenation, so ``display_root`` must carry its own trailing
    separator). Blocks of type ``'html'`` are written verbatim from their
    ``'value'``; blocks of type ``'table'`` are rendered with ``buildTable``;
    blocks of any other type are skipped.
    """
    for filename, page in display.items():
        path = display_root + filename
        # Parenthesised single-argument prints output the same text as the
        # Python 2 print statement and are also valid on Python 3.
        print("OPEN %s" % (path,))
        with open(path, 'w') as f:
            # One-element tuple so a tuple-valued title is formatted whole
            # instead of being unpacked as the format arguments.
            f.write('<html><title>%s</title><body>' % (page['title'],))
            for block in page['blocks']:
                print("Bluid block")
                print(block)
                print("End block")
                kind = block['type']
                if kind == 'html':
                    f.write(block['value'])
                elif kind == 'table':
                    buildTable(block, f)
            f.write('</body></html>')
# Initializer: pass `title` to the parent initializer, store `cols` and
# start with an empty list of rows.
def __init__(self, title, cols):
super(DisplayHTMLBlockTable, self).__init__(title)
self.cols = cols
self.rows = []
def appendRow(self, row):
    """Add ``row`` to the end of this object's ``rows`` list."""
    rows = self.rows
    rows.append(row)
def build(self, f):
    """Write this table to ``f`` as an HTML ``<table>``.

    ``self.cols`` supplies the header cells and ``self.rows`` the data rows;
    each row is iterated and every value becomes one ``<td>``. Values are
    inserted with ``%s`` and are not HTML-escaped. Each fragment is emitted
    through its own ``f.write`` call.
    """
    f.write('<table>')
    f.write('<tr>')
    for heading in self.cols:
        # Wrap in a one-element tuple: a bare tuple value would otherwise be
        # unpacked as the format arguments and raise or misformat.
        f.write('<th>%s</th>' % (heading,))
    f.write('</tr>')
    for row in self.rows:
        f.write('<tr>')
        for cell in row:
            f.write('<td>%s</td>' % (cell,))
        f.write('</tr>')
    f.write('</table>')
class DisplayHTMLPage(object):
    """An HTML page: a title, an output filename and an ordered list of blocks."""

    def __init__(self, title, filename):
        """Store ``title`` and ``filename`` and start with no blocks."""
        self.title = title
        self.filename = filename
        self.blocks = []

    def getFilename(self):
        """Return the filename this page was created with."""
        return self.filename

    def appendBlock(self, block):
        """Add ``block`` after the blocks already on the page."""
        self.blocks.append(block)

    def build(self, root):
        """Write the page to the file ``root + self.filename``.

        The path is a plain string concatenation, so ``root`` must carry its
        own trailing separator. The title is written first, then each block
        is asked to render itself through ``block.build(f)``, in insertion
        order. Errors from ``open`` or from a block propagate to the caller.
        """
        # The context manager closes the file even when a block raises.
        with open(root + self.filename, 'w') as f:
            # One-element tuple so a tuple-valued title is formatted whole
            # instead of being unpacked as the format arguments.
            f.write('<html><title>%s</title><body>' % (self.title,))
            for block in self.blocks:
                block.build(f)
            f.write('</body></html>')
class DisplayHTMLBuild(object):
    """Ordered collection of pages that can be looked up and built together."""

    def __init__(self):
        """Start with no pages."""
        self.pages = []

    def getPage(self, filename):
        """Return the first page whose ``getFilename()`` equals ``filename``.

        Returns ``None`` when no page matches.
        """
        matches = (candidate for candidate in self.pages
                   if candidate.getFilename() == filename)
        return next(matches, None)

    def addPage(self, page):
        """Add ``page`` after the pages already registered."""
        self.pages.append(page)

    def build(self, root):
        """Call ``build(root)`` on every page, in insertion order."""
        for entry in self.pages:
            entry.build(root)

59
iwla.py
View File

@ -25,7 +25,7 @@ class IWLA(object):
self.analyse_started = False
self.current_analysis = {}
self.cache_plugins = {}
self.display = {}
self.display = DisplayHTMLBuild()
self.valid_visitors = None
self.log_format_extracted = re.sub(r'([^\$\w])', r'\\\g<1>', log_format)
@ -44,7 +44,7 @@ class IWLA(object):
p = root + '/' + plugin_name
try:
fp, pathname, description = imp.find_module(plugin_name, [root])
self.cache_plugins[p] = imp.load_module(plugin_name, fp, pathname, description)
self.cache_plugins[p] = imp.load_module(p, fp, pathname, description)
mod = self.cache_plugins[p]
infos = mod.get_plugins_infos()
if infos['class'] != IWLA.ANALYSIS_CLASS or \
@ -70,14 +70,17 @@ class IWLA(object):
def getDaysStats(self):
return self.current_analysis['days_stats']
def getMonthStatsStats(self):
def getMonthStats(self):
return self.current_analysis['month_stats']
def getCurrentVisists(self):
return self.current_analysis['visits']
def getValidVisitors(self):
return self.current_analysis['visits']
return self.valid_visitors
def getDisplay(self):
return self.display
def _clearMeta(self):
self.meta_infos = {
@ -86,7 +89,7 @@ class IWLA(object):
return self.meta_infos
def _clearDisplay(self):
self.display = {}
self.display = DisplayHTMLBuild()
return self.display
def getDBFilename(self, time):
@ -100,11 +103,11 @@ class IWLA(object):
# TODO : remove return
return
with open(filename + '.tmp', 'wb+') as f:
pickle.dump(obj, f)
f.seek(0)
with gzip.open(filename, 'w') as fzip:
fzip.write(f.read())
with open(filename + '.tmp', 'wb+') as f:
pickle.dump(obj, f)
f.seek(0)
with gzip.open(filename, 'w') as fzip:
fzip.write(f.read())
os.remove(filename + '.tmp')
def _deserialize(self, filename):
@ -210,15 +213,16 @@ class IWLA(object):
cur_time = self.meta_infos['last_time']
filename = '%d/index_%d.html' % (cur_time.tm_year, cur_time.tm_mon)
return self.display.get(filename, None)
return self.display.getPage(filename)
def _generateDisplayDaysStat(self):
cur_time = self.meta_infos['last_time']
title = 'Stats %d/%d' % (cur_time.tm_mon, cur_time.tm_year)
filename = '%d/index_%d.html' % (cur_time.tm_year, cur_time.tm_mon)
page = createPage(self.display, filename, title)
print '==> Generate display (%s)' % (filename)
page = DisplayHTMLPage(title, filename)
days = createTable('By day', ['Day', 'Visits', 'Pages', 'Hits', 'Bandwidth', 'Robot Bandwidth'])
days = DisplayHTMLBlockTable('By day', ['Day', 'Visits', 'Pages', 'Hits', 'Bandwidth', 'Not viewed Bandwidth'])
keys = self.current_analysis['days_stats'].keys()
keys.sort()
@ -227,7 +231,7 @@ class IWLA(object):
stats = self.current_analysis['days_stats'][k]
row = [k, stats['nb_visitors'], stats['viewed_pages'], stats['viewed_hits'], stats['viewed_bandwidth'], stats['not_viewed_bandwidth']]
row = map(lambda(v): str(v), row)
appendRowToTable(days, row)
days.appendRow(row)
nb_visits += stats['nb_visitors']
stats = self.current_analysis['month_stats']
@ -240,17 +244,18 @@ class IWLA(object):
row = map(lambda(v): '0', row)
row[0] = 'Average'
appendRowToTable(days, row)
days.appendRow(row)
row = ['Total', nb_visits, stats['viewed_pages'], stats['viewed_hits'], stats['viewed_bandwidth'], stats['not_viewed_bandwidth']]
row = map(lambda(v): str(v), row)
appendRowToTable(days, row)
appendBlockToPage(page, days)
days.appendRow(row)
page.appendBlock(days)
self.display.addPage(page)
def _generateDisplay(self):
self._generateDisplayDaysStat()
self._callPlugins(DISPLAY_HOOK_DIRECTORY, self.current_analysis, self.display)
buildPages(DISPLAY_ROOT, self.display)
self._callPlugins(DISPLAY_HOOK_DIRECTORY, self)
self.display.build(DISPLAY_ROOT)
def _generateStats(self, visits):
stats = {}
@ -293,11 +298,11 @@ class IWLA(object):
print "== Stats for %d/%d ==" % (cur_time.tm_year, cur_time.tm_mon)
print stats
self.valid_visitors = {k: v for (k,v) in visits.items() if not visits[k]['robot']}
self._callPlugins(POST_HOOK_DIRECTORY, valid_visitors, stats)
self.current_analysis['month_stats'] = stats
self.valid_visitors = {k: v for (k,v) in visits.items() if not visits[k]['robot']}
self._callPlugins(POST_HOOK_DIRECTORY, self)
path = self.getDBFilename(cur_time)
if os.path.exists(path):
os.remove(path)
@ -311,7 +316,7 @@ class IWLA(object):
def _generateDayStats(self):
visits = self.current_analysis['visits']
self._callPlugins(PRE_HOOK_DIRECTORY, visits)
self._callPlugins(PRE_HOOK_DIRECTORY, self)
stats = self._generateStats(visits)
@ -391,14 +396,16 @@ class IWLA(object):
break
else:
print "No match for " + l
#break
if self.analyse_started:
self._generateDayStats()
self._generateMonthStats()
self._serialize(meta_infos, META_PATH)
self._serialize(self.meta_infos, META_PATH)
else:
print '==> Analyse not started : nothing to do'
self._generateMonthStats()
iwla = IWLA()
iwla.start()
if __name__ == '__main__':
iwla = IWLA()
iwla.start()

View File

@ -1,4 +1,5 @@
import re
from iwla import IWLA
from awstats_robots_data import awstats_robots
@ -21,7 +22,8 @@ def load():
# Basic rule to detect robots
def hook(hits):
def hook(iwla):
hits = iwla.getCurrentVisists()
for k in hits.keys():
super_hit = hits[k]

View File

@ -1,4 +1,5 @@
import re
from iwla import IWLA
# Remove logo from indefero
logo_re = re.compile(r'^.+/logo/$')
@ -19,7 +20,9 @@ def load():
# Basic rule to detect robots
def hook(hits):
def hook(iwla):
hits = iwla.getCurrentVisists()
for k in hits.keys():
super_hit = hits[k]