Merge branch 'logging'

This commit is contained in:
Grégory Soutadé 2014-12-16 20:24:56 +01:00
commit b4fc831f06
4 changed files with 48 additions and 39 deletions

1
TODO
View File

@@ -6,5 +6,4 @@ Limit hits/pages/downloads by rate
Automatic tests Automatic tests
Add Licence Add Licence
Free memory as soon as possible Free memory as soon as possible
gzip output files
different debug output levels different debug output levels

View File

@@ -1,6 +1,7 @@
import os import os
import codecs import codecs
import time import time
import logging
# #
# Create output HTML files # Create output HTML files
@@ -22,14 +23,8 @@ class DisplayHTMLRaw(object):
if html: f.write(html) if html: f.write(html)
def build(self, f): def build(self, f):
# t1 = time.time()
self._buildHTML() self._buildHTML()
# t2 = time.time()
# print 'Time for _buildHTML : %d seconds' % (t2-t1)
# t1 = time.time()
self._build(f, self.html) self._build(f, self.html)
# t2 = time.time()
# print 'Time for _build : %d seconds' % (t2-t1)
class DisplayHTMLBlock(DisplayHTMLRaw): class DisplayHTMLBlock(DisplayHTMLRaw):
@@ -252,6 +247,7 @@ class DisplayHTMLPage(object):
self.filename = filename self.filename = filename
self.blocks = [] self.blocks = []
self.css_path = listToStr(css_path) self.css_path = listToStr(css_path)
self.logger = logging.getLogger(self.__class__.__name__)
def getFilename(self): def getFilename(self):
return self.filename; return self.filename;
@@ -272,6 +268,8 @@ class DisplayHTMLPage(object):
if not os.path.exists(base): if not os.path.exists(base):
os.makedirs(base) os.makedirs(base)
self.logger.debug('Write %s' % (filename))
f = codecs.open(filename, 'w', 'utf-8') f = codecs.open(filename, 'w', 'utf-8')
f.write(u'<!DOCTYPE html>') f.write(u'<!DOCTYPE html>')
f.write(u'<html>') f.write(u'<html>')
@@ -321,9 +319,7 @@ class DisplayHTMLBuild(object):
os.symlink(target, link_name) os.symlink(target, link_name)
for page in self.pages: for page in self.pages:
# print 'Build %s' % (page.filename)
page.build(root) page.build(root)
# print 'Built'
# #
# Global functions # Global functions

View File

@@ -1,6 +1,6 @@
import importlib import importlib
import inspect import inspect
import traceback import logging
# #
# IWLA Plugin interface # IWLA Plugin interface
@@ -47,7 +47,9 @@ def validConfRequirements(conf_requirements, iwla, plugin_path):
def preloadPlugins(plugins, iwla): def preloadPlugins(plugins, iwla):
cache_plugins = {} cache_plugins = {}
print "==> Preload plugins" logger = logging.getLogger(__name__)
logger.info("==> Preload plugins")
for (root, plugins_filenames) in plugins: for (root, plugins_filenames) in plugins:
for plugin_filename in plugins_filenames: for plugin_filename in plugins_filenames:
@@ -61,7 +63,7 @@ def preloadPlugins(plugins, iwla):
] ]
if not classes: if not classes:
print 'No plugin defined in %s' % (plugin_path) logger.warning('No plugin defined in %s' % (plugin_path))
continue continue
plugin = classes[0](iwla) plugin = classes[0](iwla)
@@ -86,18 +88,17 @@ def preloadPlugins(plugins, iwla):
requirement_validated = True requirement_validated = True
break break
if not requirement_validated: if not requirement_validated:
print 'Missing requirements \'%s\' for plugin %s' % (r, plugin_path) logger.error('Missing requirements \'%s\' for plugin %s' % (r, plugin_path))
break break
if requirements and not requirement_validated: continue if requirements and not requirement_validated: continue
if not plugin.load(): if not plugin.load():
print 'Plugin %s load failed' % (plugin_path) logger.error('Plugin %s load failed' % (plugin_path))
continue continue
print '\tRegister %s' % (plugin_path) logger.info('\tRegister %s' % (plugin_path))
cache_plugins[plugin_path] = plugin cache_plugins[plugin_path] = plugin
except Exception as e: except Exception as e:
print 'Error loading %s => %s' % (plugin_path, e) logger.exception('Error loading %s => %s' % (plugin_path, e))
traceback.print_exc()
return cache_plugins return cache_plugins

57
iwla.py
View File

@@ -9,6 +9,7 @@ import pickle
import gzip import gzip
import importlib import importlib
import argparse import argparse
import logging
from calendar import monthrange from calendar import monthrange
from datetime import date from datetime import date
@@ -108,9 +109,7 @@ class IWLA(object):
API_VERSION = 1 API_VERSION = 1
IWLA_VERSION = '0.1' IWLA_VERSION = '0.1'
def __init__(self): def __init__(self, logLevel):
print '==> Start'
self.meta_infos = {} self.meta_infos = {}
self.analyse_started = False self.analyse_started = False
self.current_analysis = {} self.current_analysis = {}
@@ -127,6 +126,10 @@ class IWLA(object):
(conf.POST_HOOK_DIRECTORY , conf.post_analysis_hooks), (conf.POST_HOOK_DIRECTORY , conf.post_analysis_hooks),
(conf.DISPLAY_HOOK_DIRECTORY , conf.display_hooks)] (conf.DISPLAY_HOOK_DIRECTORY , conf.display_hooks)]
logging.basicConfig(format='%(name)s %(message)s', level=logLevel)
self.logger = logging.getLogger(self.__class__.__name__)
self.logger.info('==> Start')
def getVersion(self): def getVersion(self):
return IWLA.IWLA_VERSION return IWLA.IWLA_VERSION
@@ -223,13 +226,13 @@ class IWLA(object):
return None return None
def _callPlugins(self, target_root, *args): def _callPlugins(self, target_root, *args):
print '==> Call plugins (%s)' % target_root self.logger.info('==> Call plugins (%s)' % (target_root))
for (root, plugins) in self.plugins: for (root, plugins) in self.plugins:
if root != target_root: continue if root != target_root: continue
for p in plugins: for p in plugins:
mod = self.cache_plugins.get(root + '.' + p, None) mod = self.cache_plugins.get(root + '.' + p, None)
if mod: if mod:
print '\t%s' % (p) self.logger.info('\t%s' % (p))
mod.hook(*args) mod.hook(*args)
def isPage(self, request): def isPage(self, request):
@@ -299,7 +302,7 @@ class IWLA(object):
if 'extract_parameters' in d.keys(): if 'extract_parameters' in d.keys():
hit['extract_request']['extract_parameters'] = d['extract_parameters'] hit['extract_request']['extract_parameters'] = d['extract_parameters']
else: else:
print "Bad request extraction " + hit['request'] self.logger.warning("Bad request extraction %s" % (hit['request']))
return False return False
if hit['http_referer']: if hit['http_referer']:
@@ -337,7 +340,7 @@ class IWLA(object):
cur_time = self.meta_infos['last_time'] cur_time = self.meta_infos['last_time']
title = 'Stats %d/%02d' % (cur_time.tm_year, cur_time.tm_mon) title = 'Stats %d/%02d' % (cur_time.tm_year, cur_time.tm_mon)
filename = self.getCurDisplayPath('index.html') filename = self.getCurDisplayPath('index.html')
print '==> Generate display (%s)' % (filename) self.logger.info('==> Generate display (%s)' % (filename))
page = self.display.createPage(title, filename, conf.css_path) page = self.display.createPage(title, filename, conf.css_path)
_, nb_month_days = monthrange(cur_time.tm_year, cur_time.tm_mon) _, nb_month_days = monthrange(cur_time.tm_year, cur_time.tm_mon)
@@ -430,7 +433,8 @@ class IWLA(object):
def _generateDisplayWholeMonthStats(self): def _generateDisplayWholeMonthStats(self):
title = 'Stats for %s' % (conf.domain_name) title = 'Stats for %s' % (conf.domain_name)
filename = 'index.html' filename = 'index.html'
print '==> Generate main page (%s)' % (filename)
self.logger.info('==> Generate main page (%s)' % (filename))
page = self.display.createPage(title, filename, conf.css_path) page = self.display.createPage(title, filename, conf.css_path)
@@ -445,7 +449,9 @@ class IWLA(object):
def _compressFile(self, build_time, root, filename): def _compressFile(self, build_time, root, filename):
path = os.path.join(root, filename) path = os.path.join(root, filename)
gz_path = path + '.gz' gz_path = path + '.gz'
#print 'Compress %s => %s' % (path, gz_path)
self.logger.debug('Compress %s => %s' % (path, gz_path))
if not os.path.exists(gz_path) or\ if not os.path.exists(gz_path) or\
os.stat(path).st_mtime > build_time: os.stat(path).st_mtime > build_time:
with open(path, 'rb') as f_in: with open(path, 'rb') as f_in:
@@ -492,8 +498,8 @@ class IWLA(object):
duplicated_stats = {k:v for (k,v) in stats.items()} duplicated_stats = {k:v for (k,v) in stats.items()}
cur_time = self.meta_infos['last_time'] cur_time = self.meta_infos['last_time']
print "== Stats for %d/%02d ==" % (cur_time.tm_year, cur_time.tm_mon) self.logger.info("== Stats for %d/%02d ==" % (cur_time.tm_year, cur_time.tm_mon))
print stats self.logger.info(stats)
if not 'month_stats' in self.current_analysis.keys(): if not 'month_stats' in self.current_analysis.keys():
self.current_analysis['month_stats'] = stats self.current_analysis['month_stats'] = stats
@@ -517,7 +523,7 @@ class IWLA(object):
if os.path.exists(path): if os.path.exists(path):
os.remove(path) os.remove(path)
print "==> Serialize to %s" % path self.logger.info("==> Serialize to %s" % (path))
self._serialize(self.current_analysis, path) self._serialize(self.current_analysis, path)
# Save month stats # Save month stats
@@ -561,9 +567,8 @@ class IWLA(object):
not super_hit['robot']: not super_hit['robot']:
stats['nb_visits'] += 1 stats['nb_visits'] += 1
print "== Stats for %d/%02d/%02d ==" % (cur_time.tm_year, cur_time.tm_mon, cur_time.tm_mday) self.logger.info("== Stats for %d/%02d/%02d ==" % (cur_time.tm_year, cur_time.tm_mon, cur_time.tm_mday))
self.logger.info(stats)
print stats
self.current_analysis['days_stats'][cur_time.tm_mday] = stats self.current_analysis['days_stats'][cur_time.tm_mday] = stats
@@ -601,12 +606,12 @@ class IWLA(object):
return True return True
def start(self, _file): def start(self, _file):
print '==> Load previous database' self.logger.info('==> Load previous database')
self.meta_infos = self._deserialize(conf.META_PATH) or self._clearMeta() self.meta_infos = self._deserialize(conf.META_PATH) or self._clearMeta()
if self.meta_infos['last_time']: if self.meta_infos['last_time']:
print 'Last time' self.logger.info('Last time')
print self.meta_infos['last_time'] self.logger.info(self.meta_infos['last_time'])
self.current_analysis = self._deserialize(self.getDBFilename(self.meta_infos['last_time'])) or self._clearVisits() self.current_analysis = self._deserialize(self.getDBFilename(self.meta_infos['last_time'])) or self._clearVisits()
else: else:
self._clearVisits() self._clearVisits()
@@ -615,7 +620,7 @@ class IWLA(object):
self.cache_plugins = preloadPlugins(self.plugins, self) self.cache_plugins = preloadPlugins(self.plugins, self)
print '==> Analysing log' self.logger.info('==> Analysing log')
for l in _file: for l in _file:
# print "line " + l # print "line " + l
@@ -626,7 +631,7 @@ class IWLA(object):
if not self._newHit(groups.groupdict()): if not self._newHit(groups.groupdict()):
continue continue
else: else:
print "No match for " + l self.logger.warning("No match for %s" % (l))
#break #break
if self.analyse_started: if self.analyse_started:
@@ -635,7 +640,7 @@ class IWLA(object):
del self.meta_infos['start_analysis_time'] del self.meta_infos['start_analysis_time']
self._serialize(self.meta_infos, conf.META_PATH) self._serialize(self.meta_infos, conf.META_PATH)
else: else:
print '==> Analyse not started : nothing new' self.logger.info('==> Analyse not started : nothing new')
if __name__ == '__main__': if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Intelligent Web Log Analyzer') parser = argparse.ArgumentParser(description='Intelligent Web Log Analyzer')
@@ -651,14 +656,22 @@ if __name__ == '__main__':
parser.add_argument('-f', '--file', dest='file', parser.add_argument('-f', '--file', dest='file',
help='Analyse this log file') help='Analyse this log file')
parser.add_argument('-d', '--log-level', dest='loglevel',
default='INFO', type=str,
help='Loglevel in %s, default : %s' % (['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], 'INFO'))
args = parser.parse_args() args = parser.parse_args()
if args.clean_output: if args.clean_output:
if os.path.exists(conf.DB_ROOT): shutil.rmtree(conf.DB_ROOT) if os.path.exists(conf.DB_ROOT): shutil.rmtree(conf.DB_ROOT)
if os.path.exists(conf.DISPLAY_ROOT): shutil.rmtree(conf.DISPLAY_ROOT) if os.path.exists(conf.DISPLAY_ROOT): shutil.rmtree(conf.DISPLAY_ROOT)
iwla = IWLA() loglevel = getattr(logging, args.loglevel.upper(), None)
if not isinstance(loglevel, int):
raise ValueError('Invalid log level: %s' % (args.loglevel))
iwla = IWLA(loglevel)
required_conf = ['analyzed_filename', 'domain_name'] required_conf = ['analyzed_filename', 'domain_name']
if not validConfRequirements(required_conf, iwla, 'Main Conf'): if not validConfRequirements(required_conf, iwla, 'Main Conf'):
sys.exit(0) sys.exit(0)