Before dictionary rework

This commit is contained in:
Grégory Soutadé 2014-11-19 19:45:41 +01:00
parent 888b481b1d
commit 53452fa4c3
1 changed file with 39 additions and 26 deletions

65
iwla.py
View File

@ -7,13 +7,14 @@ import glob
import imp
import pickle
import gzip
from robots import awstats_robots;
print '==> Start'
meta_visit = {'last_time':None}
analyse_started = False
current_visit = {}
current_visits = {}
log_format = '$server_name:$server_port $remote_addr - $remote_user [$time_local] ' +\
'"$request" $status $body_bytes_sent ' +\
@ -41,7 +42,11 @@ print '==> Generating robot dictionary'
awstats_robots = map(lambda (x) : re.compile(x, re.IGNORECASE), awstats_robots)
def get_db_filename(time):
def createEmptyVisits():
    """Build and return a new, empty visits container.

    The returned dict has three keys, each mapped to its own empty dict:
    'days_stats', 'month_stats' and 'visits'.
    """
    return {'days_stats' : {}, 'month_stats' : {}, 'visits' : {}}
def getDBFilename(time):
    """Return the path of the monthly database file for *time*.

    time must expose ``tm_year`` and ``tm_mon`` attributes (the caller in
    this file passes the result of ``time.strptime``).  The path is
    ``DB_ROOT`` followed by ``<year>/<month>_<DB_FILENAME>``.
    """
    # Apply '%' to the literal template only, then prepend DB_ROOT: a '%'
    # inside the configured root must not be parsed as a format specifier.
    return DB_ROOT + '%d/%d_%s' % (time.tm_year, time.tm_mon, DB_FILENAME)
def serialize(obj, filename):
@ -64,7 +69,10 @@ def deserialize(filename):
return pickle.load(f)
return None
def call_plugins(path, *kwargs):
def createEmptyVisits():
    """Return a new, empty visits container.

    This definition comes later in the module than another function of the
    same name, so it is the one the name ends up bound to.  It therefore
    returns the same structure as that earlier definition instead of
    falling through and returning None.
    """
    return {'days_stats': {}, 'month_stats': {}, 'visits': {}}
def callPlugins(path, *kwargs):
print '==> Call plugins (%s)' % path
plugins = glob.glob(path)
plugins.sort()
@ -81,7 +89,7 @@ def isPage(request):
return False
def appendHit(hit):
super_hit = current_visit[hit['remote_addr']]
super_hit = current_visits[hit['remote_addr']]
super_hit['pages'].append(hit)
super_hit['bandwith'] += int(hit['body_bytes_sent'])
@ -111,7 +119,7 @@ def appendHit(hit):
super_hit[hit_key] += 1
def createUser(hit):
super_hit = current_visit[hit['remote_addr']] = {}
super_hit = current_visits[hit['remote_addr']] = {}
super_hit['viewed_pages'] = 0;
super_hit['viewed_hits'] = 0;
super_hit['not_viewed_pages'] = 0;
@ -127,7 +135,7 @@ def isRobot(hit):
return True
return False
def decode_http_request(hit):
def decodeHTTPRequest(hit):
if not 'request' in hit.keys(): return False
groups = http_request_extracted.match(hit['request'])
@ -149,18 +157,18 @@ def decode_http_request(hit):
referer = hit['extract_referer'] = referer_groups.groupdict()
return True
def decode_time(hit):
def decodeTime(hit):
    """Parse the hit's local timestamp and store the result on the hit.

    Reads ``hit['time_local']``, parses it with ``time.strptime`` using the
    module-level ``time_format``, and writes the parsed value to
    ``hit['time_decoded']``.  Returns nothing.
    """
    hit['time_decoded'] = time.strptime(hit['time_local'], time_format)
def generate_month_stats():
call_plugins(PRE_HOOK_DIRECTORY, current_visit)
def generateMonthStats():
callPlugins(PRE_HOOK_DIRECTORY, current_visits)
valid_visitors = {k: v for (k,v) in current_visit.items() if not current_visit[k]['robot']}
valid_visitors = {k: v for (k,v) in current_visits.items() if not current_visits[k]['robot']}
call_plugins(POST_HOOK_DIRECTORY, valid_visitors)
callPlugins(POST_HOOK_DIRECTORY, valid_visitors)
stats = {}
stats['viewed_bandwidth'] = 0
@ -169,8 +177,8 @@ def generate_month_stats():
stats['viewed_hits'] = 0
stats['pages'] = set()
for k in current_visit.keys():
super_hit = current_visit[k]
for k in current_visits.keys():
super_hit = current_visits[k]
if super_hit['robot']:
stats['not_viewed_bandwidth'] += super_hit['bandwith']
continue
@ -189,27 +197,27 @@ def generate_month_stats():
print "== Stats for %d/%d ==" % (cur_time.tm_year, cur_time.tm_mon)
print stats
path = get_db_filename(cur_time)
path = getDBFilename(cur_time)
if os.path.exists(path):
os.remove(path)
print "==> Serialize to %s" % path
serialize(current_visit, path)
serialize(current_visits, path)
def newHit(hit):
global current_visit
global current_visits
global analyse_started
decode_time(hit)
decodeTime(hit)
t = hit['time_decoded']
cur_time = meta_visit['last_time']
if cur_time == None:
current_visit = deserialize(get_db_filename(t))
if not current_visit: current_visit = {}
current_visits = deserialize(getDBFilename(t))
if not current_visits: current_visits = {}
analyse_started = True
else:
if not analyse_started:
@ -217,20 +225,22 @@ def newHit(hit):
return
else:
analyse_started = True
current_visits = deserialize(getDBFilename(t))
if not current_visits: current_visits = {}
if cur_time.tm_mon != t.tm_mon:
generate_month_stats()
current_visit = deserialize(get_db_filename(t))
if not current_visit: current_visit = {}
generateMonthStats()
current_visits = deserialize(getDBFilename(t))
if not current_visits: current_visits = {}
meta_visit['last_time'] = t
if not decode_http_request(hit): return False
if not decodeHTTPRequest(hit): return False
for k in hit.keys():
if hit[k] == '-': hit[k] = ''
remote_addr = hit['remote_addr']
if remote_addr in current_visit.keys():
if remote_addr in current_visits.keys():
appendHit(hit)
else:
createUser(hit)
@ -256,5 +266,8 @@ for l in f:
print "No match " + l
f.close();
generate_month_stats()
serialize(meta_visit, META_PATH)
if analyse_started:
generateMonthStats()
serialize(meta_visit, META_PATH)
else:
print '==> Analyse not started : nothing to do'