iwla

iwla Commit Details

Date: 2014-11-19 19:45:41 (6 years 8 months ago)
Author: Grégory Soutadé
Branch: dev, master
Commit: 53452fa4c3d2cabd945a1ea90f294b40afeb2aeb
Parents: 888b481b1dd2ee05d6bd087e3bb7122d26e33f77
Message: Before dictionary rework

Changes:
M iwla.py (11 diffs)

File differences

iwla.py
77
88
99
10
1011
1112
1213
1314
1415
1516
16
17
1718
1819
1920
......
4142
4243
4344
44
45
46
47
48
49
4550
4651
4752
......
6469
6570
6671
67
72
73
74
75
6876
6977
7078
......
8189
8290
8391
84
92
8593
8694
8795
......
111119
112120
113121
114
122
115123
116124
117125
......
127135
128136
129137
130
138
131139
132140
133141
......
149157
150158
151159
152
160
153161
154162
155163
156164
157165
158
159
166
167
160168
161
169
162170
163
171
164172
165173
166174
......
169177
170178
171179
172
173
180
181
174182
175183
176184
......
189197
190198
191199
192
200
193201
194202
195203
196204
197205
198
206
199207
200208
201
209
202210
203211
204
212
205213
206214
207215
208216
209217
210218
211
212
219
220
213221
214222
215223
......
217225
218226
219227
228
229
220230
221
222
223
231
232
233
224234
225235
226236
227
237
228238
229239
230240
231241
232242
233
243
234244
235245
236246
......
256266
257267
258268
259
260
269
270
271
272
273
import imp
import pickle
import gzip
from robots import awstats_robots;
print '==> Start'
meta_visit = {'last_time':None}
analyse_started = False
current_visit = {}
current_visits = {}
log_format = '$server_name:$server_port $remote_addr - $remote_user [$time_local] ' +\
'"$request" $status $body_bytes_sent ' +\
awstats_robots = map(lambda (x) : re.compile(x, re.IGNORECASE), awstats_robots)
def get_db_filename(time):
def createEmptyVisits():
visits = {'days_stats' : {}, 'month_stats' : {}, 'visits' : {}}
return visits
def getDBFilename(time):
return (DB_ROOT + '%d/%d_%s') % (time.tm_year, time.tm_mon, DB_FILENAME)
def serialize(obj, filename):
return pickle.load(f)
return None
def call_plugins(path, *kwargs):
def createEmptyVisits():
pass
def callPlugins(path, *kwargs):
print '==> Call plugins (%s)' % path
plugins = glob.glob(path)
plugins.sort()
return False
def appendHit(hit):
super_hit = current_visit[hit['remote_addr']]
super_hit = current_visits[hit['remote_addr']]
super_hit['pages'].append(hit)
super_hit['bandwith'] += int(hit['body_bytes_sent'])
super_hit[hit_key] += 1
def createUser(hit):
super_hit = current_visit[hit['remote_addr']] = {}
super_hit = current_visits[hit['remote_addr']] = {}
super_hit['viewed_pages'] = 0;
super_hit['viewed_hits'] = 0;
super_hit['not_viewed_pages'] = 0;
return True
return False
def decode_http_request(hit):
def decodeHTTPRequest(hit):
if not 'request' in hit.keys(): return False
groups = http_request_extracted.match(hit['request'])
referer = hit['extract_referer'] = referer_groups.groupdict()
return True
def decode_time(hit):
def decodeTime(hit):
t = hit['time_local']
hit['time_decoded'] = time.strptime(t, time_format)
def generate_month_stats():
call_plugins(PRE_HOOK_DIRECTORY, current_visit)
def generateMonthStats():
callPlugins(PRE_HOOK_DIRECTORY, current_visits)
valid_visitors = {k: v for (k,v) in current_visit.items() if not current_visit[k]['robot']}
valid_visitors = {k: v for (k,v) in current_visits.items() if not current_visits[k]['robot']}
call_plugins(POST_HOOK_DIRECTORY, valid_visitors)
callPlugins(POST_HOOK_DIRECTORY, valid_visitors)
stats = {}
stats['viewed_bandwidth'] = 0
stats['viewed_hits'] = 0
stats['pages'] = set()
for k in current_visit.keys():
super_hit = current_visit[k]
for k in current_visits.keys():
super_hit = current_visits[k]
if super_hit['robot']:
stats['not_viewed_bandwidth'] += super_hit['bandwith']
continue
print "== Stats for %d/%d ==" % (cur_time.tm_year, cur_time.tm_mon)
print stats
path = get_db_filename(cur_time)
path = getDBFilename(cur_time)
if os.path.exists(path):
os.remove(path)
print "==> Serialize to %s" % path
serialize(current_visit, path)
serialize(current_visits, path)
def newHit(hit):
global current_visit
global current_visits
global analyse_started
decode_time(hit)
decodeTime(hit)
t = hit['time_decoded']
cur_time = meta_visit['last_time']
if cur_time == None:
current_visit = deserialize(get_db_filename(t))
if not current_visit: current_visit = {}
current_visits = deserialize(getDBFilename(t))
if not current_visits: current_visits = {}
analyse_started = True
else:
if not analyse_started:
return
else:
analyse_started = True
current_visits = deserialize(getDBFilename(t))
if not current_visits: current_visits = {}
if cur_time.tm_mon != t.tm_mon:
generate_month_stats()
current_visit = deserialize(get_db_filename(t))
if not current_visit: current_visit = {}
generateMonthStats()
current_visits = deserialize(getDBFilename(t))
if not current_visits: current_visits = {}
meta_visit['last_time'] = t
if not decode_http_request(hit): return False
if not decodeHTTPRequest(hit): return False
for k in hit.keys():
if hit[k] == '-': hit[k] = ''
remote_addr = hit['remote_addr']
if remote_addr in current_visit.keys():
if remote_addr in current_visits.keys():
appendHit(hit)
else:
createUser(hit)
print "No match " + l
f.close();
generate_month_stats()
serialize(meta_visit, META_PATH)
if analyse_started:
generateMonthStats()
serialize(meta_visit, META_PATH)
else:
print '==> Analyse not started : nothing to do'

Archive Download the corresponding diff file

Branches

Tags