iwla

iwla Commit Details

Date: 2014-11-19 21:37:37 (6 years 8 months ago)
Author: Grégory Soutadé
Branch: dev, master
Commit: b8027fe509f673f13df40268b9bf50815f0afab2
Parents: 53452fa4c3d2cabd945a1ea90f294b40afeb2aeb
Message: Need to separate day and month stats

Changes:
R hooks/pre_analysis/H002_soutade.py → hooks/pre_analysis/H001_soutade.py
R hooks/pre_analysis/H001_robot.py → hooks/pre_analysis/H002_robot.py
M iwla.py (12 diffs)

File differences

hooks/pre_analysis/H001_soutade.py
1515
1616
1717
18
19
18
19
20
if not p['is_page']: continue
if logo_re.match(p['extract_request']['extract_uri']):
p['is_page'] = False
super_hit['viewed_pages'] -= 1
super_hit['viewed_hits'] += 1
if super_hit['viewed_pages']:
super_hit['viewed_pages'] -= 1
super_hit['viewed_hits'] += 1
iwla.py
1515
1616
1717
18
1819
1920
2021
......
4647
4748
4849
50
51
52
53
4954
5055
5156
......
6974
7075
7176
72
73
74
7577
7678
7779
7880
7981
8082
81
83
84
85
86
87
8288
8389
8490
......
8995
9096
9197
92
98
9399
94100
95101
......
102108
103109
104110
105
106
111
112
113
107114
108115
109116
......
119126
120127
121128
122
129
123130
124131
125132
......
163170
164171
165172
166
167
168
169
170
171
172
173
173174
174175
175176
176177
177178
178
179
180
179181
180
181
182
183
182184
183185
184186
185187
188
189
190
186191
187192
188193
189194
190
191
192
193
195
196
197
198
194199
200
201
202
203
204
205
206
195207
196
197208
198209
199210
211
212
213
214
215
200216
201217
202218
......
205221
206222
207223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
208247
209248
210249
......
217256
218257
219258
220
259
221260
222261
223262
......
226265
227266
228267
229
268
230269
231270
232271
233
272
273
274
234275
235276
236277
......
240281
241282
242283
243
284
244285
245286
246287
......
251292
252293
253294
254
295
296
297
255298
256299
257300
meta_visit = {'last_time':None}
analyse_started = False
current_visits = {}
cache_plugins = {}
log_format = '$server_name:$server_port $remote_addr - $remote_user [$time_local] ' +\
'"$request" $status $body_bytes_sent ' +\
visits = {'days_stats' : {}, 'month_stats' : {}, 'visits' : {}}
return visits
def createEmptyMeta():
meta = {'last_time':None}
return meta
def getDBFilename(time):
return (DB_ROOT + '%d/%d_%s') % (time.tm_year, time.tm_mon, DB_FILENAME)
return pickle.load(f)
return None
def createEmptyVisits():
pass
def callPlugins(path, *kwargs):
print '==> Call plugins (%s)' % path
plugins = glob.glob(path)
plugins.sort()
for p in plugins:
print '\t%s' % (p)
mod = imp.load_source('hook', p)
if not p in cache_plugins:
mod = imp.load_source('hook', p)
cache_plugins[p] = mod
else:
mod = cache_plugins[p]
mod.hook(*kwargs)
def isPage(request):
return False
def appendHit(hit):
super_hit = current_visits[hit['remote_addr']]
super_hit = current_visits['visits'][hit['remote_addr']]
super_hit['pages'].append(hit)
super_hit['bandwith'] += int(hit['body_bytes_sent'])
hit['is_page'] = isPage(uri)
# Don't count redirect status
if int(hit['status']) == 302: return
# Don't count 3xx status
status = int(hit['status'])
if status >= 300 and status < 400: return
if super_hit['robot'] or\
not int(hit['status']) in viewed_http_codes:
super_hit[hit_key] += 1
def createUser(hit):
super_hit = current_visits[hit['remote_addr']] = {}
super_hit = current_visits['visits'][hit['remote_addr']] = {}
super_hit['viewed_pages'] = 0;
super_hit['viewed_hits'] = 0;
super_hit['not_viewed_pages'] = 0;
hit['time_decoded'] = time.strptime(t, time_format)
def generateMonthStats():
callPlugins(PRE_HOOK_DIRECTORY, current_visits)
valid_visitors = {k: v for (k,v) in current_visits.items() if not current_visits[k]['robot']}
callPlugins(POST_HOOK_DIRECTORY, valid_visitors)
def generateStats(visits):
stats = {}
stats['viewed_bandwidth'] = 0
stats['not_viewed_bandwidth'] = 0
stats['viewed_pages'] = 0
stats['viewed_hits'] = 0
stats['pages'] = set()
#stats['pages'] = set()
stats['nb_visitors'] = 0
for k in current_visits.keys():
super_hit = current_visits[k]
for k in visits.keys():
super_hit = visits[k]
if super_hit['robot']:
stats['not_viewed_bandwidth'] += super_hit['bandwith']
continue
print "[%s] =>\t%d/%d" % (k, super_hit['viewed_pages'], super_hit['viewed_hits'])
stats['nb_visitors'] += 1
stats['viewed_bandwidth'] += super_hit['bandwith']
stats['viewed_pages'] += super_hit['viewed_pages']
stats['viewed_hits'] += super_hit['viewed_hits']
for p in super_hit['pages']:
if not p['is_page']: continue
req = p['extract_request']
stats['pages'].add(req['extract_uri'])
# for p in super_hit['pages']:
# if not p['is_page']: continue
# req = p['extract_request']
# stats['pages'].add(req['extract_uri'])
return stats
def generateMonthStats():
visits = current_visits['visits']
stats = generateStats(visits)
cur_time = meta_visit['last_time']
print "== Stats for %d/%d ==" % (cur_time.tm_year, cur_time.tm_mon)
print stats
valid_visitors = {k: v for (k,v) in visits.items() if not visits[k]['robot']}
callPlugins(POST_HOOK_DIRECTORY, valid_visitors)
current_visits['month_stats'] = stats
path = getDBFilename(cur_time)
if os.path.exists(path):
os.remove(path)
serialize(current_visits, path)
def generateDayStats():
visits = current_visits['visits']
callPlugins(PRE_HOOK_DIRECTORY, visits)
stats = generateStats(visits)
cur_time = meta_visit['last_time']
print "== Stats for %d/%d/%d ==" % (cur_time.tm_year, cur_time.tm_mon, cur_time.tm_mday)
if cur_time.tm_mday > 1:
last_day = cur_time.tm_mday - 1
while last_day:
if last_day in current_visits['days_stats'].keys():
break
last_day -= 1
if last_day:
for k in stats.keys():
stats[k] -= current_visits['days_stats'][last_day][k]
print stats
current_visits['days_stats'][cur_time.tm_mday] = stats
def newHit(hit):
global current_visits
global analyse_started
if cur_time == None:
current_visits = deserialize(getDBFilename(t))
if not current_visits: current_visits = {}
if not current_visits: current_visits = createEmptyVisits()
analyse_started = True
else:
if not analyse_started:
else:
analyse_started = True
current_visits = deserialize(getDBFilename(t))
if not current_visits: current_visits = {}
if not current_visits: current_visits = createEmptyVisits()
if cur_time.tm_mon != t.tm_mon:
generateMonthStats()
current_visits = deserialize(getDBFilename(t))
if not current_visits: current_visits = {}
if not current_visits: current_visits = createEmptyVisits()
elif cur_time.tm_mday != t.tm_mday:
generateDayStats()
meta_visit['last_time'] = t
if hit[k] == '-': hit[k] = ''
remote_addr = hit['remote_addr']
if remote_addr in current_visits.keys():
if remote_addr in current_visits['visits'].keys():
appendHit(hit)
else:
createUser(hit)
meta_visit = deserialize(META_PATH)
if not meta_visit:
meta_visit = {'last_time':None}
meta_visit = createEmptyMeta()
current_visits = createEmptyVisits()
f = open("access.log")
for l in f:

Archive Download the corresponding diff file

Branches

Tags