iwla

iwla Commit Details

Date: 2014-11-26 22:03:19 (6 years 7 months ago)
Author: Grégory Soutadé
Branch: dev, master
Commit: 5e965f4cc10bf41ad9791d69ff76a60fbf308ac6
Parents: f8a48a71444da17df8ceb8bad24985ca97a82f1f
Message: Add top_pages plugin

Changes:
A plugins/display/top_pages.py (full)
A plugins/post_analysis/top_pages.py (full)
M conf.py (1 diff)
M iwla.py (1 diff)
M plugins/post_analysis/referers.py (1 diff)

File differences

conf.py
1616
1717
1818
19
19
2020
21
21
2222
2323
2424
DISPLAY_ROOT = './output/'
pre_analysis_hooks = ['page_to_hit', 'robots']
post_analysis_hooks = ['referers']
post_analysis_hooks = ['referers', 'top_pages']
# post_analysis_hooks = ['top_visitors', 'reverse_dns']
display_hooks = ['top_visitors', 'all_visits', 'referers']
display_hooks = ['top_visitors', 'all_visits', 'referers', 'top_pages']
reverse_dns_timeout = 0.2
page_to_hit_conf = [r'^.+/logo/$']
iwla.py
140140
141141
142142
143
144
145
143
144
146145
147146
148147
request = hit['extract_request']
if 'extract_uri' in request.keys():
uri = request['extract_uri']
else:
uri = request['http_uri']
uri = request['extract_uri'] = request['http_uri']
uri = request['extract_uri']
hit['is_page'] = self.isPage(uri)
plugins/display/top_pages.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import time
from iwla import IWLA
from iplugin import IPlugin
from display import *
class IWLADisplayTopPages(IPlugin):
    """Display plugin that renders the month's 'top_pages' statistic.

    It adds a ten-row summary table to the index display, registers a
    separate page listing every entry, and links that page from the index.
    """

    def __init__(self, iwla):
        super(IWLADisplayTopPages, self).__init__(iwla)
        self.API_VERSION = 1
        # hook() reads the 'top_pages' month statistic, so the plugin that
        # produces it is declared as a prerequisite.
        self.requires = ['IWLAPostAnalysisTopPages']

    def hook(self):
        """Build the summary table, the full listing page and the link."""
        columns = ['URI', 'Entrance']

        # (uri, count) pairs, highest count first.
        ranked = sorted(self.iwla.getMonthStats()['top_pages'].items(),
                        key=lambda pair: pair[1], reverse=True)

        # Summary on the index: only the first ten entries.
        index = self.iwla.getDisplayIndex()
        summary = DisplayHTMLBlockTable('Top Pages', columns)
        for page_uri, count in ranked[:10]:
            summary.appendRow([page_uri, count])
        index.appendBlock(summary)

        # Dedicated page with the complete listing, stored under the
        # directory named after the current year.
        now = self.iwla.getCurTime()
        title = time.strftime('Top Pages - %B %Y', now)
        filename = 'top_pages_%d.html' % (now.tm_mon)
        full_page = DisplayHTMLPage(title, '%d/%s' % (now.tm_year, filename))

        listing = DisplayHTMLBlockTable('Top Pages', columns)
        for page_uri, count in ranked:
            listing.appendRow([page_uri, count])
        full_page.appendBlock(listing)

        self.iwla.getDisplay().addPage(full_page)

        # The href is the bare file name, without the year directory.
        # NOTE(review): presumably the index page lives in that same
        # directory so the relative link resolves -- confirm.
        link = DisplayHTMLRawBlock()
        link.setRawHTML('<a href=\'%s\'>All pages</a>' % (filename))
        index.appendBlock(link)
plugins/post_analysis/referers.py
7070
7171
7272
73
7374
7475
7576
start_time = time.mktime(start_time)
stats = self.iwla.getCurrentVisists()
month_stats = self.iwla.getMonthStats()
referers = month_stats.get('referers', {})
robots_referers = month_stats.get('robots_referers', {})
search_engine_referers = month_stats.get('search_engine_referers', {})
plugins/post_analysis/top_pages.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import time
import re
from iwla import IWLA
from iplugin import IPlugin
class IWLAPostAnalysisTopPages(IPlugin):
    """Post-analysis plugin that counts page requests per URI.

    The counters are kept in the month statistics under the 'top_pages'
    key, as a dict mapping '<server_name><uri>' to a number of requests.
    """

    def __init__(self, iwla):
        super(IWLAPostAnalysisTopPages, self).__init__(iwla)
        self.API_VERSION = 1

    def load(self):
        """Compile the index-page pattern; always returns True."""
        # Used with match(), so it is anchored at the start of the URI: any
        # URI beginning with '/index' is folded into '/'.
        # NOTE(review): this also folds URIs such as '/indexes/...' and does
        # not fold '/dir/index.html' -- confirm that this is intended.
        self.index_re = re.compile(r'/index.*')
        return True

    def hook(self):
        """Add the requests of the current visits to the 'top_pages' counters.

        Visits flagged as robots, requests that are not pages and requests
        older than the start of the analysis are ignored.
        """
        start_time = time.mktime(self.iwla.getStartAnalysisTime())

        stats = self.iwla.getCurrentVisists()
        month_stats = self.iwla.getMonthStats()

        # Continue from the counters already stored for the month, if any.
        top_pages = month_stats.get('top_pages', {})

        for super_hit in stats.values():
            if super_hit['robot']:
                continue
            for r in super_hit['requests']:
                if not r['is_page']:
                    continue
                if time.mktime(r['time_decoded']) < start_time:
                    continue

                uri = r['extract_request']['extract_uri']
                if self.index_re.match(uri):
                    uri = '/'
                # Prefix with the server name (empty when absent).
                uri = "%s%s" % (r.get('server_name', ''), uri)

                # dict.get avoids the list scan that "uri in top_pages.keys()"
                # performs on Python 2 for every counted request.
                top_pages[uri] = top_pages.get(uri, 0) + 1

        month_stats['top_pages'] = top_pages

Archive Download the corresponding diff file

Branches

Tags