iwla

iwla Commit Details

Date: 2014-11-27 13:46:58 (6 years 8 months ago)
Author: Grégory Soutadé
Branch: dev, master
Commit: c87ddfb1aabdbbefd6dca0f01bd0aee3a6fd208a
Parents: 5ccc63c7ae33101cb4185b3217e5b0114f186d29
Message: Add hit_to_page_conf in addition to page_to_hit_conf

Changes:
M conf.py (1 diff)
M plugins/display/top_downloads.py (1 diff)
M plugins/display/top_pages.py (1 diff)
M plugins/post_analysis/top_pages.py (1 diff)
M plugins/pre_analysis/page_to_hit.py (2 diffs)

File differences

conf.py
1616
1717
1818
19
19
2020
21
21
2222
2323
24
24
25
2526
2627
DISPLAY_ROOT = './output/'
pre_analysis_hooks = ['page_to_hit', 'robots']
post_analysis_hooks = ['referers', 'top_pages', 'top_downloads']
post_analysis_hooks = ['referers', 'top_pages', 'top_downloads', 'top_hits']
# post_analysis_hooks = ['top_visitors', 'reverse_dns']
display_hooks = ['top_visitors', 'all_visits', 'referers', 'top_pages', 'top_downloads']
display_hooks = ['top_visitors', 'all_visits', 'referers', 'top_pages', 'top_downloads', 'top_hits']
reverse_dns_timeout = 0.2
page_to_hit_conf = [r'^.+/logo[/]?$', r'^.+/category/.+$', r'^.+/tag/.+$', r'^.+/archive/.+$', r'^.+/ljdc[/]?$']
page_to_hit_conf = [r'^.+/logo[/]?$']
hit_to_page_conf = [r'^.+/category/.+$', r'^.+/tag/.+$', r'^.+/archive/.+$', r'^.+/ljdc[/]?$']
count_hit_only_visitors = True
plugins/display/top_downloads.py
2929
3030
3131
32
32
3333
3434
3535
path = '%d/%s' % (cur_time.tm_year, filename)
page = DisplayHTMLPage(title, path)
table = DisplayHTMLBlockTable('Top Downloads', ['URI', 'Hit'])
table = DisplayHTMLBlockTable('All Downloads', ['URI', 'Hit'])
for (uri, entrance) in top_downloads:
table.appendRow([uri, entrance])
page.appendBlock(table)
plugins/display/top_pages.py
2323
2424
2525
26
26
2727
2828
2929
index.appendBlock(table)
cur_time = self.iwla.getCurTime()
title = time.strftime('Top Pages - %B %Y', cur_time)
title = time.strftime('All Pages - %B %Y', cur_time)
filename = 'top_pages_%d.html' % (cur_time.tm_mon)
path = '%d/%s' % (cur_time.tm_year, filename)
plugins/post_analysis/top_pages.py
2323
2424
2525
26
26
27
28
2729
2830
2931
for r in super_hit['requests']:
if not r['is_page']: continue
if not self.iwla.isValidForCurrentAnalysis(r): continue
if not self.iwla.isValidForCurrentAnalysis(r) or\
not self.iwla.hasBeenViewed(r):
continue
uri = r['extract_request']['extract_uri']
if self.index_re.match(uri):
plugins/pre_analysis/page_to_hit.py
1212
1313
1414
15
16
17
18
15
16
17
18
19
20
21
22
23
1924
2025
2126
......
2934
3035
3136
32
37
3338
34
35
36
37
38
39
40
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
self.API_VERSION = 1
def load(self):
# Remove logo from indefero
self.regexps = self.iwla.getConfValue('page_to_hit_conf', [])
if not self.regexps: return False
self.regexps = map(lambda(r): re.compile(r), self.regexps)
# Page to hit
self.ph_regexps = self.iwla.getConfValue('page_to_hit_conf', [])
if not self.ph_regexps: return False
self.ph_regexps = map(lambda(r): re.compile(r), self.ph_regexps)
# Hit to page
self.hp_regexps = self.iwla.getConfValue('hit_to_page_conf', [])
if not self.hp_regexps: return False
self.hp_regexps = map(lambda(r): re.compile(r), self.hp_regexps)
return True
if not self.iwla.isValidForCurrentAnalysis(request) or\
not self.iwla.hasBeenViewed(request):
continue
if not request['is_page']: continue
uri = request['extract_request']['extract_uri']
for regexp in self.regexps:
if regexp.match(uri):
#print '%s is an hit' % uri
request['is_page'] = False
super_hit['viewed_pages'] -= 1
super_hit['viewed_hits'] += 1
break
if request['is_page']:
# Page to hit
for regexp in self.ph_regexps:
if regexp.match(uri):
#print '%s is a hit' % (uri )
request['is_page'] = False
super_hit['viewed_pages'] -= 1
super_hit['viewed_hits'] += 1
break
else:
# Hit to page
for regexp in self.hp_regexps:
if regexp.match(uri):
#print '%s is a page' % (uri )
request['is_page'] = True
super_hit['viewed_pages'] += 1
super_hit['viewed_hits'] -= 1
break

Archive Download the corresponding diff file

Branches

Tags