iwla

iwla Commit Details

Date:2014-12-09 16:54:02 (6 years 7 months ago)
Author:Grégory Soutadé
Branch:dev, master
Commit:751a9b3fae17a5caf76933e65415573e0b346b93
Parents: 43e5e97c5a810db3cd06e000be1732f52f1d58bf
Message:Start big comments (post analysis / referers)

Changes:
Mdisplay.py (2 diffs)
Miplugin.py (1 diff)
Miwla.py (4 diffs)
Mplugins/post_analysis/referers.py (1 diff)
Mplugins/pre_analysis/page_to_hit.py (1 diff)
Mplugins/pre_analysis/robots.py (1 diff)

File differences

display.py
11
22
33
4
5
6
7
48
59
610
......
310314
311315
312316
317
318
319
320
313321
314322
315323
import os
import codecs
#
# Create output HTML files
#
class DisplayHTMLRaw(object):
def __init__(self, iwla, html=u''):
for page in self.pages:
page.build(root)
#
# Global functions
#
def bytesToStr(bytes):
suffixes = [u'', u' kB', u' MB', u' GB', u' TB']
iplugin.py
22
33
44
5
6
7
8
59
610
711
import inspect
import traceback
#
# IWLA Plugin interface
#
class IPlugin(object):
def __init__(self, iwla):
iwla.py
2020
2121
2222
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
23101
24102
25103
......
105183
106184
107185
108
186
187
109188
110189
111190
......
163242
164243
165244
166
167245
168246
169247
......
206284
207285
208286
209
210287
211288
212289
from iplugin import *
from display import *
#
# Main class IWLA
# Parse logs, process them, call plugins and produce output
# For now, only HTTP logs are valid
#
# Plugin requirements : None
#
# Conf values needed :
# analyzed_filename
# domain_name
#
# Output files :
# DB_ROOT/meta.db
# DB_ROOT/year/month/iwla.db
# OUTPUT_ROOT/index.html
# OUTPUT_ROOT/year/month/index.html
#
# Statistics creation :
#
# meta =>
# last_time
# start_analysis_time
# stats =>
# year =>
# month =>
# viewed_bandwidth
# not_viewed_bandwidth
# viewed_pages
# viewed_hits
# nb_visitors
#
# month_stats :
# viewed_bandwidth
# not_viewed_bandwidth
# viewed_pages
# viewed_hits
# nb_visitors
#
# days_stats :
# day =>
# viewed_bandwidth
# not_viewed_bandwidth
# viewed_pages
# viewed_hits
# nb_visitors
#
# visits :
# remote_addr =>
# remote_addr
# remote_ip
# viewed_pages
# viewed_hits
# not_viewed_pages
# not_viewed_hits
# bandwidth
# last_access
# requests =>
# [fields_from_format_log]
# extract_request =>
# extract_uri
# extract_parameters*
# extract_referer* =>
# extract_uri
# extract_parameters*
# robot
# hit_only
# is_page
#
# valid_visitors:
# month_stats without robots and hit-only visitors (if not conf.count_hit_only_visitors)
#
# Statistics update :
# None
#
# Statistics deletion :
# None
#
class IWLA(object):
ANALYSIS_CLASS = 'HTTP'
def _clearMeta(self):
self.meta_infos = {
'last_time' : None
'last_time' : None,
'start_analysis_time' : None
}
return self.meta_infos
if not remote_addr in self.current_analysis['visits'].keys():
self._createVisitor(hit)
return
super_hit = self.current_analysis['visits'][remote_addr]
super_hit['requests'].append(hit)
super_hit['requests'] = []
super_hit['robot'] = False
super_hit['hit_only'] = 0
self._appendHit(hit)
def _decodeHTTPRequest(self, hit):
if not 'request' in hit.keys(): return False
plugins/post_analysis/referers.py
66
77
88
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
935
1036
1137
import awstats_data
#
# Post analysis hook
#
# Extract referers and key phrases from requests
#
# Plugin requirements : None
#
# Conf values needed :
# page_to_hit_conf*
# hit_to_page_conf*
#
# Output files :
# None
#
# Statistics creation :
# None
#
# Statistics update :
# visits :
# remote_addr =>
# robot
#
# Statistics deletion :
# None
#
class IWLAPostAnalysisReferers(IPlugin):
def __init__(self, iwla):
super(IWLAPostAnalysisReferers, self).__init__(iwla)
plugins/pre_analysis/page_to_hit.py
33
44
55
6
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
730
831
932
from iwla import IWLA
from iplugin import IPlugin
# Basic rule to detect robots
#
# Pre analysis hook
# Change page into hit and hit into page in statistics
#
# Plugin requirements : None
#
# Conf values needed :
# page_to_hit_conf*
# hit_to_page_conf*
#
# Output files :
# None
#
# Statistics creation :
# None
#
# Statistics update :
# visits :
# remote_addr =>
# is_page
#
# Statistics deletion :
# None
#
class IWLAPreAnalysisPageToHit(IPlugin):
plugins/pre_analysis/robots.py
55
66
77
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
834
935
1036
import awstats_data
#
# Pre analysis hook
#
# Filter robots
#
# Plugin requirements : None
#
# Conf values needed :
# page_to_hit_conf*
# hit_to_page_conf*
#
# Output files :
# None
#
# Statistics creation :
# None
#
# Statistics update :
# visits :
# remote_addr =>
# robot
#
# Statistics deletion :
# None
#
class IWLAPreAnalysisRobots(IPlugin):
def __init__(self, iwla):
super(IWLAPreAnalysisRobots, self).__init__(iwla)

Archive Download the corresponding diff file

Branches

Tags