iwla

iwla Commit Details

Date: 2014-11-24 17:13:59 (6 years 8 months ago)
Author: Grégory Soutadé
Branch: dev, master
Commit: 21a95cc2fab7a3ff5d48ee09ccaef59f8f7e0ff6
Parents: 670f0249053a9a265fff0c80ffadee6100e5fb1c
Message: Rework plugins with classes

Changes:
D plugins/pre_analysis/H001_robot.py (full)
D plugins/pre_analysis/H002_soutade.py (full)
A iplugin.py (full)
A plugins/pre_analysis/robots.py (full)
A plugins/pre_analysis/soutade.py (full)
M conf.py (1 diff)
M iwla.py (2 diffs)
M plugins/display/top_visitors.py (1 diff)
M plugins/post_analysis/reverse_dns.py (1 diff)
M plugins/post_analysis/top_visitors.py (1 diff)

File differences

conf.py
1111
1212
1313
14
14
1515
1616
1717
DB_ROOT = './output/'
DISPLAY_ROOT = './output/'
pre_analysis_hooks = ['H002_soutade', 'H001_robot']
pre_analysis_hooks = ['soutade', 'robots']
post_analysis_hooks = ['top_visitors', 'reverse_dns']
display_hooks = ['top_visitors']
iplugin.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import importlib
import inspect
import traceback
class IPlugin(object):
    """Common base class for plugins.

    The defaults accept every load request and do nothing when hooked;
    subclasses override ``load`` and ``hook`` as needed.
    """

    def __init__(self, iwla):
        """Store the ``iwla`` object and set the default plugin metadata."""
        self.ANALYSIS_CLASS = 'HTTP'
        self.API_VERSION = 1
        # Class names of the plugins this one depends on (none by default).
        self.requires = []
        self.iwla = iwla

    def isValid(self, analysis_class, api_version):
        """Return True only when analysis class and API version both match."""
        # For now there is only version 1
        mismatch = (analysis_class != self.ANALYSIS_CLASS
                    or self.API_VERSION != api_version)
        return not mismatch

    def getRequirements(self):
        """Return the list stored in ``self.requires``."""
        return self.requires

    def load(self):
        """Initialise the plugin; the base implementation always succeeds."""
        return True

    def hook(self, iwla):
        """Plugin action; does nothing in the base class."""
        pass
def preloadPlugins(plugins, iwla):
    """Import, instantiate, validate and load every configured plugin.

    ``plugins`` maps a package path to a list of module names; each module
    is imported as ``<package>.<module>``.  The first ``IPlugin`` subclass
    found in the module is instantiated with ``iwla``.  A plugin is kept
    only if ``isValid`` accepts ``iwla.ANALYSIS_CLASS`` and
    ``iwla.API_VERSION``, all its requirements are met and ``load()``
    returns a true value.

    Returns a dict mapping ``<package>.<module>`` to the plugin instance.
    Any exception raised while handling one plugin is printed (with its
    traceback) and that plugin is skipped.
    """
    cache_plugins = {}

    for root in plugins:
        for plugin_filename in plugins[root]:
            plugin_path = root + '.' + plugin_filename
            try:
                mod = importlib.import_module(plugin_path)
                classes = [c for _, c in inspect.getmembers(mod, inspect.isclass)
                           if issubclass(c, IPlugin) and c.__name__ != 'IPlugin']

                if not classes:
                    print('No plugin defined in %s' % plugin_path)
                    continue

                plugin = classes[0](iwla)

                if not plugin.isValid(iwla.ANALYSIS_CLASS, iwla.API_VERSION):
                    continue

                # Requirements are class names and can only be satisfied by
                # plugins registered earlier in this same call.  Every
                # requirement is checked on its own: a single shared flag
                # would let a missing requirement pass once an earlier one
                # had matched.
                loaded_names = set(p.__class__.__name__
                                   for p in cache_plugins.values())
                requirements = plugin.getRequirements() or []
                missing = [r for r in requirements if r not in loaded_names]
                if missing:
                    print('Missing requirements for plugin %s' % plugin_path)
                    continue

                if not plugin.load():
                    print('Plugin %s load failed' % plugin_path)
                    continue

                print('\tRegister %s' % plugin_path)
                cache_plugins[plugin_path] = plugin
            except Exception as e:
                print('Error loading \'%s\' => %s' % (plugin_path, e))
                traceback.print_exc()

    return cache_plugins
iwla.py
1010
1111
1212
13
1314
1415
1516
......
4041
4142
4243
44
45
4346
4447
4548
46
4749
4850
49
50
51
52
53
54
5551
5652
5753
import gzip
import importlib
from iplugin import *
from display import *
from default_conf import *
DISPLAY_HOOK_DIRECTORY : display_hooks}
def _preloadPlugins(self):
self.cache_plugins = preloadPlugins(self.plugins, self)
return
ret = True
for root in self.plugins.keys():
for plugin_name in self.plugins[root]:
#p = root + '/' + plugin_name
p = root + '.' + plugin_name
try:
# fp, pathname, description = imp.find_module(plugin_name, [root])
# self.cache_plugins[p] = imp.load_module(p, fp, pathname, description)
#p = 'plugins.display.top_visitors'
#sys.path.append(root)
#self.cache_plugins[p] = importlib.import_module(plugin_name, root)
#sys.path.remove(root)
self.cache_plugins[p] = importlib.import_module(p)
mod = self.cache_plugins[p]
infos = mod.get_plugins_infos()
plugins/display/top_visitors.py
11
2
3
4
5
62
7
8
9
10
11
12
13
14
15
16
3
4
5
176
18
19
7
8
9
10
11
2012
21
22
23
24
13
14
2515
26
27
28
29
30
31
32
33
34
35
36
37
16
17
18
19
20
21
22
23
24
25
26
27
import time
from display import *
PLUGIN_CLASS = 'HTTP'
API_VERSION = 1
def get_plugins_infos():
infos = {
'class' : PLUGIN_CLASS,
'min_version' : API_VERSION,
'max_version' : -1
}
return infos
def load():
return True
from iwla import IWLA
from iplugin import IPlugin
from display import *
def hook(iwla):
stats = iwla.getMonthStats()
class IWLADisplayTopVisitors(IPlugin):
def __init__(self, iwla):
super(IWLADisplayTopVisitors, self).__init__(iwla)
self.API_VERSION = 1
self.requires = ['IWLAPostAnalysisTopVisitors']
top_visitors = stats.get('top_visitors', None)
if not top_visitors:
print 'Top visitors post analysis plugin not installed'
return
def hook(self, iwla):
stats = iwla.getMonthStats()
index = iwla.getDisplayIndex()
table = DisplayHTMLBlockTable('Top visitors', ['Host', 'Pages', 'Hits', 'Bandwidth', 'Last seen'])
for super_hit in top_visitors:
row = [
super_hit['remote_addr'],
super_hit['viewed_pages'],
super_hit['viewed_hits'],
bytesToStr(super_hit['bandwidth']),
time.asctime(super_hit['last_access'])
]
table.appendRow(row)
index.appendBlock(table)
index = iwla.getDisplayIndex()
table = DisplayHTMLBlockTable('Top visitors', ['Host', 'Pages', 'Hits', 'Bandwidth', 'Last seen'])
for super_hit in stats['top_visitors']:
row = [
super_hit['remote_addr'],
super_hit['viewed_pages'],
super_hit['viewed_hits'],
bytesToStr(super_hit['bandwidth']),
time.asctime(super_hit['last_access'])
]
table.appendRow(row)
index.appendBlock(table)
plugins/post_analysis/reverse_dns.py
1
21
2
33
4
5
4
5
6
7
68
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
9
10
11
12
13
14
15
16
17
18
19
3020
import socket
from iwla import IWLA
from iplugin import IPlugin
PLUGIN_CLASS = 'HTTP'
API_VERSION = 1
class IWLAPostAnalysisReverseDNS(IPlugin):
def __init__(self, iwla):
super(IWLAPostAnalysisReverseDNS, self).__init__(iwla)
self.API_VERSION = 1
def get_plugins_infos():
infos = {
'class' : PLUGIN_CLASS,
'min_version' : API_VERSION,
'max_version' : -1
}
return infos
def load():
socket.setdefaulttimeout(0.5)
return True
def hook(iwla):
hits = iwla.getValidVisitors()
for (k, hit) in hits.items():
if hit.get('dns_analysed', False): continue
try:
name, _, _ = socket.gethostbyaddr(k)
hit['remote_addr'] = name
except:
pass
finally:
hit['dns_analysed'] = True
def hook(self, iwla):
hits = iwla.getValidVisitors()
for (k, hit) in hits.items():
if hit.get('dns_analysed', False): continue
try:
name, _, _ = socket.gethostbyaddr(k)
hit['remote_addr'] = name
except:
pass
finally:
hit['dns_analysed'] = True
plugins/post_analysis/top_visitors.py
11
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
2
3
4
5
6
7
8
9
10
11
12
13
14
2315
from iwla import IWLA
PLUGIN_CLASS = 'HTTP'
API_VERSION = 1


def get_plugins_infos():
    """Return a dict describing this plugin's class and version fields."""
    return {
        'class': PLUGIN_CLASS,
        'min_version': API_VERSION,
        # -1 presumably means "no upper bound" -- TODO confirm with the loader
        'max_version': -1,
    }
def load():
    """Nothing to initialise for this plugin; always report success."""
    return True
def hook(iwla):
    """Store the ten visitors with the highest bandwidth in the month stats.

    Reads the visitors from ``iwla.getValidVisitors()`` and writes the
    result, sorted by decreasing ``bandwidth``, to
    ``iwla.getMonthStats()['top_visitors']``.
    """
    visitors = iwla.getValidVisitors()
    month_stats = iwla.getMonthStats()

    # sorted() is stable, so visitors with equal bandwidth keep dict order.
    ranked = sorted(visitors.values(),
                    key=lambda visitor: visitor['bandwidth'],
                    reverse=True)
    month_stats['top_visitors'] = ranked[:10]
from iplugin import IPlugin
class IWLAPostAnalysisTopVisitors(IPlugin):
    """Post-analysis plugin recording the ten biggest bandwidth consumers."""

    def __init__(self, iwla):
        super(IWLAPostAnalysisTopVisitors, self).__init__(iwla)
        self.API_VERSION = 1

    def hook(self, iwla):
        """Write the top ten visitors by bandwidth to the month statistics.

        The result is stored under the ``'top_visitors'`` key of
        ``iwla.getMonthStats()``, sorted by decreasing ``bandwidth``.
        """
        visitors = iwla.getValidVisitors()
        month_stats = iwla.getMonthStats()

        # sorted() is stable, so equal bandwidths keep the dict's own order.
        ranked = sorted(visitors.values(),
                        key=lambda visitor: visitor['bandwidth'],
                        reverse=True)
        month_stats['top_visitors'] = ranked[:10]
plugins/pre_analysis/H001_robot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import re
from iwla import IWLA
from awstats_robots_data import awstats_robots
PLUGIN_CLASS = 'HTTP'
API_VERSION = 1


def get_plugins_infos():
    """Return the plugin description: its class and min/max version fields."""
    infos = dict()
    infos['class'] = PLUGIN_CLASS
    infos['min_version'] = API_VERSION
    # -1 presumably means "no upper bound" -- TODO confirm with the loader
    infos['max_version'] = -1
    return infos
def load():
    """Replace the module-level ``awstats_robots`` strings by compiled regexes.

    Each pattern is compiled case-insensitively.  Always returns True.
    """
    global awstats_robots

    print('==> Generating robot dictionary')
    awstats_robots = [re.compile(pattern, re.IGNORECASE)
                      for pattern in awstats_robots]
    return True
# Basic rules to detect robots
def hook(iwla):
    """Flag every current visit that looks like a robot.

    Visits are read from ``iwla.getCurrentVisists()``.  A visit whose
    ``robot`` entry is already set is left untouched; a newly detected one
    gets ``robot`` set to 1.  The rules are tried in order and the first
    one that matches ends the examination of that visit.
    """
    hits = iwla.getCurrentVisists()
    for super_hit in hits.values():
        if super_hit['robot']:
            continue

        requests = super_hit['requests']
        first_page = requests[0]

        # 0) known robot user agent, checked only when the first request
        #    has the same day of month as the last access.  any() stops at
        #    the first matching pattern and the visit is then skipped; a
        #    `continue` placed inside the pattern loop would only move on
        #    to the next pattern and fall through to the rules below.
        if first_page['time_decoded'].tm_mday == super_hit['last_access'].tm_mday:
            user_agent = first_page['http_user_agent']
            if any(r.match(user_agent) for r in awstats_robots):
                super_hit['robot'] = 1
                continue

        # 1) no pages view --> robot
        if not super_hit['viewed_pages']:
            super_hit['robot'] = 1
            continue

        # 2) pages without hit --> robot
        if not super_hit['viewed_hits']:
            super_hit['robot'] = 1
            continue

        # 3) /robots.txt read
        if any(hit['extract_request']['http_uri'] == '/robots.txt'
               for hit in requests):
            super_hit['robot'] = 1
            continue

        # 4) no referer on any non-page hit (viewed_hits is known to be
        #    non-zero here because rule 2 did not trigger)
        if not any(not hit['is_page'] and hit['http_referer']
                   for hit in requests):
            super_hit['robot'] = 1
plugins/pre_analysis/H002_soutade.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import re
from iwla import IWLA
# Remove logo from indefero: matches any URI that ends with "/logo/"
# and has at least one character before it.
logo_re = re.compile(r'^.+/logo/$')
PLUGIN_CLASS = 'HTTP'
API_VERSION = 1


def get_plugins_infos():
    """Return the plugin description dict (class, min and max version)."""
    return dict([
        ('class', PLUGIN_CLASS),
        ('min_version', API_VERSION),
        # -1 presumably means "no upper bound" -- TODO confirm with the loader
        ('max_version', -1),
    ])
def load():
    """No set-up is required by this plugin; loading always succeeds."""
    return True
# Reclassify logo requests: count them as hits instead of pages
def hook(iwla):
    """Turn page requests whose URI matches ``logo_re`` into plain hits.

    Only non-robot visits from ``iwla.getCurrentVisists()`` are examined,
    and within them only page requests with status 200 whose day of month
    equals that of the visit's last access.  For each match the request's
    ``is_page`` flag is cleared and the visit's ``viewed_pages`` /
    ``viewed_hits`` counters are adjusted.
    """
    for visit in iwla.getCurrentVisists().values():
        if visit['robot']:
            continue

        for request in visit['requests']:
            candidate = (
                request['is_page']
                and int(request['status']) == 200
                and request['time_decoded'].tm_mday == visit['last_access'].tm_mday
            )
            if candidate and logo_re.match(request['extract_request']['extract_uri']):
                request['is_page'] = False
                visit['viewed_pages'] -= 1
                visit['viewed_hits'] += 1
plugins/pre_analysis/robots.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import re
from iwla import IWLA
from iplugin import IPlugin
from awstats_robots_data import awstats_robots
class IWLAPreAnalysisRobots(IPlugin):
    """Pre-analysis plugin that flags visits that look like robots."""

    def __init__(self, iwla):
        super(IWLAPreAnalysisRobots, self).__init__(iwla)
        self.API_VERSION = 1

    def load(self):
        """Compile the imported ``awstats_robots`` patterns, ignoring case.

        The compiled list is kept in ``self.awstats_robots``; the imported
        list itself is only read, so no ``global`` statement is needed.
        Always returns True.
        """
        self.awstats_robots = [re.compile(pattern, re.IGNORECASE)
                               for pattern in awstats_robots]
        return True

    # Basic rules to detect robots
    def hook(self, iwla):
        """Set ``robot`` to 1 on every current visit that looks like a robot.

        Visits are read from ``iwla.getCurrentVisists()``; visits already
        flagged are skipped.  The rules are tried in order and the first
        one that matches ends the examination of that visit.
        """
        hits = iwla.getCurrentVisists()
        for super_hit in hits.values():
            if super_hit['robot']:
                continue

            requests = super_hit['requests']
            first_page = requests[0]

            # 0) known robot user agent, checked only when the first request
            #    has the same day of month as the last access.  any() stops
            #    at the first matching pattern and the visit is then
            #    skipped; a `continue` placed inside the pattern loop would
            #    only move on to the next pattern and fall through to the
            #    rules below.
            if first_page['time_decoded'].tm_mday == super_hit['last_access'].tm_mday:
                user_agent = first_page['http_user_agent']
                if any(r.match(user_agent) for r in self.awstats_robots):
                    super_hit['robot'] = 1
                    continue

            # 1) no pages view --> robot
            if not super_hit['viewed_pages']:
                super_hit['robot'] = 1
                continue

            # 2) pages without hit --> robot
            if not super_hit['viewed_hits']:
                super_hit['robot'] = 1
                continue

            # 3) /robots.txt read
            if any(hit['extract_request']['http_uri'] == '/robots.txt'
                   for hit in requests):
                super_hit['robot'] = 1
                continue

            # 4) no referer on any non-page hit (viewed_hits is known to be
            #    non-zero here because rule 2 did not trigger)
            if not any(not hit['is_page'] and hit['http_referer']
                       for hit in requests):
                super_hit['robot'] = 1
plugins/pre_analysis/soutade.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import re
from iwla import IWLA
from iplugin import IPlugin
# Reclassify logo requests: count them as hits instead of pages
class IWLAPreAnalysisSoutade(IPlugin):
    """Pre-analysis plugin that stops counting "/logo/" requests as pages."""

    def __init__(self, iwla):
        super(IWLAPreAnalysisSoutade, self).__init__(iwla)
        self.API_VERSION = 1

    def load(self):
        """Compile the logo URI pattern; always returns True."""
        # Remove logo from indefero
        self.logo_re = re.compile(r'^.+/logo/$')
        return True

    def hook(self, iwla):
        """Turn page requests whose URI matches ``self.logo_re`` into hits.

        Only non-robot visits from ``iwla.getCurrentVisists()`` are
        examined, and within them only page requests with status 200 whose
        day of month equals that of the visit's last access.  For each
        match the request's ``is_page`` flag is cleared and the visit's
        ``viewed_pages`` / ``viewed_hits`` counters are adjusted.
        """
        for visit in iwla.getCurrentVisists().values():
            if visit['robot']:
                continue

            for request in visit['requests']:
                candidate = (
                    request['is_page']
                    and int(request['status']) == 200
                    and request['time_decoded'].tm_mday == visit['last_access'].tm_mday
                )
                if candidate and self.logo_re.match(request['extract_request']['extract_uri']):
                    request['is_page'] = False
                    visit['viewed_pages'] -= 1
                    visit['viewed_hits'] += 1

Archive Download the corresponding diff file

Branches

Tags