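Add conf_requires: a plugin can declare the configuration keys it needs and is skipped at preload if one is missing.

Load plugins in a fixed order: pre-analysis hooks, then post-analysis hooks, then display hooks.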
This commit is contained in:
Grégory Soutadé 2014-11-27 12:34:42 +01:00
parent dd8349ab08
commit 9fbc5448bc
7 changed files with 60 additions and 35 deletions

View File

@ -16,11 +16,11 @@ DB_ROOT = './output/'
DISPLAY_ROOT = './output/' DISPLAY_ROOT = './output/'
pre_analysis_hooks = ['page_to_hit', 'robots'] pre_analysis_hooks = ['page_to_hit', 'robots']
post_analysis_hooks = ['referers', 'top_pages'] post_analysis_hooks = ['referers', 'top_pages', 'top_downloads']
# post_analysis_hooks = ['top_visitors', 'reverse_dns'] # post_analysis_hooks = ['top_visitors', 'reverse_dns']
display_hooks = ['top_visitors', 'all_visits', 'referers', 'top_pages'] display_hooks = ['top_visitors', 'all_visits', 'referers', 'top_pages', 'top_downloads']
reverse_dns_timeout = 0.2 reverse_dns_timeout = 0.2
page_to_hit_conf = [r'^.+/logo/$'] page_to_hit_conf = [r'^.+/logo[/]?$', r'^.+/category/.+$', r'^.+/tag/.+$', r'^.+/archive/.+$', r'^.+/ljdc[/]?$']
count_hit_only_visitors = False count_hit_only_visitors = True

View File

@ -20,7 +20,10 @@ pre_analysis_hooks = []
post_analysis_hooks = [] post_analysis_hooks = []
display_hooks = [] display_hooks = []
pages_extensions = ['/', 'html', 'xhtml', 'py', 'pl', 'rb', 'php'] pages_extensions = ['/', 'htm', 'html', 'xhtml', 'py', 'pl', 'rb', 'php']
viewed_http_codes = [200, 304] viewed_http_codes = [200, 304]
count_hit_only_visitors = True count_hit_only_visitors = True
multimedia_files = ['png', 'jpg', 'jpeg', 'gif', 'ico',
'css', 'js']

View File

@ -7,6 +7,7 @@ class IPlugin(object):
def __init__(self, iwla): def __init__(self, iwla):
self.iwla = iwla self.iwla = iwla
self.requires = [] self.requires = []
self.conf_requires = []
self.API_VERSION = 1 self.API_VERSION = 1
self.ANALYSIS_CLASS = 'HTTP' self.ANALYSIS_CLASS = 'HTTP'
@ -22,6 +23,9 @@ class IPlugin(object):
def getRequirements(self): def getRequirements(self):
return self.requires return self.requires
def getConfRequirements(self):
return self.conf_requires
def load(self): def load(self):
return True return True
@ -33,8 +37,8 @@ def preloadPlugins(plugins, iwla):
print "==> Preload plugins" print "==> Preload plugins"
for root in plugins.keys(): for (root, plugins_filenames) in plugins:
for plugin_filename in plugins[root]: for plugin_filename in plugins_filenames:
plugin_path = root + '.' + plugin_filename plugin_path = root + '.' + plugin_filename
try: try:
mod = importlib.import_module(plugin_path) mod = importlib.import_module(plugin_path)
@ -57,9 +61,19 @@ def preloadPlugins(plugins, iwla):
#print 'Load plugin %s' % (plugin_name) #print 'Load plugin %s' % (plugin_name)
conf_requirements = plugin.getConfRequirements()
requirement_validated = True
for r in conf_requirements:
conf_value = iwla.getConfValue(r, None)
if conf_value is None:
print '\'%s\' conf value required for %s' % (r, plugin_path)
requirement_validated = False
break
if not requirement_validated: continue
requirements = plugin.getRequirements() requirements = plugin.getRequirements()
if requirements:
requirement_validated = False requirement_validated = False
for r in requirements: for r in requirements:
for (_,p) in cache_plugins.items(): for (_,p) in cache_plugins.items():
@ -67,9 +81,9 @@ def preloadPlugins(plugins, iwla):
requirement_validated = True requirement_validated = True
break break
if not requirement_validated: if not requirement_validated:
print 'Missing requirements for plugin %s' % (plugin_path) print 'Missing requirements \'%s\' for plugin %s' % (r, plugin_path)
break break
if not requirement_validated: continue if requirements and not requirement_validated: continue
if not plugin.load(): if not plugin.load():
print 'Plugin %s load failed' % (plugin_path) print 'Plugin %s load failed' % (plugin_path)
@ -78,7 +92,7 @@ def preloadPlugins(plugins, iwla):
print '\tRegister %s' % (plugin_path) print '\tRegister %s' % (plugin_path)
cache_plugins[plugin_path] = plugin cache_plugins[plugin_path] = plugin
except Exception as e: except Exception as e:
print 'Error loading \'%s\' => %s' % (plugin_path, e) print 'Error loading %s => %s' % (plugin_path, e)
traceback.print_exc() traceback.print_exc()
return cache_plugins return cache_plugins

25
iwla.py
View File

@ -35,11 +35,11 @@ class IWLA(object):
self.http_request_extracted = re.compile(r'(?P<http_method>\S+) (?P<http_uri>\S+) (?P<http_version>\S+)') self.http_request_extracted = re.compile(r'(?P<http_method>\S+) (?P<http_uri>\S+) (?P<http_version>\S+)')
self.log_re = re.compile(self.log_format_extracted) self.log_re = re.compile(self.log_format_extracted)
self.uri_re = re.compile(r'(?P<extract_uri>[^\?]+)(\?(?P<extract_parameters>.+))?') self.uri_re = re.compile(r'(?P<extract_uri>[^\?]+)(\?(?P<extract_parameters>.+))?')
self.plugins = {conf.PRE_HOOK_DIRECTORY : conf.pre_analysis_hooks, self.plugins = [(conf.PRE_HOOK_DIRECTORY , conf.pre_analysis_hooks),
conf.POST_HOOK_DIRECTORY : conf.post_analysis_hooks, (conf.POST_HOOK_DIRECTORY , conf.post_analysis_hooks),
conf.DISPLAY_HOOK_DIRECTORY : conf.display_hooks} (conf.DISPLAY_HOOK_DIRECTORY , conf.display_hooks)]
def getConfValue(self, key, default): def getConfValue(self, key, default=None):
if not key in dir(conf): if not key in dir(conf):
return default return default
else: else:
@ -77,7 +77,7 @@ class IWLA(object):
def isValidForCurrentAnalysis(self, request): def isValidForCurrentAnalysis(self, request):
cur_time = self.meta_infos['start_analysis_time'] cur_time = self.meta_infos['start_analysis_time']
return (time.mktime(cur_time) < time.mktime(request['time_decoded'])) return (time.mktime(cur_time) <= time.mktime(request['time_decoded']))
def _clearMeta(self): def _clearMeta(self):
self.meta_infos = { self.meta_infos = {
@ -115,11 +115,14 @@ class IWLA(object):
return pickle.load(f) return pickle.load(f)
return None return None
def _callPlugins(self, root, *args): def _callPlugins(self, target_root, *args):
print '==> Call plugins (%s)' % root print '==> Call plugins (%s)' % target_root
for p in self.plugins[root]: for (root, plugins) in self.plugins:
if root != target_root: continue
for p in plugins:
mod = self.cache_plugins.get(root + '.' + p, None)
if mod:
print '\t%s' % (p) print '\t%s' % (p)
mod = self.cache_plugins[root + '.' + p]
mod.hook(*args) mod.hook(*args)
def isPage(self, request): def isPage(self, request):
@ -143,9 +146,7 @@ class IWLA(object):
request = hit['extract_request'] request = hit['extract_request']
if 'extract_uri' in request.keys(): uri = request.get('extract_uri', request['http_uri'])
uri = request['extract_uri'] = request['http_uri']
uri = request['extract_uri']
hit['is_page'] = self.isPage(uri) hit['is_page'] = self.isPage(uri)

View File

@ -10,6 +10,7 @@ class IWLAPostAnalysisReferers(IPlugin):
def __init__(self, iwla): def __init__(self, iwla):
super(IWLAPostAnalysisReferers, self).__init__(iwla) super(IWLAPostAnalysisReferers, self).__init__(iwla)
self.API_VERSION = 1 self.API_VERSION = 1
self.conf_requires = ['domain_name']
def _getSearchEngine(self, hashid): def _getSearchEngine(self, hashid):
for (k, e) in self.search_engines.items(): for (k, e) in self.search_engines.items():
@ -22,7 +23,7 @@ class IWLAPostAnalysisReferers(IPlugin):
domain_name = self.iwla.getConfValue('domain_name', '') domain_name = self.iwla.getConfValue('domain_name', '')
if not domain_name: if not domain_name:
print 'domain_name required in conf' print 'domain_name must not be empty !'
return False return False
self.own_domain_re = re.compile(r'.*%s.*' % (domain_name)) self.own_domain_re = re.compile(r'.*%s.*' % (domain_name))

View File

@ -4,12 +4,15 @@ from iwla import IWLA
from iplugin import IPlugin from iplugin import IPlugin
class IWLAPostAnalysisReverseDNS(IPlugin): class IWLAPostAnalysisReverseDNS(IPlugin):
DEFAULT_DNS_TIMEOUT = 0.5
def __init__(self, iwla): def __init__(self, iwla):
super(IWLAPostAnalysisReverseDNS, self).__init__(iwla) super(IWLAPostAnalysisReverseDNS, self).__init__(iwla)
self.API_VERSION = 1 self.API_VERSION = 1
def load(self): def load(self):
timeout = self.iwla.getConfValue('reverse_dns_timeout', 0.5) timeout = self.iwla.getConfValue('reverse_dns_timeout',
IWLAPostAnalysisReverseDNS.DEFAULT_DNS_TIMEOUT)
socket.setdefaulttimeout(timeout) socket.setdefaulttimeout(timeout)
return True return True

View File

@ -10,6 +10,7 @@ class IWLAPreAnalysisPageToHit(IPlugin):
def __init__(self, iwla): def __init__(self, iwla):
super(IWLAPreAnalysisPageToHit, self).__init__(iwla) super(IWLAPreAnalysisPageToHit, self).__init__(iwla)
self.API_VERSION = 1 self.API_VERSION = 1
self.conf_requires = ['viewed_http_codes']
def load(self): def load(self):
# Remove logo from indefero # Remove logo from indefero
@ -21,7 +22,8 @@ class IWLAPreAnalysisPageToHit(IPlugin):
def hook(self): def hook(self):
hits = self.iwla.getCurrentVisists() hits = self.iwla.getCurrentVisists()
viewed_http_codes = self.iwla.getConfValue('viewed_http_codes', [200, 304]) viewed_http_codes = self.iwla.getConfValue('viewed_http_codes')
for (k, super_hit) in hits.items(): for (k, super_hit) in hits.items():
if super_hit['robot']: continue if super_hit['robot']: continue
@ -31,6 +33,7 @@ class IWLAPreAnalysisPageToHit(IPlugin):
uri = request['extract_request']['extract_uri'] uri = request['extract_request']['extract_uri']
for regexp in self.regexps: for regexp in self.regexps:
if regexp.match(uri): if regexp.match(uri):
#print '%s is an hit' % uri
request['is_page'] = False request['is_page'] = False
super_hit['viewed_pages'] -= 1 super_hit['viewed_pages'] -= 1
super_hit['viewed_hits'] += 1 super_hit['viewed_hits'] += 1