iwla Git source tree: iwla.py

Source at commit 4e02325733e5e8e4f5de2f0046e721f8da7abfff, by Grégory Soutadé (initial commit).
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright Grégory Soutadé 2015

# This file is part of iwla

# iwla is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# iwla is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with iwla. If not, see <http://www.gnu.org/licenses/>.
#

import os
import shutil
import sys
import re
import time
import pickle
import gzip
import importlib
import argparse
import logging
import gettext
from calendar import monthrange
from datetime import date

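# Configuration: load the defaults, then override them in place with the
# values defined in the user's conf module.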
import default_conf as conf
import conf as _
conf.__dict__.update(_.__dict__)
del _

from iplugin import *
from display import *

"""
Main class IWLA
Parses logs, computes statistics, calls plugins and produces output.
For now, only HTTP logs are supported.

Plugin requirements:
    None

Conf values needed:
    analyzed_filename
    domain_name
    locales_path
    compress_output_files*

Output files:
    DB_ROOT/meta.db
    DB_ROOT/year/month/iwla.db
    OUTPUT_ROOT/index.html
    OUTPUT_ROOT/year/month/index.html

Statistics creation:

meta:
    last_time
    start_analysis_time
    stats =>
        year =>
            month =>
                viewed_bandwidth
                not_viewed_bandwidth
                viewed_pages
                viewed_hits
                nb_visits
                nb_visitors

month_stats:
    viewed_bandwidth
    not_viewed_bandwidth
    viewed_pages
    viewed_hits
    nb_visits

days_stats:
    day =>
        viewed_bandwidth
        not_viewed_bandwidth
        viewed_pages
        viewed_hits
        nb_visits
        nb_visitors

visits:
    remote_addr =>
        remote_addr
        remote_ip
        viewed_pages
        viewed_hits
        not_viewed_pages
        not_viewed_hits
        bandwidth
        last_access
        requests =>
            [fields_from_format_log]
        extract_request =>
            extract_uri
            extract_parameters*
        extract_referer* =>
            extract_uri
            extract_parameters*
        robot
        hit_only
        is_page

valid_visitors:
    month_stats without robot and hit-only visitors (if conf.count_hit_only_visitors is not set)

Statistics update:
    None

Statistics deletion:
    None
"""


class IWLA(object):

    ANALYSIS_CLASS = 'HTTP'
    API_VERSION = 1
    IWLA_VERSION = '0.1'

    def __init__(self, logLevel):
        self.meta_infos = {}
        self.analyse_started = False
        self.current_analysis = {}
        self.cache_plugins = {}
        self.display = DisplayHTMLBuild(self)
        self.valid_visitors = None

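        # Build the log-line regex from conf.log_format: literal characters
        # are escaped, then each $field placeholder becomes a named capture
        # group (?P<field>.+).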
        self.log_format_extracted = re.sub(r'([^\$\w])', r'\\\g<1>', conf.log_format)
        self.log_format_extracted = re.sub(r'\$(\w+)', '(?P<\g<1>>.+)', self.log_format_extracted)
        self.http_request_extracted = re.compile(r'(?P<http_method>\S+) (?P<http_uri>\S+) (?P<http_version>\S+)')
        self.log_re = re.compile(self.log_format_extracted)
        self.uri_re = re.compile(r'(?P<extract_uri>[^\?#]+)(\?(?P<extract_parameters>[^#]+))?(#.*)?')
        self.domain_name_re = re.compile(r'.*%s' % conf.domain_name)
        self.plugins = [(conf.PRE_HOOK_DIRECTORY , conf.pre_analysis_hooks),
                        (conf.POST_HOOK_DIRECTORY , conf.post_analysis_hooks),
                        (conf.DISPLAY_HOOK_DIRECTORY , conf.display_hooks)]

        logging.basicConfig(format='%(name)s %(message)s', level=logLevel)
        self.logger = logging.getLogger(self.__class__.__name__)
        self.logger.info('==> Start')
        try:
            t = gettext.translation('iwla', localedir=conf.locales_path, languages=[conf.locale], codeset='utf8')
            self.logger.info('\tUsing locale %s' % (conf.locale))
        except IOError:
            t = gettext.NullTranslations()
            self.logger.info('\tUsing default locale en_EN')
        self._ = t.ugettext

    def getVersion(self):
        return IWLA.IWLA_VERSION

    def getConfValue(self, key, default=None):
        if not key in dir(conf):
            return default
        else:
            return conf.__dict__[key]

    def _clearVisits(self):
        self.current_analysis = {
            'days_stats' : {},
            'month_stats' : {},
            'visits' : {}
        }
        self.valid_visitors = None
        return self.current_analysis

    def getDaysStats(self):
        return self.current_analysis['days_stats']

    def getMonthStats(self):
        return self.current_analysis['month_stats']

    def getCurrentVisists(self):
        return self.current_analysis['visits']

    def getValidVisitors(self):
        return self.valid_visitors

    def getDisplay(self):
        return self.display

    def getCurTime(self):
        return self.meta_infos['last_time']

    def getStartAnalysisTime(self):
        return self.meta_infos['start_analysis_time']

    def isValidForCurrentAnalysis(self, request):
        cur_time = self.meta_infos['start_analysis_time']
        # Analyse not started
        if not cur_time: return False
        return (time.mktime(cur_time) < time.mktime(request['time_decoded']))

    def hasBeenViewed(self, request):
        return int(request['status']) in conf.viewed_http_codes

    def getCurDisplayPath(self, filename):
        cur_time = self.meta_infos['last_time']
        return os.path.join(str(cur_time.tm_year), '%02d' % (cur_time.tm_mon), filename)

    def getResourcesPath(self):
        return conf.resources_path

    def getCSSPath(self):
        return conf.css_path

    def _clearMeta(self):
        self.meta_infos = {
            'last_time' : None,
            'start_analysis_time' : None
        }
        return self.meta_infos

    def _clearDisplay(self):
        self.display = DisplayHTMLBuild(self)
        return self.display

    def getDBFilename(self, time):
        return os.path.join(conf.DB_ROOT, str(time.tm_year), '%02d' % (time.tm_mon), conf.DB_FILENAME)

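    # Databases are gzip-compressed pickle files: _serialize() dumps to a
    # temporary file, copies it into the gzipped target, then removes the
    # temporary file; _deserialize() is its counterpart.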
    def _serialize(self, obj, filename):
        base = os.path.dirname(filename)
        if not os.path.exists(base):
            os.makedirs(base)

        # TODO : remove return
        #return

        with open(filename + '.tmp', 'wb+') as f, gzip.open(filename, 'w') as fzip:
            pickle.dump(obj, f)
            f.seek(0)
            fzip.write(f.read())
        os.remove(filename + '.tmp')

    def _deserialize(self, filename):
        if not os.path.exists(filename):
            return None

        with gzip.open(filename, 'r') as f:
            return pickle.load(f)
        return None

    def _callPlugins(self, target_root, *args):
        self.logger.info('==> Call plugins (%s)' % (target_root))
        for (root, plugins) in self.plugins:
            if root != target_root: continue
            for p in plugins:
                mod = self.cache_plugins.get(root + '.' + p, None)
                if mod:
                    self.logger.info('\t%s' % (p))
                    mod.hook(*args)

    def isPage(self, request):
        for e in conf.pages_extensions:
            if request.endswith(e):
                return True

        return False

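    # Attach a parsed hit to its visitor entry (keyed by remote_addr),
    # creating the visitor if needed and updating bandwidth, last access
    # time and viewed/not viewed page and hit counters.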
    def _appendHit(self, hit):
        remote_addr = hit['remote_addr']

        if not remote_addr: return

        if not remote_addr in self.current_analysis['visits'].keys():
            self._createVisitor(hit)

        super_hit = self.current_analysis['visits'][remote_addr]
        super_hit['requests'].append(hit)
        super_hit['bandwidth'] += int(hit['body_bytes_sent'])
        super_hit['last_access'] = self.meta_infos['last_time']

        request = hit['extract_request']

        uri = request.get('extract_uri', request['http_uri'])

        hit['is_page'] = self.isPage(uri)

        if super_hit['robot'] or\
           not self.hasBeenViewed(hit):
            page_key = 'not_viewed_pages'
            hit_key = 'not_viewed_hits'
        else:
            page_key = 'viewed_pages'
            hit_key = 'viewed_hits'

        if hit['is_page']:
            super_hit[page_key] += 1
        else:
            super_hit[hit_key] += 1

    def _createVisitor(self, hit):
        super_hit = self.current_analysis['visits'][hit['remote_addr']] = {}
        super_hit['remote_addr'] = hit['remote_addr']
        super_hit['remote_ip'] = hit['remote_addr']
        super_hit['viewed_pages'] = 0
        super_hit['viewed_hits'] = 0
        super_hit['not_viewed_pages'] = 0
        super_hit['not_viewed_hits'] = 0
        super_hit['bandwidth'] = 0
        super_hit['last_access'] = self.meta_infos['last_time']
        super_hit['requests'] = []
        super_hit['robot'] = False
        super_hit['hit_only'] = 0

    def _decodeHTTPRequest(self, hit):
        if not 'request' in hit.keys(): return False

        groups = self.http_request_extracted.match(hit['request'])

        if groups:
            hit['extract_request'] = groups.groupdict()
            uri_groups = self.uri_re.match(hit['extract_request']['http_uri'])
            if uri_groups:
                d = uri_groups.groupdict()
                hit['extract_request']['extract_uri'] = d['extract_uri']
                if 'extract_parameters' in d.keys():
                    hit['extract_request']['extract_parameters'] = d['extract_parameters']
        else:
            self.logger.warning("Bad request extraction %s" % (hit['request']))
            return False

        if hit['http_referer']:
            referer_groups = self.uri_re.match(hit['http_referer'])
            if referer_groups:
                hit['extract_referer'] = referer_groups.groupdict()
        return True

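    # Parse time_local with conf.time_format. On Python < 3.2, where %z is
    # not handled by strptime, the trailing GMT offset is stripped and
    # applied manually.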
    def _decodeTime(self, hit):
        try:
            hit['time_decoded'] = time.strptime(hit['time_local'], conf.time_format)
        except ValueError, e:
            if sys.version_info < (3, 2):
                # Try without UTC value at the end (%z not recognized)
                gmt_offset_str = hit['time_local'][-5:]
                gmt_offset_hours = int(gmt_offset_str[1:3])*60*60
                gmt_offset_minutes = int(gmt_offset_str[3:5])*60
                gmt_offset = gmt_offset_hours + gmt_offset_minutes
                hit['time_decoded'] = time.strptime(hit['time_local'][:-6], conf.time_format[:-3])
                if gmt_offset_str[0] == '+':
                    hit['time_decoded'] = time.localtime(time.mktime(hit['time_decoded'])+gmt_offset)
                else:
                    hit['time_decoded'] = time.localtime(time.mktime(hit['time_decoded'])-gmt_offset)
            else:
                raise e
        return hit['time_decoded']

    def getDisplayIndex(self):
        cur_time = self.meta_infos['last_time']
        filename = self.getCurDisplayPath('index.html')

        return self.display.getPage(filename)

    def _generateDisplayDaysStats(self):
        cur_time = self.meta_infos['last_time']
        title = createCurTitle(self, self._('Statistics'))
        filename = self.getCurDisplayPath('index.html')
        self.logger.info('==> Generate display (%s)' % (filename))
        page = self.display.createPage(title, filename, conf.css_path)

        _, nb_month_days = monthrange(cur_time.tm_year, cur_time.tm_mon)
        days = self.display.createBlock(DisplayHTMLBlockTableWithGraph, self._('By day'), [self._('Day'), self._('Visits'), self._('Pages'), self._('Hits'), self._('Bandwidth'), self._('Not viewed Bandwidth')], None, nb_month_days, range(1,6))
        days.setColsCSSClass(['', 'iwla_visit', 'iwla_page', 'iwla_hit', 'iwla_bandwidth', 'iwla_bandwidth'])
        nb_visits = 0
        nb_days = 0
        for i in range(1, nb_month_days+1):
            day = '%d<br/>%s' % (i, time.strftime('%b', cur_time))
            full_day = '%02d %s %d' % (i, time.strftime('%b', cur_time), cur_time.tm_year)
            if i in self.current_analysis['days_stats'].keys():
                stats = self.current_analysis['days_stats'][i]
                row = [full_day, stats['nb_visits'], stats['viewed_pages'], stats['viewed_hits'],
                       stats['viewed_bandwidth'], stats['not_viewed_bandwidth']]
                nb_visits += stats['nb_visits']
                nb_days += 1
            else:
                row = [full_day, 0, 0, 0, 0, 0]
            days.appendRow(row)
            days.setCellValue(i-1, 4, bytesToStr(row[4]))
            days.setCellValue(i-1, 5, bytesToStr(row[5]))
            days.appendShortTitle(day)
            adate = date(cur_time.tm_year, cur_time.tm_mon, i)
            week_day = adate.weekday()
            if week_day == 5 or week_day == 6:
                days.setRowCSSClass(i-1, 'iwla_weekend')
            if adate == date.today():
                css = days.getCellCSSClass(i-1, 0)
                if css: css = '%s %s' % (css, 'iwla_curday')
                else: css = 'iwla_curday'
                days.setCellCSSClass(i-1, 0, css)

        stats = self.current_analysis['month_stats']

        row = [0, nb_visits, stats['viewed_pages'], stats['viewed_hits'], stats['viewed_bandwidth'], stats['not_viewed_bandwidth']]
        if nb_days:
            average_row = map(lambda(v): int(v/nb_days), row)
        else:
            average_row = map(lambda(v): 0, row)

        average_row[0] = self._('Average')
        average_row[4] = bytesToStr(average_row[4])
        average_row[5] = bytesToStr(average_row[5])
        days.appendRow(average_row)

        row[0] = self._('Total')
        row[4] = bytesToStr(row[4])
        row[5] = bytesToStr(row[5])
        days.appendRow(row)
        page.appendBlock(days)
        self.display.addPage(page)

    def _generateDisplayMonthStats(self, page, year, month_stats):
        cur_time = time.localtime()
        months_name = ['', self._('Jan'), self._('Feb'), self._('Mar'), self._('Apr'), self._('May'), self._('June'), self._('Jul'), self._('Aug'), self._('Sep'), self._('Oct'), self._('Nov'), self._('Dec')]
        title = '%s %d' % (self._('Summary'), year)
        cols = [self._('Month'), self._('Visitors'), self._('Visits'), self._('Pages'), self._('Hits'), self._('Bandwidth'), self._('Not viewed Bandwidth'), self._('Details')]
        graph_cols=range(1,7)
        months = self.display.createBlock(DisplayHTMLBlockTableWithGraph, title, cols, None, 12, graph_cols)
        months.setColsCSSClass(['', 'iwla_visitor', 'iwla_visit', 'iwla_page', 'iwla_hit', 'iwla_bandwidth', 'iwla_bandwidth', ''])
        total = [0] * len(cols)
        for i in range(1, 13):
            month = '%s<br/>%d' % (months_name[i], year)
            full_month = '%s %d' % (months_name[i], year)
            if i in month_stats.keys():
                stats = month_stats[i]
                link = '<a href="%d/%02d/index.html">%s</a>' % (year, i, self._('Details'))
                row = [full_month, stats['nb_visitors'], stats['nb_visits'], stats['viewed_pages'], stats['viewed_hits'],
                       stats['viewed_bandwidth'], stats['not_viewed_bandwidth'], link]
                for j in graph_cols:
                    total[j] += row[j]
            else:
                row = [full_month, 0, 0, 0, 0, 0, 0, '']
            months.appendRow(row)
            months.setCellValue(i-1, 5, bytesToStr(row[5]))
            months.setCellValue(i-1, 6, bytesToStr(row[6]))
            months.appendShortTitle(month)
            if year == cur_time.tm_year and i == cur_time.tm_mon:
                css = months.getCellCSSClass(i-1, 0)
                if css: css = '%s %s' % (css, 'iwla_curday')
                else: css = 'iwla_curday'
                months.setCellCSSClass(i-1, 0, css)

        total[0] = self._('Total')
        total[5] = bytesToStr(total[5])
        total[6] = bytesToStr(total[6])
        total[7] = u''
        months.appendRow(total)
        page.appendBlock(months)

    def _generateDisplayWholeMonthStats(self):
        title = '%s %s' % (self._('Statistics for'), conf.domain_name)
        filename = 'index.html'

        self.logger.info('==> Generate main page (%s)' % (filename))

        page = self.display.createPage(title, filename, conf.css_path)

        last_update = '<b>%s</b> %s<br />' % (self._('Last update'), time.strftime('%02d %b %Y %H:%M', time.localtime()))
        page.appendBlock(self.display.createBlock(DisplayHTMLRaw, last_update))

        for year in sorted(self.meta_infos['stats'].keys(), reverse=True):
            self._generateDisplayMonthStats(page, year, self.meta_infos['stats'][year])

        self.display.addPage(page)

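    # Write a .gz copy next to each generated file whose extension appears
    # in conf.compress_output_files, skipping copies that are already up to
    # date, so the compressed variants can be served directly.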
    def _compressFile(self, build_time, root, filename):
        path = os.path.join(root, filename)
        gz_path = path + '.gz'

        self.logger.debug('Compress %s => %s' % (path, gz_path))

        if not os.path.exists(gz_path) or\
           os.stat(path).st_mtime > build_time:
            with open(path, 'rb') as f_in, gzip.open(gz_path, 'wb') as f_out:
                f_out.write(f_in.read())

    def _compressFiles(self, build_time, root):
        if not conf.compress_output_files: return
        for rootdir, subdirs, files in os.walk(root, followlinks=True):
            for f in files:
                for ext in conf.compress_output_files:
                    if f.endswith(ext):
                        self._compressFile(build_time, rootdir, f)
                        break

    def _generateDisplay(self):
        self._generateDisplayDaysStats()
        self._callPlugins(conf.DISPLAY_HOOK_DIRECTORY)
        self._generateDisplayWholeMonthStats()
        build_time = time.localtime()
        self.display.build(conf.DISPLAY_ROOT)
        self._compressFiles(build_time, conf.DISPLAY_ROOT)

    def _createEmptyStats(self):
        stats = {}
        stats['viewed_bandwidth'] = 0
        stats['not_viewed_bandwidth'] = 0
        stats['viewed_pages'] = 0
        stats['viewed_hits'] = 0
        stats['nb_visits'] = 0

        return stats

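    # Aggregate the day stats into month stats, keep only valid (non-robot)
    # visitors, run POST hooks, serialize the month database and regenerate
    # the HTML output.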
    def _generateMonthStats(self):
        self._clearDisplay()

        visits = self.current_analysis['visits']

        stats = self._createEmptyStats()
        for (day, stat) in self.current_analysis['days_stats'].items():
            for k in stats.keys():
                stats[k] += stat[k]

        duplicated_stats = {k:v for (k,v) in stats.items()}

        cur_time = self.meta_infos['last_time']
        self.logger.info("== Stats for %d/%02d ==" % (cur_time.tm_year, cur_time.tm_mon))
        self.logger.info(stats)

        if not 'month_stats' in self.current_analysis.keys():
            self.current_analysis['month_stats'] = stats
        else:
            for (k,v) in stats.items():
                self.current_analysis['month_stats'][k] = v

        self.valid_visitors = {}
        for (k,v) in visits.items():
            if v['robot']: continue
            if not (conf.count_hit_only_visitors or\
                    v['viewed_pages']):
                continue
            self.valid_visitors[k] = v

        duplicated_stats['nb_visitors'] = stats['nb_visitors'] = len(self.valid_visitors.keys())

        self._callPlugins(conf.POST_HOOK_DIRECTORY)

        path = self.getDBFilename(cur_time)
        if os.path.exists(path):
            os.remove(path)

        self.logger.info("==> Serialize to %s" % (path))
        self._serialize(self.current_analysis, path)

        # Save month stats
        year = cur_time.tm_year
        month = cur_time.tm_mon
        if not 'stats' in self.meta_infos.keys():
            self.meta_infos['stats'] = {}
        if not year in self.meta_infos['stats'].keys():
            self.meta_infos['stats'][year] = {}
        self.meta_infos['stats'][year][month] = duplicated_stats

        self._generateDisplay()

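    # Compute the current day's counters: PRE hooks run first, then each
    # visitor's requests for the day are classified as viewed or not viewed
    # pages/hits and added to the day's stats.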
    def _generateDayStats(self):
        visits = self.current_analysis['visits']
        cur_time = self.meta_infos['last_time']

        self._callPlugins(conf.PRE_HOOK_DIRECTORY)

        stats = self._createEmptyStats()

        for (k, super_hit) in visits.items():
            if super_hit['last_access'].tm_mday != cur_time.tm_mday:
                continue
            viewed_pages = False
            for hit in super_hit['requests'][::-1]:
                if hit['time_decoded'].tm_mday != cur_time.tm_mday:
                    break
                if super_hit['robot'] or\
                   not self.hasBeenViewed(hit):
                    stats['not_viewed_bandwidth'] += int(hit['body_bytes_sent'])
                    continue
                stats['viewed_bandwidth'] += int(hit['body_bytes_sent'])
                if hit['is_page']:
                    stats['viewed_pages'] += 1
                    viewed_pages = True
                else:
                    stats['viewed_hits'] += 1
            if (conf.count_hit_only_visitors or\
                viewed_pages) and\
               not super_hit['robot']:
                stats['nb_visits'] += 1

        self.logger.info("== Stats for %d/%02d/%02d ==" % (cur_time.tm_year, cur_time.tm_mon, cur_time.tm_mday))
        self.logger.info(stats)

        self.current_analysis['days_stats'][cur_time.tm_mday] = stats

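    # Handle one parsed log line: filter on conf.domain_name, decode its
    # timestamp, flush day/month statistics when the date rolls over, then
    # record the hit.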
    def _newHit(self, hit):
        if not self.domain_name_re.match(hit['server_name']):
            return False

        t = self._decodeTime(hit)

        cur_time = self.meta_infos['last_time']

        if cur_time == None:
            self.current_analysis = self._deserialize(self.getDBFilename(t)) or self._clearVisits()
            self.analyse_started = True
        else:
            if time.mktime(t) <= time.mktime(cur_time):
                return False
            self.analyse_started = True
            if cur_time.tm_mon != t.tm_mon:
                self._generateMonthStats()
                self.current_analysis = self._deserialize(self.getDBFilename(t)) or self._clearVisits()
            elif cur_time.tm_mday != t.tm_mday:
                self._generateDayStats()

        self.meta_infos['last_time'] = t

        if not self.meta_infos['start_analysis_time']:
            self.meta_infos['start_analysis_time'] = t

        if not self._decodeHTTPRequest(hit): return False

        for k in hit.keys():
            if hit[k] == '-' or hit[k] == '*':
                hit[k] = ''

        self._appendHit(hit)

        return True

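    # Entry point: reload previous databases, preload plugins, match each
    # log line against the generated regex and, if anything new was
    # analysed, finalize day/month stats and save the meta database.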
    def start(self, _file):
        self.logger.info('==> Load previous database')

        self.meta_infos = self._deserialize(conf.META_PATH) or self._clearMeta()
        if self.meta_infos['last_time']:
            self.logger.info('Last time')
            self.logger.info(self.meta_infos['last_time'])
            self.current_analysis = self._deserialize(self.getDBFilename(self.meta_infos['last_time'])) or self._clearVisits()
        else:
            self._clearVisits()

        self.meta_infos['start_analysis_time'] = None

        self.cache_plugins = preloadPlugins(self.plugins, self)

        self.logger.info('==> Analysing log')

        for l in _file:
            # print "line " + l

            groups = self.log_re.match(l)

            if groups:
                self._newHit(groups.groupdict())
            else:
                self.logger.warning("No match for %s" % (l))
                #break

        if self.analyse_started:
            self._generateDayStats()
            self._generateMonthStats()
            del self.meta_infos['start_analysis_time']
            self._serialize(self.meta_infos, conf.META_PATH)
        else:
            self.logger.info('==> Analyse not started : nothing new')


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Intelligent Web Log Analyzer')

    parser.add_argument('-c', '--clean-output', dest='clean_output', action='store_true',
                        default=False,
                        help='Clean output before starting')

    parser.add_argument('-i', '--stdin', dest='stdin', action='store_true',
                        default=False,
                        help='Read data from stdin instead of conf.analyzed_filename')

    parser.add_argument('-f', '--file', dest='file',
                        help='Analyse this log file')

    parser.add_argument('-d', '--log-level', dest='loglevel',
                        default='INFO', type=str,
                        help='Loglevel in %s, default : %s' % (['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], 'INFO'))

    args = parser.parse_args()

    if args.clean_output:
        if os.path.exists(conf.DB_ROOT): shutil.rmtree(conf.DB_ROOT)
        if os.path.exists(conf.DISPLAY_ROOT): shutil.rmtree(conf.DISPLAY_ROOT)

    loglevel = getattr(logging, args.loglevel.upper(), None)
    if not isinstance(loglevel, int):
        raise ValueError('Invalid log level: %s' % (args.loglevel))

    iwla = IWLA(loglevel)

    required_conf = ['analyzed_filename', 'domain_name']
    if not validConfRequirements(required_conf, iwla, 'Main Conf'):
        sys.exit(0)

    if args.stdin:
        iwla.start(sys.stdin)
    else:
        filename = args.file or conf.analyzed_filename
        if not os.path.exists(filename):
            print 'No such file \'%s\'' % (filename)
            sys.exit(-1)
        with open(filename) as f:
            iwla.start(f)
