Update iwla:

* Rework arg variable management
  * Manage dry run at top level
  * 'robot' property is now None by default (allows the analysis to be done only once)
  * Add --disable-display option
This commit is contained in:
Gregory Soutade 2023-03-11 20:51:44 +01:00
parent 6500d98bdd
commit adc04bf753
3 changed files with 23 additions and 24 deletions

37
iwla.py
View File

@ -134,7 +134,7 @@ class IWLA(object):
API_VERSION = 1
IWLA_VERSION = '0.7'
def __init__(self, logLevel, dry_run):
def __init__(self, logLevel, args):
self.meta_infos = {}
self.analyse_started = False
self.current_analysis = {}
@ -142,7 +142,7 @@ class IWLA(object):
self.cache_plugins = {}
self.display = DisplayHTMLBuild(self)
self.valid_visitors = None
self.dry_run = dry_run
self.args = args
self.log_format_extracted = re.sub(r'([^\$?\w])', r'\\\g<1>', conf.log_format)
self.log_format_extracted = re.sub(r'\$(\w+)', '(?P<\g<1>>.+)', self.log_format_extracted)
@ -161,7 +161,7 @@ class IWLA(object):
logging.basicConfig(format='%(name)s %(message)s', level=logLevel)
self.logger = logging.getLogger(self.__class__.__name__)
if self.dry_run:
if self.args.dry_run:
self.logger.info('==> Start (DRY RUN)')
else:
self.logger.info('==> Start')
@ -256,7 +256,8 @@ class IWLA(object):
return gzip.open(filename, prot)
def _serialize(self, obj, filename):
if self.dry_run: return
if self.args.dry_run: return
self.logger.info("==> Serialize to %s" % (filename))
base = os.path.dirname(filename)
if not os.path.exists(base):
os.makedirs(base)
@ -318,7 +319,8 @@ class IWLA(object):
return True
def isRobot(self, hit):
return hit['robot']
# By default robot is None
return hit['robot'] == True
def _appendHit(self, hit):
remote_addr = hit['remote_addr']
@ -379,7 +381,7 @@ class IWLA(object):
super_hit['bandwidth'] = {0:0}
super_hit['last_access'] = self.meta_infos['last_time']
super_hit['requests'] = []
super_hit['robot'] = False
super_hit['robot'] = None
super_hit['hit_only'] = 0
def _normalizeURI(self, uri, removeFileSlash=False):
@ -578,7 +580,7 @@ class IWLA(object):
if not os.path.exists(gz_path) or\
os.stat(path).st_mtime > os.stat(gz_path).st_mtime:
if self.dry_run: return
if self.args.dry_run: return
with open(path, 'rb') as f_in, gzip.open(gz_path, 'wb') as f_out:
f_out.write(f_in.read())
@ -592,6 +594,8 @@ class IWLA(object):
break
def _generateDisplay(self):
if self.args.dry_run or\
self.args.disable_display: return
self._generateDisplayDaysStats()
self._callPlugins(conf.DISPLAY_HOOK_DIRECTORY)
self._generateDisplayWholeMonthStats()
@ -639,7 +643,7 @@ class IWLA(object):
self._callPlugins(conf.POST_HOOK_DIRECTORY)
if args.display_only:
if self.args.display_only:
if not 'stats' in self.meta_infos.keys():
self.meta_infos['stats'] = {}
self._generateDisplay()
@ -653,7 +657,6 @@ class IWLA(object):
path = self.getDBFilename(cur_time)
self.logger.info("==> Serialize to %s" % (path))
self._serialize(self.current_analysis, path)
# Save month stats
@ -666,7 +669,6 @@ class IWLA(object):
self.meta_infos['stats'][year][month] = duplicated_stats
meta_path = os.path.join(conf.DB_ROOT, conf.META_FILENAME)
self.logger.info("==> Serialize to %s" % (meta_path))
self._serialize(self.meta_infos, meta_path)
self._generateDisplay()
@ -766,8 +768,7 @@ class IWLA(object):
if os.path.exists(output_path): shutil.rmtree(output_path)
month += 1
def start(self, _file, args):
self.args = args
def start(self, _file):
self.start_time = datetime.now()
meta_path = os.path.join(conf.DB_ROOT, conf.META_FILENAME)
@ -876,7 +877,11 @@ if __name__ == '__main__':
parser.add_argument('-p', '--display-only', dest='display_only', action='store_true',
default=False,
help='Only generate display')
help='Only generate display (don\'t write database)')
parser.add_argument('-P', '--disable-display', dest='disable_display', action='store_true',
default=False,
help='Don\'t generate display')
parser.add_argument('-D', '--dry-run', dest='dry_run', action='store_true',
default=False,
@ -914,14 +919,14 @@ if __name__ == '__main__':
if not isinstance(loglevel, int):
raise ValueError('Invalid log level: %s' % (args.loglevel))
iwla = IWLA(loglevel, args.dry_run)
iwla = IWLA(loglevel, args)
required_conf = ['analyzed_filename', 'domain_name']
if not validConfRequirements(required_conf, iwla, 'Main Conf'):
sys.exit(0)
if args.stdin:
iwla.start(sys.stdin, args)
iwla.start(sys.stdin)
else:
filename = args.file or conf.analyzed_filename
iwla.start(FileIter(filename), args)
iwla.start(FileIter(filename))

View File

@ -80,10 +80,6 @@ Statistics deletion :
"""
class IWLAPostAnalysisFilterUsers(IPlugin):
def __init__(self, iwla):
super(IWLAPostAnalysisFilterUsers, self).__init__(iwla)
self.API_VERSION = 1
def _check_filter(self, _filter):
if len(_filter) != 3:
raise Exception('Bad filter ' + ' '.join(_filter))

View File

@ -53,10 +53,6 @@ Statistics deletion :
class IWLAPostAnalysisReverseDNS(IPlugin):
DEFAULT_DNS_TIMEOUT = 0.5
def __init__(self, iwla):
super(IWLAPostAnalysisReverseDNS, self).__init__(iwla)
self.API_VERSION = 1
def load(self):
timeout = self.iwla.getConfValue('reverse_dns_timeout',
IWLAPostAnalysisReverseDNS.DEFAULT_DNS_TIMEOUT)
@ -67,6 +63,8 @@ class IWLAPostAnalysisReverseDNS(IPlugin):
hits = self.iwla.getCurrentVisits()
for (k, hit) in hits.items():
if hit.get('dns_analysed', False): continue
# Do the reverse DNS lookup for feed parsers even if they are not
# valid visitors
if not hit.get('feed_parser', False) and\
not self.iwla.isValidVisitor(hit):
continue