iwla

iwla Commit Details

Date: 2017-08-23 20:11:17 (6 years 3 months ago)
Author: Grégory Soutadé
Branch: dev, master
Commit: fffab335fa4ac053365c491162748e0daaa12e70
Parents: 3b3ae1ea3e53b16f8452b199f0cabe32cbdbe2be
Message: Handle URLs with empty referer

Changes:
M default_conf.py (1 diff)
M iwla.py (4 diffs)

File differences

default_conf.py
2222
2323
2424
25
25
2626
2727
2828
# Web server log format (nginx style). Default is apache log format
log_format = '$server_name:$server_port $remote_addr - $remote_user [$time_local] ' +\
'"$request" $status $body_bytes_sent ' +\
-'"$http_referer" "$http_user_agent"'
+'"$http_referer?" "$http_user_agent?"'
# Time format used in log format
time_format = '%d/%b/%Y:%H:%M:%S %z'
iwla.py
143143
144144
145145
146
146
147147
148148
149149
......
374374
375375
376376
377
377
378378
379379
380
380
381381
382382
383383
......
390390
391391
392392
393
393
394394
395395
396396
......
781781
782782
783783
784
784
785785
786786
787787
self.valid_visitors = None
self.dry_run = dry_run
-self.log_format_extracted = re.sub(r'([^\$\w])', r'\\\g<1>', conf.log_format)
+self.log_format_extracted = re.sub(r'([^\$?\w])', r'\\\g<1>', conf.log_format)
self.log_format_extracted = re.sub(r'\$(\w+)', '(?P<\g<1>>.+)', self.log_format_extracted)
self.http_request_extracted = re.compile(r'(?P<http_method>\S+) (?P<http_uri>\S+) (?P<http_version>\S+)')
self.log_re = re.compile(self.log_format_extracted)
groups = self.http_request_extracted.match(hit['request'])
if groups:
-hit['extract_request'] = groups.groupdict()
+hit['extract_request'] = groups.groupdict("")
uri_groups = self.uri_re.match(hit['extract_request']['http_uri'])
if uri_groups:
-d = uri_groups.groupdict()
+d = uri_groups.groupdict("")
hit['extract_request']['extract_uri'] = self._normalizeURI(d['extract_uri'])
if 'extract_parameters' in d.keys():
parameters = self._normalizeParameters(d['extract_parameters'])
if hit['http_referer']:
referer_groups = self.uri_re.match(hit['http_referer'])
if referer_groups:
-hit['extract_referer'] = referer_groups.groupdict()
+hit['extract_referer'] = referer_groups.groupdict("")
hit['extract_referer']['extract_uri'] = self._removeFinalSlashes(hit['extract_referer']['extract_uri'])
hit['extract_referer']['extract_parameters'] = self._normalizeParameters(hit['extract_referer']['extract_parameters'])
return True
groups = self.log_re.match(l)
if groups:
-self._newHit(groups.groupdict())
+self._newHit(groups.groupdict(""))
else:
self.logger.warning("No match for %s" % (l))
#break

Archive: Download the corresponding diff file

Branches

Tags