iwla

iwla Commit Details

Date: 2022-06-23 21:11:43 (5 months 7 days ago)
Author: Grégory Soutadé
Branch: dev, master
Commit: d36676ca38ac87528701641c23ee570664809b96
Parents: 37a33f1291fa3a828f3dd9f8b1a33ebb5a0269ee
Message: Normalize URL before counting in stats

Changes:
M iwla.py (3 diffs)

File differences

iwla.py
@@ -149,7 +149,8 @@
         self.log_re = re.compile(self.log_format_extracted)
         self.uri_re = re.compile(r'(?P<extract_uri>[^\?#]+)(\?(?P<extract_parameters>[^#]+))?(#.*)?')
         self.domain_name_re = re.compile(r'.*%s' % conf.domain_name)
-        self.final_slashes_re = re.compile(r'/+$')
+        self.slash_re = re.compile(r'//')
+        self.protocol_re = re.compile(r'^.*://')
         self.excluded_ip = []
         for ip in conf.excluded_ip:
             self.excluded_ip += [re.compile(ip)]
@@ -372,15 +373,16 @@
         super_hit['robot'] = False
         super_hit['hit_only'] = 0
 
-    def _normalizeURI(self, uri):
+    def _normalizeURI(self, uri, removeFileSlash=False):
         if uri == '/': return uri
-        uri = self.final_slashes_re.sub('/', uri)
+        # Remove protocol
+        uri = self.protocol_re.sub('', uri)
+        # Remove double /
+        uri = self.slash_re.sub('/', uri)
+        if removeFileSlash and uri[-1] == '/':
+            uri = uri[:-1]
         return uri
 
-    def _removeFinalSlashes(self, uri):
-        if uri == '/': return uri
-        return self.final_slashes_re.sub('', uri)
-
     def _normalizeParameters(self, parameters):
         # No parameters
         if parameters == '?': return None
@@ -409,7 +411,7 @@
         referer_groups = self.uri_re.match(hit['http_referer'])
         if referer_groups:
             hit['extract_referer'] = referer_groups.groupdict("")
-            hit['extract_referer']['extract_uri'] = self._removeFinalSlashes(hit['extract_referer']['extract_uri'])
+            hit['extract_referer']['extract_uri'] = self._normalizeURI(hit['extract_referer']['extract_uri'], True)
             hit['extract_referer']['extract_parameters'] = self._normalizeParameters(hit['extract_referer']['extract_parameters'])
 
         return True

Archive Download the corresponding diff file

Branches

Tags