iwla

iwla Commit Details

Date: 2014-11-26 19:33:08 (6 years 8 months ago)
Author: Grégory Soutadé
Branch: dev, master
Commit: 92533cc244e05caa7a608308ac81f53e540b8378
Parents: e6b31fbf8a700b04ceb1ff0725f74848977bd2f9
Message: Fix key_phrases

Changes:
M plugins/display/referers.py (6 diffs)

File differences

plugins/display/referers.py
11
22
3
3
44
55
66
......
1313
1414
1515
16
16
17
1718
18
19
19
20
2021
21
22
2223
2324
2425
......
2829
2930
3031
31
32
3233
3334
34
35
36
37
38
39
40
41
42
43
44
35
36
37
38
39
4540
4641
47
48
42
43
44
45
46
4947
5048
51
49
5250
5351
5452
5553
56
57
58
59
60
61
62
63
64
65
66
67
68
69
54
7055
7156
7257
......
7661
7762
7863
79
80
64
65
66
8167
8268
8369
8470
71
8572
8673
8774
......
9986
10087
10188
102
103
104
89
90
91
92
93
10594
10695
10796
108
97
10998
11099
111
112
113
114
100
115101
116102
117
118103
119104
120105
......
218203
219204
220205
206
207
208
209
import time
import re
import HTMLParser
import xml.sax.saxutils as saxutils
from iwla import IWLA
from iplugin import IPlugin
super(IWLADisplayReferers, self).__init__(iwla)
self.API_VERSION = 1
def _getSearchEngine(self, engine):
def _getSearchEngine(self, hashid):
#print 'Look for %s' % engine
for (k, e) in self.search_engines.items():
for hashid in e['hashid']:
if hashid.match(engine):
for (h,h_re) in e['hashid']:
if hashid == h:
return k
print 'Not found %s' % (engine)
#print 'Not found %s' % (hashid)
return None
def load(self):
print 'domain_name required in conf'
return False
self.own_domain_re = re.compile('.*%s.*' % (domain_name))
self.own_domain_re = re.compile(r'.*%s.*' % (domain_name))
self.search_engines = {}
for (engine, known_url) in awstats_data.search_engines_knwown_url.items():
self.search_engines[engine] = {
'known_url' : re.compile(known_url + '(?P<key_phrase>.+)'),
'hashid' : []
}
for (hashid, engine) in awstats_data.search_engines_hashid.items():
hashid_re = re.compile('.*%s.*' % (hashid))
if not engine in self.search_engines.keys():
self.search_engines[engine] = {
'hashid' : [hashid_re]
for (hashid, name) in awstats_data.search_engines_hashid.items():
hashid_re = re.compile(r'.*%s.*' % (hashid))
if not name in self.search_engines.keys():
self.search_engines[name] = {
'hashid' : [(hashid, hashid_re)]
}
else:
self.search_engines[engine]['hashid'].append(hashid_re)
print 'Hashid %s => %s' % (engine, hashid)
self.search_engines[name]['hashid'].append((hashid, hashid_re))
#print 'Hashid %s => %s' % (name, hashid)
for (name, known_url) in awstats_data.search_engines_knwown_url.items():
self.search_engines[name]['known_url'] = re.compile(known_url + '(?P<key_phrase>.+)')
for (engine, not_engine) in awstats_data.not_search_engines_keys.items():
not_engine_re = re.compile('.*%s.*' % (not_engine))
not_engine_re = re.compile(r'.*%s.*' % (not_engine))
key = self._getSearchEngine(engine)
if key:
self.search_engines[key]['not_search_engine'] = not_engine_re
for engine in awstats_data.search_engines:
engine_re = re.compile('.*%s.*' % (engine), re.IGNORECASE)
key = self._getSearchEngine(engine)
if key:
self.search_engines[key]['re'] = not_engine_re
for (k,e) in self.search_engines.items():
if not 're' in e.keys():
print 'Remove %s' % k
del self.search_engines[k]
print self.search_engines
self.html_parser = HTMLParser.HTMLParser()
#self.html_parser = html.parser.HTMLParser()
return True
for p in parameters.split('&'):
groups = key_phrase_re.match(p)
if groups:
print groups.groupddict()
key_phrase = self.html_parser.unescape(groups.groupddict()['key_phrase']).lower()
key_phrase = groups.groupdict()['key_phrase']
key_phrase = key_phrase.replace('+', ' ').lower()
key_phrase = saxutils.unescape(key_phrase)
if not key_phrase in key_phrases.keys():
key_phrases[key_phrase] = 1
else:
key_phrases[key_phrase] += 1
break
def hook(self, iwla):
stats = iwla.getCurrentVisists()
if self.own_domain_re.match(uri): continue
for e in self.search_engines.values():
if e['re'].match(uri):
not_engine = e.get('not_search_engine', None)
for (name, engine) in self.search_engines.items():
for (hashid, hashid_re) in engine['hashid']:
if not hashid_re.match(uri): continue
not_engine = engine.get('not_search_engine', None)
# Try not engine
if not_engine and not_engine.match(uri): break
is_search_engine = True
uri = e['name']
uri = name
parameters = r['extract_referer'].get('extract_parameters', None)
key_phrase_re = e.get('known_url', None)
# print parameters
# print key_phrase_re
key_phrase_re = engine.get('known_url', None)
self._extractKeyPhrase(key_phrase_re, parameters, key_phrases)
break
if is_search_engine:
page.appendBlock(table)
display.addPage(page)
block = DisplayHTMLRawBlock()
block.setRawHTML('<a href=\'%s\'>All key phrases</a>' % (filename))
index.appendBlock(block)

Archive: Download the corresponding diff file

Branches

Tags