import time␊ |
import re␊ |
import HTMLParser␊ |
import xml.sax.saxutils as saxutils␊ |
␊ |
from iwla import IWLA␊ |
from iplugin import IPlugin␊ |
|
super(IWLADisplayReferers, self).__init__(iwla)␊ |
self.API_VERSION = 1␊ |
␊ |
def _getSearchEngine(self, hashid):
    """Return the key of the search engine registered under *hashid*.

    ``self.search_engines`` maps an engine key to a dict whose ``'hashid'``
    entry is a list of ``(hashid, compiled_regex)`` tuples.  The lookup is
    an exact string comparison against the first tuple element; the
    compiled regex is deliberately not consulted here.

    Returns the matching engine key, or None when no engine declares
    *hashid*.
    """
    for (key, engine) in self.search_engines.items():
        # Each entry is a (hashid, regex) pair; only the raw hashid string
        # identifies the engine, so the regex half is ignored.
        for (known_hashid, _hashid_re) in engine['hashid']:
            if hashid == known_hashid:
                return key
    return None
␊ |
def load(self):␊ |
|
print 'domain_name required in conf'␊ |
return False␊ |
␊ |
self.own_domain_re = re.compile('.*%s.*' % (domain_name))␊ |
self.own_domain_re = re.compile(r'.*%s.*' % (domain_name))␊ |
self.search_engines = {}␊ |
␊ |
for (engine, known_url) in awstats_data.search_engines_knwown_url.items():␊ |
self.search_engines[engine] = {␊ |
'known_url' : re.compile(known_url + '(?P<key_phrase>.+)'),␊ |
'hashid' : []␊ |
}␊ |
␊ |
for (hashid, engine) in awstats_data.search_engines_hashid.items():␊ |
hashid_re = re.compile('.*%s.*' % (hashid))␊ |
if not engine in self.search_engines.keys():␊ |
self.search_engines[engine] = {␊ |
'hashid' : [hashid_re]␊ |
for (hashid, name) in awstats_data.search_engines_hashid.items():␊ |
hashid_re = re.compile(r'.*%s.*' % (hashid))␊ |
if not name in self.search_engines.keys():␊ |
self.search_engines[name] = {␊ |
'hashid' : [(hashid, hashid_re)]␊ |
}␊ |
else:␊ |
self.search_engines[engine]['hashid'].append(hashid_re)␊ |
print 'Hashid %s => %s' % (engine, hashid)␊ |
self.search_engines[name]['hashid'].append((hashid, hashid_re))␊ |
#print 'Hashid %s => %s' % (name, hashid)␊ |
␊ |
for (name, known_url) in awstats_data.search_engines_knwown_url.items():␊ |
self.search_engines[name]['known_url'] = re.compile(known_url + '(?P<key_phrase>.+)')␊ |
␊ |
for (engine, not_engine) in awstats_data.not_search_engines_keys.items():␊ |
not_engine_re = re.compile('.*%s.*' % (not_engine))␊ |
not_engine_re = re.compile(r'.*%s.*' % (not_engine))␊ |
key = self._getSearchEngine(engine)␊ |
if key:␊ |
self.search_engines[key]['not_search_engine'] = not_engine_re␊ |
␊ |
for engine in awstats_data.search_engines:␊ |
engine_re = re.compile('.*%s.*' % (engine), re.IGNORECASE)␊ |
key = self._getSearchEngine(engine)␊ |
if key:␊ |
self.search_engines[key]['re'] = not_engine_re␊ |
␊ |
for (k,e) in self.search_engines.items():␊ |
if not 're' in e.keys():␊ |
print 'Remove %s' % k␊ |
del self.search_engines[k]␊ |
␊ |
print self.search_engines␊ |
␊ |
self.html_parser = HTMLParser.HTMLParser()␊ |
#self.html_parser = html.parser.HTMLParser()␊ |
␊ |
return True␊ |
␊ |
|
for p in parameters.split('&'):␊ |
groups = key_phrase_re.match(p)␊ |
if groups:␊ |
print groups.groupddict()␊ |
key_phrase = self.html_parser.unescape(groups.groupddict()['key_phrase']).lower()␊ |
key_phrase = groups.groupdict()['key_phrase']␊ |
key_phrase = key_phrase.replace('+', ' ').lower()␊ |
key_phrase = saxutils.unescape(key_phrase)␊ |
if not key_phrase in key_phrases.keys():␊ |
key_phrases[key_phrase] = 1␊ |
else:␊ |
key_phrases[key_phrase] += 1␊ |
break␊ |
␊ |
def hook(self, iwla):␊ |
stats = iwla.getCurrentVisists()␊ |
|
␊ |
if self.own_domain_re.match(uri): continue␊ |
␊ |
for e in self.search_engines.values():␊ |
if e['re'].match(uri):␊ |
not_engine = e.get('not_search_engine', None)␊ |
for (name, engine) in self.search_engines.items():␊ |
for (hashid, hashid_re) in engine['hashid']:␊ |
if not hashid_re.match(uri): continue␊ |
␊ |
not_engine = engine.get('not_search_engine', None)␊ |
# Try not engine␊ |
if not_engine and not_engine.match(uri): break␊ |
is_search_engine = True␊ |
uri = e['name']␊ |
uri = name␊ |
␊ |
parameters = r['extract_referer'].get('extract_parameters', None)␊ |
key_phrase_re = e.get('known_url', None)␊ |
␊ |
# print parameters␊ |
# print key_phrase_re␊ |
key_phrase_re = engine.get('known_url', None)␊ |
␊ |
self._extractKeyPhrase(key_phrase_re, parameters, key_phrases)␊ |
␊ |
break␊ |
␊ |
if is_search_engine:␊ |
|
page.appendBlock(table)␊ |
␊ |
display.addPage(page)␊ |
␊ |
block = DisplayHTMLRawBlock()␊ |
block.setRawHTML('<a href=\'%s\'>All key phrases</a>' % (filename))␊ |
index.appendBlock(block)␊ |