1 | # -*- coding: utf-8 -*-␊ |
2 | #␊ |
3 | # Copyright Grégory Soutadé 2015␊ |
4 | ␊ |
5 | # This file is part of iwla␊ |
6 | ␊ |
7 | # iwla is free software: you can redistribute it and/or modify␊ |
8 | # it under the terms of the GNU General Public License as published by␊ |
9 | # the Free Software Foundation, either version 3 of the License, or␊ |
10 | # (at your option) any later version.␊ |
11 | #␊ |
12 | # iwla is distributed in the hope that it will be useful,␊ |
13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of␊ |
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the␊ |
15 | # GNU General Public License for more details.␊ |
16 | #␊ |
17 | # You should have received a copy of the GNU General Public License␊ |
18 | # along with iwla. If not, see <http://www.gnu.org/licenses/>.␊ |
19 | #␊ |
20 | ␊ |
21 | import re␊ |
22 | import logging␊ |
23 | ␊ |
24 | from iwla import IWLA␊ |
25 | from iplugin import IPlugin␊ |
26 | ␊ |
27 | """␊ |
28 | Pre analysis hook␊ |
Change pages into hits and hits into pages in statistics
30 | ␊ |
31 | Plugin requirements :␊ |
32 | None␊ |
33 | ␊ |
34 | Conf values needed :␊ |
35 | page_to_hit_conf*␊ |
36 | hit_to_page_conf*␊ |
37 | ␊ |
38 | Output files :␊ |
39 | None␊ |
40 | ␊ |
41 | Statistics creation :␊ |
42 | None␊ |
43 | ␊ |
44 | Statistics update :␊ |
45 | visits :␊ |
46 | remote_addr =>␊ |
47 | is_page␊ |
48 | ␊ |
49 | Statistics deletion :␊ |
50 | None␊ |
51 | """␊ |
52 | ␊ |
class IWLAPreAnalysisPageToHit(IPlugin):
    """Pre-analysis plugin that flips viewed requests between page and hit.

    A request currently flagged as a page whose URI matches one of the
    ``page_to_hit_conf`` regular expressions is turned into a hit; a request
    currently flagged as a hit whose URI matches one of the
    ``hit_to_page_conf`` regular expressions is turned into a page. The
    ``viewed_pages`` / ``viewed_hits`` counters of the owning visit are
    adjusted accordingly.
    """

    def __init__(self, iwla):
        super(IWLAPreAnalysisPageToHit, self).__init__(iwla)
        self.API_VERSION = 1

    def load(self):
        """Compile the configured patterns and set up the plugin logger.

        Both configuration values default to an empty list, in which case
        the corresponding conversion never triggers.

        Returns:
            True, unconditionally.
        """
        # Page to hit
        self.ph_regexps = [
            re.compile(pattern)
            for pattern in self.iwla.getConfValue('page_to_hit_conf', [])
        ]

        # Hit to page
        self.hp_regexps = [
            re.compile(pattern)
            for pattern in self.iwla.getConfValue('hit_to_page_conf', [])
        ]

        self.logger = logging.getLogger(self.__class__.__name__)
        return True

    @staticmethod
    def _matches_any(regexps, uri):
        """Return True if at least one compiled pattern matches *uri*.

        ``match`` is used, so patterns are anchored at the start of the URI.
        """
        return any(regexp.match(uri) for regexp in regexps)

    @staticmethod
    def _move_one(source, target, day):
        """Move one unit from the *source* counters to the *target* counters.

        Both arguments are dicts keyed by day of month. Key 0 is updated in
        lockstep with the per-day key (presumably the total for the analysed
        period -- confirm against the core statistics code).
        """
        # The request was counted in source for that day, so the key exists.
        source[day] -= 1
        # The target may not have any entry for that day yet.
        target[day] = target.get(day, 0) + 1
        source[0] -= 1
        target[0] += 1

    def hook(self):
        """Reclassify the viewed requests of every non-robot current visit."""
        visits = self.iwla.getCurrentVisits()

        for super_hit in visits.values():
            if super_hit['robot']:
                continue

            # Walk the requests from last to first and stop at the first one
            # that is not valid for the current analysis (presumably every
            # earlier request was handled by a previous run -- confirm).
            for request in reversed(super_hit['requests']):
                if not self.iwla.isValidForCurrentAnalysis(request):
                    break

                # Only viewed requests contribute to the viewed_* counters.
                if not self.iwla.hasBeenViewed(request):
                    continue

                uri = request['extract_request']['extract_uri']
                day = request['time_decoded'].tm_mday

                if request['is_page']:
                    # Page to hit
                    if self._matches_any(self.ph_regexps, uri):
                        self.logger.debug('%s changed from page to hit', uri)
                        request['is_page'] = False
                        self._move_one(super_hit['viewed_pages'],
                                       super_hit['viewed_hits'], day)
                elif self._matches_any(self.hp_regexps, uri):
                    # Hit to page
                    self.logger.debug('%s changed from hit to page', uri)
                    request['is_page'] = True
                    self._move_one(super_hit['viewed_hits'],
                                   super_hit['viewed_pages'], day)