Merge branch 'dev' of soutade.fr:iwla into dev

This commit is contained in:
Gregory Soutade 2016-08-20 13:15:43 +02:00
commit 4759207f7c
6 changed files with 37 additions and 6 deletions

View File

@ -1,8 +1,11 @@
v0.4 (02/05/2016)
v0.4 (20/08/2016)
** User **
Remove crawlers from feed parsers
Add display only switch (-p)
** Dev **
** Bugs **
for robots, we have to use not_viewed_pages (feeds plugin)
gz files were not generated due to bad time comparison
v0.3 (12/04/2016)
** User **

View File

@ -34,7 +34,8 @@ display_hooks = []
# Extensions that are considered an HTML page (or result), as opposed to hits
pages_extensions = ['/', 'htm', 'html', 'xhtml', 'py', 'pl', 'rb', 'php']
# HTTP codes that are cosidered OK
# HTTP codes that are considered OK
viewed_http_codes = [200, 304]
# If False, don't count visitors that don't GET a page but only resources (images, rss...)

13
iwla.py
View File

@ -565,7 +565,7 @@ class IWLA(object):
self._generateDisplayDaysStats()
self._callPlugins(conf.DISPLAY_HOOK_DIRECTORY)
self._generateDisplayWholeMonthStats()
build_time = time.localtime()
build_time = time.mktime(time.localtime())
self.display.build(conf.DISPLAY_ROOT)
self._compressFiles(build_time, conf.DISPLAY_ROOT)
@ -608,6 +608,10 @@ class IWLA(object):
duplicated_stats['nb_visitors'] = stats['nb_visitors'] = len(self.valid_visitors.keys())
if args.display_only:
self._generateDisplay()
return
self._callPlugins(conf.POST_HOOK_DIRECTORY)
path = self.getDBFilename(cur_time)
@ -632,6 +636,9 @@ class IWLA(object):
self._generateDisplay()
def _generateDayStats(self):
if args.display_only:
return
visits = self.current_analysis['visits']
cur_time = self.meta_infos['last_time']
@ -835,6 +842,10 @@ if __name__ == '__main__':
default=False,
help='Don\'t compress databases (bigger but faster, not compatible with compressed databases)')
parser.add_argument('-p', '--display-only', dest='display_only', action='store_true',
default=False,
help='Only generate display')
args = parser.parse_args()
# Load user conf

View File

@ -76,7 +76,9 @@ class IWLADisplayFeeds(IPlugin):
table = display.createBlock(DisplayHTMLBlockTable, self.iwla._(u'All feeds parsers'), [self.iwla._(u'Host'), self.iwla._(u'Pages'), self.iwla._(u'Hits')])
table.setColsCSSClass(['', 'iwla_page', 'iwla_hit'])
for super_hit in hits.values():
if not super_hit['feed_parser']: continue
if not super_hit.get('feed_parser', False): continue
if super_hit['feed_parser'] == IWLAPostAnalysisFeeds.BAD_FEED_PARSER:
continue
nb_feeds_parsers += 1
address = super_hit['remote_addr']
if display_visitor_ip and\

View File

@ -55,6 +55,7 @@ class IWLAPostAnalysisFeeds(IPlugin):
NOT_A_FEED_PARSER = 0
FEED_PARSER = 1
MERGED_FEED_PARSER = 2
BAD_FEED_PARSER = 3
def __init__(self, iwla):
super(IWLAPostAnalysisFeeds, self).__init__(iwla)
@ -71,6 +72,9 @@ class IWLAPostAnalysisFeeds(IPlugin):
for f in feeds:
self.feeds_re.append(re.compile(r'.*%s.*' % (f)))
self.bad_feeds_re = []
self.bad_feeds_re.append(re.compile(r'.*crawl.*'))
return True
def mergeOneHitOnlyFeedsParsers(self, isFeedParser, one_hit_only, hit):
@ -94,7 +98,17 @@ class IWLAPostAnalysisFeeds(IPlugin):
self.merge_one_hit_only_feeds_parsers:
self.mergeOneHitOnlyFeedsParsers(isFeedParser, one_hit_only, hit)
if not isFeedParser is None: continue
if isFeedParser:
if hit['feed_parser'] == self.BAD_FEED_PARSER: continue
if not hit.get('feed_name_analysed', False) and\
hit.get('dns_name_replaced', False):
hit['feed_name_analysed'] = True
addr = hit.get('remote_addr', None)
for r in self.bad_feeds_re:
if r.match(addr):
hit['feed_parser'] = self.BAD_FEED_PARSER
return
return
isFeedParser = self.NOT_A_FEED_PARSER
uri = hit['requests'][0]['extract_request']['extract_uri'].lower()

View File

@ -67,7 +67,7 @@ class IWLAPostAnalysisReverseDNS(IPlugin):
hits = self.iwla.getCurrentVisits()
for (k, hit) in hits.items():
if hit.get('dns_analysed', False): continue
if not hit['feed_parser'] and\
if not hit.get('feed_parser', False) and\
not self.iwla.isValidVisitor(hit):
continue
try: