Fix unicode problems

Add generateHTMLLink()
This commit is contained in:
Grégory Soutadé 2014-12-04 21:04:41 +01:00
parent 5f72a9c912
commit 2362fd1fd2
7 changed files with 98 additions and 91 deletions

View File

@ -1,8 +1,9 @@
import os
import codecs
class DisplayHTMLRaw(object):
def __init__(self, html=''):
def __init__(self, html=u''):
self.html = html
def setRawHTML(self, html):
@ -23,31 +24,31 @@ class DisplayHTMLBlock(DisplayHTMLRaw):
def __init__(self, title=''):
super(DisplayHTMLBlock, self).__init__(html='')
self.title = title
self.cssclass = 'iwla_block'
self.title_cssclass = 'iwla_block_title'
self.value_cssclass = 'iwla_block_value'
self.cssclass = u'iwla_block'
self.title_cssclass = u'iwla_block_title'
self.value_cssclass = u'iwla_block_value'
def getTitle(self):
return self.title
def setTitle(self, value):
self.title = value
self.title = unicode(value)
def setCSSClass(self, cssclass):
self.cssclass = cssclass
self.cssclass = unicode(cssclass)
def setTitleCSSClass(self, cssclass):
self.title_cssclass = cssclass
self.title_cssclass = unicode(cssclass)
def setValueCSSClass(self, cssclass):
self.value_cssclass = cssclass
self.value_cssclass = unicode(cssclass)
def _buildHTML(self):
html = '<div class="%s">' % (self.cssclass)
html = u'<div class="%s">' % (self.cssclass)
if self.title:
html += '<div class="%s">%s</div>' % (self.title_cssclass, self.title)
html += '<div class="%s">%s</div>' % (self.value_cssclass, self.html)
html += '</div>'
html += u'<div class="%s">%s</div>' % (self.title_cssclass, self.title)
html += u'<div class="%s">%s</div>' % (self.value_cssclass, self.html)
html += u'</div>'
self.html = html
@ -55,15 +56,15 @@ class DisplayHTMLBlockTable(DisplayHTMLBlock):
def __init__(self, title, cols):
super(DisplayHTMLBlockTable, self).__init__(title=title)
self.cols = cols
self.cols = listToStr(cols)
self.rows = []
self.cols_cssclasses = [''] * len(cols)
self.cols_cssclasses = [u''] * len(cols)
self.rows_cssclasses = []
self.table_css = 'iwla_table'
self.table_css = u'iwla_table'
def appendRow(self, row):
self.rows.append(listToStr(row))
self.rows_cssclasses.append([''] * len(row))
self.rows_cssclasses.append([u''] * len(row))
def getCellValue(self, row, col):
if row < 0 or col < 0 or\
@ -77,14 +78,14 @@ class DisplayHTMLBlockTable(DisplayHTMLBlock):
row >= len(self.rows) or col >= len(self.cols):
raise ValueError('Invalid indices %d,%d' % (row, col))
self.rows[row][col] = value
self.rows[row][col] = unicode(value)
def setCellCSSClass(self, row, col, value):
if row < 0 or col < 0 or\
row >= len(self.rows) or col >= len(self.cols):
raise ValueError('Invalid indices %d,%d' % (row, col))
self.rows_cssclasses[row][col] = value
self.rows_cssclasses[row][col] = unicode(value)
def getCellCSSClass(self, row, col):
if row < 0 or col < 0 or\
@ -103,42 +104,42 @@ class DisplayHTMLBlockTable(DisplayHTMLBlock):
if row < 0 or row >= len(self.rows):
raise ValueError('Invalid indice %d' % (row))
self.rows_cssclasses[row] = [value] * len(self.rows_cssclasses[row])
self.rows_cssclasses[row] = [unicode(value)] * len(self.rows_cssclasses[row])
def setColCSSClass(self, col, value):
if col < 0 or col >= len(self.cols):
raise ValueError('Invalid indice %d' % (col))
self.cols_cssclasses[col] = value
self.cols_cssclasses[col] = unicode(value)
def setColsCSSClass(self, values):
    """Set the CSS class of every column at once.

    values -- sequence holding one CSS class per column; its length must
              equal len(self.cols).

    Raises ValueError when the sizes differ.
    """
    if len(values) != len(self.cols):
        raise ValueError('Invalid values size')
    # Convert each entry on its own. unicode(values) would stringify the
    # whole list (e.g. u"['', 'iwla_hit']") and repeat that one string for
    # every column.
    self.cols_cssclasses = [unicode(value) for value in values]
def _buildHTML(self):
style = ''
if self.table_css: style = ' class="%s"' % (self.table_css)
html = '<table%s>' % (style)
style = u''
if self.table_css: style = u' class="%s"' % (self.table_css)
html = u'<table%s>' % (style)
if self.cols:
html += '<tr>'
html += u'<tr>'
for i in range (0, len(self.cols)):
title = self.cols[i]
style = self.getColCSSClass(i)
if style: style = ' class="%s"' % (style)
html += '<th%s>%s</th>' % (style, title)
html += '</tr>'
if style: style = u' class="%s"' % (style)
html += u'<th%s>%s</th>' % (style, title)
html += u'</tr>'
for i in range(0, len(self.rows)):
row = self.rows[i]
html += '<tr>'
html += u'<tr>'
for j in range(0, len(row)):
v = row[j]
style = self.getCellCSSClass(i, j)
if style: style = ' class="%s"' % (style)
html += '<td%s>%s</td>' % (style, v)
html += '</tr>'
html += '</table>'
if style: style = u' class="%s"' % (style)
html += u'<td%s>%s</td>' % (style, v)
html += u'</tr>'
html += u'</table>'
self.html += html
@ -149,14 +150,15 @@ class DisplayHTMLBlockTableWithGraph(DisplayHTMLBlockTable):
def __init__(self, title, cols, short_titles=None, nb_valid_rows=0, graph_cols=None):
super(DisplayHTMLBlockTableWithGraph, self).__init__(title=title, cols=cols)
self.short_titles = short_titles or []
self.short_titles = listToStr(self.short_titles)
self.nb_valid_rows = nb_valid_rows
# TOFIX
self.icon_path = 'resources/icon'
self.icon_path = u'resources/icon'
# self.icon_path = self.iwla.getConfValue('icon_path', '/')
self.raw_rows = []
self.maxes = [0] * len(cols)
self.table_graph_css = 'iwla_graph_table'
self.td_img_css = 'iwla_td_img'
self.table_graph_css = u'iwla_graph_table'
self.td_img_css = u'iwla_td_img'
self.graph_cols = graph_cols or []
def appendRow(self, row):
@ -164,10 +166,10 @@ class DisplayHTMLBlockTableWithGraph(DisplayHTMLBlockTable):
super(DisplayHTMLBlockTableWithGraph, self).appendRow(row)
def appendShortTitle(self, short_title):
self.short_titles.append(short_title)
self.short_titles.append(unicode(short_title))
def setShortTitle(self, short_titles):
self.short_titles = short_titles
self.short_titles = listToStr(short_titles)
def setNbValidRows(self, nb_valid_rows):
self.nb_valid_rows = nb_valid_rows
@ -180,47 +182,47 @@ class DisplayHTMLBlockTableWithGraph(DisplayHTMLBlockTable):
self.maxes[j] = row[j]
def _getIconFromStyle(self, style):
    """Return the icon path matching a column CSS class.

    style -- CSS class name of a column (a string).

    Returns u'<self.icon_path>/<icon file>' for the first known prefix
    that *style* starts with, or an empty unicode string when none matches.
    """
    # Order matters: u'iwla_visitor' must be tested before u'iwla_visit',
    # which is a prefix of it.
    icons = (
        (u'iwla_page', u'vp.png'),
        (u'iwla_hit', u'vh.png'),
        (u'iwla_bandwidth', u'vk.png'),
        (u'iwla_visitor', u'vu.png'),
        (u'iwla_visit', u'vv.png'),
    )
    for prefix, icon in icons:
        if style.startswith(prefix):
            return u'%s/%s' % (self.icon_path, icon)
    # Unicode like every other return value of this method.
    return u''
def _buildHTML(self):
self._computeMax()
style = ''
if self.table_graph_css: style = ' class="%s"' % (self.table_graph_css)
html = '<table%s>' % (style)
html += '<tr>'
style = u''
if self.table_graph_css: style = u' class="%s"' % (self.table_graph_css)
html = u'<table%s>' % (style)
html += u'<tr>'
for i in range(0, self.nb_valid_rows):
row = self.rows[i]
css = ''
if self.td_img_css: css=' class="%s"' % (self.td_img_css)
html += '<td%s>' % (css)
css = u''
if self.td_img_css: css=u' class="%s"' % (self.td_img_css)
html += u'<td%s>' % (css)
for j in self.graph_cols:
style = self.getColCSSClass(j)
icon = self._getIconFromStyle(style)
if not icon: continue
if style: style = ' class="%s"' % (style)
alt = '%s: %s' % (row[j], self.cols[j])
if style: style = u' class="%s"' % (style)
alt = u'%s: %s' % (row[j], self.cols[j])
if self.maxes[j]:
height = int((self.raw_rows[i][j] * 100) / self.maxes[j]) or 1
else:
height = 1
html += '<img%s src="%s" height="%d" width="6" alt="%s" title="%s" />' % (style, icon, height, alt, alt)
html += '</td>'
html += '</tr>'
html += '<tr>'
html += u'<img%s src="%s" height="%d" width="6" alt="%s" title="%s" />' % (style, icon, height, alt, alt)
html += u'</td>'
html += u'</tr>'
html += u'<tr>'
for i in range(0, len(self.short_titles)):
style = self.getCellCSSClass(i, 0)
if style: style = ' class="%s"' % (style)
html += '<td%s>%s</td>' % (style, self.short_titles[i])
html += '</tr>'
html += '</table>'
if style: style = u' class="%s"' % (style)
html += u'<td%s>%s</td>' % (style, self.short_titles[i])
html += u'</tr>'
html += u'</table>'
self.html += html
@ -229,10 +231,10 @@ class DisplayHTMLBlockTableWithGraph(DisplayHTMLBlockTable):
class DisplayHTMLPage(object):
def __init__(self, title, filename, css_path):
self.title = title
self.title = unicode(title)
self.filename = filename
self.blocks = []
self.css_path = css_path
self.css_path = listToStr(css_path)
def getFilename(self):
return self.filename;
@ -253,19 +255,19 @@ class DisplayHTMLPage(object):
if not os.path.exists(base):
os.makedirs(base)
f = open(filename, 'w')
f.write('<!DOCTYPE html>')
f.write('<html>')
f.write('<head>')
f.write('<meta http-equiv="Content-type" content="text/html; charset=UTF-8" />')
f = codecs.open(filename, 'w', 'utf-8')
f.write(u'<!DOCTYPE html>')
f.write(u'<html>')
f.write(u'<head>')
f.write(u'<meta http-equiv="Content-type" content="text/html; charset=UTF-8" />')
for css in self.css_path:
f.write('<link rel="stylesheet" href="%s"/>' % (css))
f.write(u'<link rel="stylesheet" href="%s"/>' % (css))
if self.title:
f.write('<title>%s</title>' % (self.title))
f.write('</head>')
f.write(u'<title>%s</title>' % (self.title))
f.write(u'</head>')
for block in self.blocks:
block.build(f)
f.write('</body></html>')
f.write(u'</body></html>')
f.close()
class DisplayHTMLBuild(object):
@ -295,19 +297,25 @@ class DisplayHTMLBuild(object):
page.build(root)
def bytesToStr(bytes):
    """Format a byte count as a human-readable unicode string.

    bytes -- size in bytes (a number).

    Values below 1024 are rendered as an integer with no suffix; larger
    values are divided by 1024 until they fit and rendered with two
    decimals and a ' kB', ' MB', ' GB' or ' TB' suffix.
    """
    suffixes = [u'', u' kB', u' MB', u' GB', u' TB']
    value = bytes
    index = 0
    # Stop at the last suffix: dividing once more without a larger unit to
    # name would report values of 1024**5 and above 1024 times too small.
    while value >= 1024 and index < len(suffixes) - 1:
        value /= 1024.0
        index += 1
    if index:
        return u'%.02f%s' % (value, suffixes[index])
    return u'%d%s' % (value, suffixes[index])
def _toStr(v):
    """Return *v* as a unicode string.

    Unicode values are returned unchanged. Byte strings are decoded as
    UTF-8, with undecodable bytes replaced. Any other value is converted
    with unicode().
    """
    if isinstance(v, unicode):
        return v
    if isinstance(v, bytes):
        # In Python 2, unicode(v) decodes a byte string with the ASCII
        # codec and raises UnicodeDecodeError on any non-ASCII byte.
        return v.decode('utf-8', 'replace')
    return unicode(v)


def listToStr(l):
    """Return a new list holding every item of *l* converted by _toStr()."""
    return [_toStr(v) for v in l]
def generateHTMLLink(url, name=None, max_length=100, prefix=u'http'):
    """Build an HTML anchor (<a href="...">...</a>) for *url*.

    url        -- link target; converted with unicode().
    name       -- text shown for the link. When empty or None, the URL as
                  given (before any prefix is added) is used and
                  HTML-escaped. A caller-supplied name is inserted as is,
                  so it must already be safe to embed in HTML.
    max_length -- maximum number of characters kept from the link text.
    prefix     -- scheme prepended as 'prefix://' when *url* does not
                  already start with it.

    Returns the anchor as a unicode string.
    """
    # Function-scope import so the module header does not need to change.
    from xml.sax.saxutils import escape

    url = unicode(url)
    if name:
        label = name[:max_length]
    else:
        # Truncate first, then escape, so an entity is never cut in half.
        label = escape(url[:max_length])
    # NOTE(review): this is a plain prefix test, so a host that merely
    # begins with the prefix (e.g. 'httpd.example.org') is left without a
    # scheme, and a path such as '/index.html' becomes
    # 'http:///index.html' -- confirm what callers are expected to pass.
    if not url.startswith(prefix):
        url = u'%s://%s' % (prefix, url)
    # Escape &, <, > and the double quote so the URL cannot close the
    # attribute or inject markup.
    href = escape(url, {u'"': u'&quot;'})
    return u'<a href="%s">%s</a>' % (href, label)

View File

@ -301,7 +301,7 @@ class IWLA(object):
cols = ['Month', 'Visitors', 'Pages', 'Hits', 'Bandwidth', 'Not viewed Bandwidth', 'Details']
graph_cols=range(1,6)
months = DisplayHTMLBlockTableWithGraph(title, cols, nb_valid_rows=12, graph_cols=graph_cols)
months.setColsCSSClass(['', 'iwla_visit', 'iwla_page', 'iwla_hit', 'iwla_bandwidth', 'iwla_bandwidth', ''])
months.setColsCSSClass(['', 'iwla_visitor', 'iwla_page', 'iwla_hit', 'iwla_bandwidth', 'iwla_bandwidth', ''])
total = [0] * len(cols)
for i in range(1, 13):
month = '%s<br/>%d' % (months_name[i], year)

View File

@ -49,12 +49,12 @@ class IWLADisplayReferers(IPlugin):
table.appendRow(['<b>External URL</b>', '', ''])
for r,_ in top_referers:
row = [r, referers[r]['pages'], referers[r]['hits']]
row = [generateHTMLLink(r), referers[r]['pages'], referers[r]['hits']]
table.appendRow(row)
table.appendRow(['<b>External URL (robot)</b>', '', ''])
for r,_ in top_robots_referers:
row = [r, robots_referers[r]['pages'], robots_referers[r]['hits']]
row = [generateHTMLLink(r), robots_referers[r]['pages'], robots_referers[r]['hits']]
table.appendRow(row)
page.appendBlock(table)
@ -77,12 +77,12 @@ class IWLADisplayReferers(IPlugin):
table.appendRow(['<b>External URL</b>', '', ''])
for r,_ in top_referers[:10]:
row = [r, referers[r]['pages'], referers[r]['hits']]
row = [generateHTMLLink(r), referers[r]['pages'], referers[r]['hits']]
table.appendRow(row)
table.appendRow(['<b>External URL (robot)</b>', '', ''])
for r,_ in top_robots_referers[:10]:
row = [r, robots_referers[r]['pages'], robots_referers[r]['hits']]
row = [generateHTMLLink(r), robots_referers[r]['pages'], robots_referers[r]['hits']]
table.appendRow(row)
index.appendBlock(table)
@ -99,7 +99,7 @@ class IWLADisplayReferers(IPlugin):
for phrase in top_key_phrases:
table.appendRow([phrase[0], phrase[1]])
page.appendBlock(table)
display.addPage(page)
link = '<a href=\'%s\'>All key phrases</a>' % (filename)

View File

@ -23,7 +23,7 @@ class IWLADisplayTopDownloads(IPlugin):
table = DisplayHTMLBlockTable('All Downloads', ['URI', 'Hit'])
table.setColsCSSClass(['', 'iwla_hit'])
for (uri, entrance) in top_downloads:
table.appendRow([uri, entrance])
table.appendRow([generateHTMLLink(uri), entrance])
page.appendBlock(table)
self.iwla.getDisplay().addPage(page)
@ -37,5 +37,5 @@ class IWLADisplayTopDownloads(IPlugin):
table = DisplayHTMLBlockTable(title, ['URI', 'Hits'])
table.setColsCSSClass(['', 'iwla_hit'])
for (uri, entrance) in top_downloads[:10]:
table.appendRow([uri, entrance])
table.appendRow([generateHTMLLink(uri), entrance])
index.appendBlock(table)

View File

@ -23,7 +23,7 @@ class IWLADisplayTopHits(IPlugin):
table = DisplayHTMLBlockTable('All Hits', ['URI', 'Entrance'])
table.setColsCSSClass(['', 'iwla_hit'])
for (uri, entrance) in top_hits:
table.appendRow([uri, entrance])
table.appendRow([generateHTMLLink(uri), entrance])
page.appendBlock(table)
self.iwla.getDisplay().addPage(page)
@ -37,5 +37,5 @@ class IWLADisplayTopHits(IPlugin):
table = DisplayHTMLBlockTable(title, ['URI', 'Entrance'])
table.setColsCSSClass(['', 'iwla_hit'])
for (uri, entrance) in top_hits[:10]:
table.appendRow([uri, entrance])
table.appendRow([generateHTMLLink(uri), entrance])
index.appendBlock(table)

View File

@ -23,7 +23,7 @@ class IWLADisplayTopPages(IPlugin):
table = DisplayHTMLBlockTable('All Pages', ['URI', 'Entrance'])
table.setColsCSSClass(['', 'iwla_hit'])
for (uri, entrance) in top_pages:
table.appendRow([uri, entrance])
table.appendRow([generateHTMLLink(uri), entrance])
page.appendBlock(table)
self.iwla.getDisplay().addPage(page)
@ -37,5 +37,5 @@ class IWLADisplayTopPages(IPlugin):
table = DisplayHTMLBlockTable(title, ['URI', 'Entrance'])
table.setColsCSSClass(['', 'iwla_hit'])
for (uri, entrance) in top_pages[:10]:
table.appendRow([uri, entrance])
table.appendRow([generateHTMLLink(uri), entrance])
index.appendBlock(table)

View File

@ -1,5 +1,5 @@
import re
import xml.sax.saxutils as saxutils
import urllib
from iwla import IWLA
from iplugin import IPlugin
@ -57,8 +57,7 @@ class IWLAPostAnalysisReferers(IPlugin):
groups = key_phrase_re.match(p)
if groups:
key_phrase = groups.groupdict()['key_phrase']
key_phrase = key_phrase.replace('+', ' ').lower()
key_phrase = saxutils.unescape(key_phrase)
key_phrase = urllib.unquote_plus(key_phrase).decode('utf8')
if not key_phrase in key_phrases.keys():
key_phrases[key_phrase] = 1
else: