Make search results more relevant by including post titles in the index

This commit is contained in:
Grégory Soutadé 2012-12-10 20:50:27 +01:00
parent 2f1f38ca5c
commit 839b935d47
2 changed files with 34 additions and 16 deletions

View File

@ -5,6 +5,7 @@ import os
import operator
import pickle
from django.db import models
from dynastie.models import Post
class Search:
MINIMUM_LETTERS = 3
@ -16,7 +17,6 @@ class Search:
self.htmlreg = re.compile('&[^;]+;')
self.numreg = re.compile('[0-9]+')
self.pat = re.compile(r'\s+')
self.wordreg = re.compile('\w+')
self.replace_by_space = ('(', ')', '#', '\'', '{', '}', '[', ']',
'-', '|', '\t', '\\', '_', '^' '=', '+', '$',
@ -97,14 +97,10 @@ class Search:
return content
def _index_file(self, hashtable, filename, index):
f = open(filename, 'r')
content = f.read()
f.close()
def _indexContent(self, hashtable, index, content, word_weight):
content = self._prepare_string(content)
wordlist = re.findall(self.wordreg, content)
wordlist = content.split(' ')
for word in wordlist:
if len(word) < self.MINIMUM_LETTERS:
@ -113,8 +109,25 @@ class Search:
if not word in hashtable:
hashtable[word] = []
if not index in hashtable[word]:
hashtable[word].append(index)
hashtable[word].append([index, word_weight])
else:
weight = hashtable[word][1]
hashtable[word][1] = weight + word_weight
def _index_file(self, hashtable, filename, index):
    """Index the content of a file together with the title of its post.

    The file is read in full, then the post whose primary key is
    ``index`` is looked up.  Nothing is indexed when the post cannot be
    retrieved or when it is not published.  Otherwise the file content
    is indexed with a weight of 1 per word and the post title with a
    weight of 5 per word, so that title matches count more.

    Args:
        hashtable: Index structure passed through to ``_indexContent``,
            which updates it in place.
        filename: Path of the file whose content must be indexed.
        index: Primary key of the post; also used as the identifier
            stored in the index.
    """
    # The context manager closes the file even if read() raises.
    with open(filename, 'r') as f:
        content = f.read()

    try:
        post = Post.objects.get(pk=index)
    except Exception:
        # Best effort: a post that cannot be loaded is simply not
        # indexed.  Unlike a bare ``except:``, this does not swallow
        # KeyboardInterrupt or SystemExit.
        return

    # Compared explicitly against False (as before) so that only a real
    # "not published" value skips the post.
    if post.published == False:
        return

    self._indexContent(hashtable, index, content, 1)
    self._indexContent(hashtable, index, post.title.encode('utf-8'), 5)
def create_index(self, blog):
hashtable = {}
@ -172,7 +185,7 @@ class Search:
string = self._prepare_string(string.encode('utf-8'))
wordlist = re.findall(self.wordreg, string)
wordlist = string.split(' ')
res = {}
for word in wordlist:
@ -184,9 +197,9 @@ class Search:
if word not in hashtable:
continue
for post in hashtable[word]:
if not post in res:
res[post] = 0
res[post] = res[post] + 1
if not post[0] in res:
res[post[0]] = post[1]
res[post[0]] += post[1]
sorted_res = sorted(res.iteritems(), key=operator.itemgetter(1))
sorted_res.reverse()

View File

@ -43,15 +43,20 @@
</ul>
</div>
<div class="menu">
<!-- <div class="menu_content"> -->
<!-- <div class="menu_content_header">Search</div> -->
<!-- <div class="menu_content_content">La recherche</div> -->
<!-- </div> -->
<div class="menu_content">
<div class="menu_content_header">Recherche</div>
<div id="menu_main">
<dyn:replace div_name="form" id="search_form" method="POST" action="/search/dyn:blog_id">
<input type="text" name="text" onkeypress="handleKeyPress(event,this.form)"/>
</dyn:replace>
</div>
</div>
<div class="menu_content">
<div class="menu_content_header">Menu principal</div>
<div id="menu_main">
<div class="menu_content_content"><a href="/">Première page</a></div>
<div class="menu_content_content"><a href="/about.html">À propos</a></div>
<div class="menu_content_content"><a href="http://indefero.soutade.fr">Projets personnels</a></div>
</div>
</div>
<div class="menu_content">