Dénote

Dénote Git Source Tree

Root/denote/search.py

1# -*- coding: utf-8 -*-
2"""
3 Copyright 2016 Grégory Soutadé
4
5 This file is part of Dénote.
6
7 Dénote is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11
12 Dénote is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Dénote. If not, see <http://www.gnu.org/licenses/>.
19"""
20import re
21import unicodedata
22import os
23import operator
24import pickle
25from django.db import models
26
27import models
28#from models import Note
29
class Search:
    """Full-text search index over notes, persisted as a pickle file.

    The index (called "hashtable" throughout) maps a lower-cased,
    accent-stripped word to a list of ``[note_id, weight]`` pairs, one pair
    per note containing that word.  It is stored in the file ``_search.db``
    inside the directory named by the ``DENOTE_ROOT`` environment variable.

    NOTE(security): the index is (de)serialized with ``pickle``; only load
    an index file that this application wrote itself.
    """

    # Words shorter than this are neither indexed nor searched.
    MINIMUM_LETTERS = 3

    def __init__(self):
        """Compile the cleaning regexes and reset the HTML report."""
        self.report = ''

        self.tagreg = re.compile(r'<[^>]+>')
        self.htmlreg = re.compile(r'&[^;]+;')
        self.numreg = re.compile(r'[0-9]+')
        self.pat = re.compile(r'\s+')

        # Characters turned into a word separator.  Every item needs its own
        # comma: adjacent literals are silently concatenated (u'^' '=' would
        # become the single two-character item u'^=').
        self.replace_by_space = (
            u'(', u')', u'#', u'\'', u'{', u'}', u'[', u']',
            u'-', u'|', u'\t', u'\\', u'_', u'^', u'=', u'+', u'$',
            u'£', u'%', u'µ', u'*', u',', u'?', u';', u'.', u'/',
            u':', u'!', u'§', u'€', u'²',
        )

    # Imported from generator.py
    def _addReport(self, string, color=''):
        """Append one HTML line to ``self.report``.

        The line is prefixed with the class name in bold and, when *color*
        is not empty, wrapped in a ``<span>`` of that color.
        """
        parts = ['<b>', self.__class__.__name__, '</b> : ', string]
        if color != '':
            parts.insert(0, '<span style="color:' + color + '">')
            parts.append('</span>')
        parts.append('<br/>\n')
        self.report = self.report + ''.join(parts)

    def _addWarning(self, string):
        """Append a yellow line to the report."""
        self._addReport(string, 'yellow')

    def _addError(self, string):
        """Append a red line to the report."""
        self._addReport(string, 'red')

    def _database_path(self):
        """Return the path of the index file.

        Raises KeyError when ``DENOTE_ROOT`` is not set in the environment.
        """
        return os.path.join(os.environ['DENOTE_ROOT'], '_search.db')

    def _saveDatabase(self, hashtable):
        """Pickle *hashtable* to the index file, replacing its content."""
        # Binary mode: pickle produces bytes and the file is read back
        # with 'rb' in _loadDatabase.
        with open(self._database_path(), 'wb') as f:
            f.write(pickle.dumps(hashtable))

    def _loadDatabase(self):
        """Return the index read from disk, or ``{}`` when no file exists."""
        filename = self._database_path()

        if not os.path.exists(filename):
            print('No search index !')
            return {}

        with open(filename, 'rb') as f:
            return pickle.load(f)

    def _strip_accents(self, s):
        """Return *s* without combining marks (e.g. u'\\xe9' becomes u'e')."""
        decomposed = unicodedata.normalize('NFD', s)
        return ''.join(c for c in decomposed
                       if unicodedata.category(c) != 'Mn')

    def _remove_tag(self, content):
        """Strip HTML entities, digits, punctuation and tags from *content*.

        Returns the remaining words separated by single spaces, without
        leading or trailing whitespace.
        """
        content = self.htmlreg.sub('', content)
        content = self.numreg.sub('', content)
        content = content.replace('"', '')

        for char in self.replace_by_space:
            content = content.replace(char, ' ')

        # Tags and line breaks separate words: substitute a space so that
        # "foo<br/>bar" or "foo\nbar" is not glued into "foobar".
        content = self.tagreg.sub(' ', content)

        return self.pat.sub(' ', content).strip()

    def _prepare_string(self, content):
        """Return *content* as cleaned, accent-free text.

        ``None`` yields an empty string; byte strings are decoded as UTF-8
        so that the whole pipeline works on text only.
        """
        if content is None:
            return u''
        if isinstance(content, bytes):
            content = content.decode('utf-8')

        return self._strip_accents(self._remove_tag(content))

    def _split_words(self, content):
        """Return the lower-cased indexable words of *content*, in order."""
        return [word.lower()
                for word in self._prepare_string(content).split()
                if len(word) >= self.MINIMUM_LETTERS]

    def _note_id(self, note):
        """Return the integer id for *note* (an id, its string form, or an
        object exposing an ``id`` attribute)."""
        return int(getattr(note, 'id', note))

    def _purge(self, hashtable, note_ids):
        """Drop every entry of the notes in *note_ids* from *hashtable*.

        Words left without any note are removed from the index.
        """
        # Iterate over a snapshot: keys are deleted while looping.
        for word, postings in list(hashtable.items()):
            kept = [entry for entry in postings if entry[0] not in note_ids]
            if kept:
                hashtable[word] = kept
            else:
                del hashtable[word]

    def _indexContent(self, hashtable, index, content, word_weight):
        """Add the words of *content* to *hashtable* for note *index*.

        Each occurrence of a word adds *word_weight* to the weight of the
        ``[index, weight]`` pair of that word; the pair is created on the
        first occurrence.
        """
        for word in self._split_words(content):
            postings = hashtable.setdefault(word, [])
            for entry in postings:
                if entry[0] == index:
                    entry[1] += word_weight
                    break
            else:
                postings.append([index, word_weight])

    def _index(self, hashtable, index):
        """Index the text (weight 1) and title (weight 5) of note *index*.

        Does nothing when no note has this primary key.
        """
        # NOTE(review): assumes the Note model lives in the project's
        # ``models`` module imported at the top of the file (see the
        # commented-out ``from models import Note``) -- confirm.
        try:
            note = models.Note.objects.get(pk=index)
        except models.Note.DoesNotExist:
            return

        self._indexContent(hashtable, index, note.text, 1)
        self._indexContent(hashtable, index, note.title, 5)

    def _index_note(self, note, saveDatabase=True):
        """Load the index, add note *note* to it, and save it when
        *saveDatabase* is true."""
        hashtable = self._loadDatabase()

        self._index(hashtable, self._note_id(note))

        if saveDatabase:
            self._saveDatabase(hashtable)

    def _remove_note(self, note, saveDatabase=True):
        """Load the index, remove note *note* from it, and save it when
        *saveDatabase* is true."""
        hashtable = self._loadDatabase()

        self._purge(hashtable, set([self._note_id(note)]))

        if saveDatabase:
            self._saveDatabase(hashtable)

    def generate_index(self, notes):
        """(Re)index every note of *notes* and save the index.

        *notes* is an iterable of objects with ``id``, ``text`` and
        ``title`` attributes.  Entries previously stored for these notes are
        replaced, so running the generation twice gives the same index.
        """
        notes = list(notes)  # iterated twice below
        hashtable = self._loadDatabase()

        self._purge(hashtable, set(note.id for note in notes))

        for note in notes:
            self._indexContent(hashtable, note.id, note.text, 1)
            self._indexContent(hashtable, note.id, note.title, 5)

        self._saveDatabase(hashtable)

    def index_note(self, note):
        """Add note *note* to the index on disk."""
        return self._index_note(note, True)

    def delete_note(self, note):
        """Remove note *note* from the index on disk."""
        return self._remove_note(note, True)

    def edit_note(self, note, saveDatabase=True):
        """Replace the index entries of note *note* with its current content.

        Removal and re-indexing happen on the same in-memory index, which is
        written to disk only when *saveDatabase* is true.
        """
        hashtable = self._loadDatabase()
        note_id = self._note_id(note)

        self._purge(hashtable, set([note_id]))
        self._index(hashtable, note_id)

        if saveDatabase:
            self._saveDatabase(hashtable)

    def search(self, string):
        """Return the ids of the notes matching *string*, best match first.

        Each query word of at least ``MINIMUM_LETTERS`` letters matches
        every indexed word that contains it; a note's score is the sum of
        the weights of all matches.  Ties are ordered by ascending id.
        """
        hashtable = self._loadDatabase()

        scores = {}
        for word in self._split_words(string):
            for key, postings in hashtable.items():
                # Plain substring test: same result as '.*word.*' without
                # interpreting the user's input as a regular expression.
                if word in key:
                    for note_id, weight in postings:
                        scores[note_id] = scores.get(note_id, 0) + weight

        ranked = sorted(scores.items(), key=lambda item: (-item[1], item[0]))

        return [note_id for note_id, _ in ranked]

Archive Download this file

Branches

Tags