Improved search slightly by adding keyword-based lookup

This commit is contained in:
Armin Ronacher
2008-09-23 21:21:32 +00:00
parent 449adba3b6
commit 6373729f1d
3 changed files with 45 additions and 61 deletions

View File

@@ -386,7 +386,7 @@ class StandaloneHTMLBuilder(Builder):
def prepare_writing(self, docnames): def prepare_writing(self, docnames):
from sphinx.search import IndexBuilder from sphinx.search import IndexBuilder
self.indexer = IndexBuilder() self.indexer = IndexBuilder(self.env)
self.load_indexer(docnames) self.load_indexer(docnames)
self.docwriter = HTMLWriter(self) self.docwriter = HTMLWriter(self)
self.docsettings = OptionParser( self.docsettings = OptionParser(

View File

@@ -87,7 +87,8 @@ class IndexBuilder(object):
'pickle': pickle 'pickle': pickle
} }
def __init__(self): def __init__(self, env):
self.env = env
self._stemmer = Stemmer() self._stemmer = Stemmer()
# filename -> title # filename -> title
self._titles = {} self._titles = {}
@@ -110,19 +111,28 @@ class IndexBuilder(object):
format = self.formats[format] format = self.formats[format]
format.dump(self.freeze(), stream) format.dump(self.freeze(), stream)
def get_keyword_map(self):
"""Return a dict of all keywords."""
rv = {}
for kw, (ref, _, _, _) in self.env.modules.iteritems():
rv[kw] = (ref, 'module', 'module-' + kw)
for kw, (ref, ref_type) in self.env.descrefs.iteritems():
rv[kw] = (ref, ref_type, kw)
return rv
def freeze(self): def freeze(self):
""" """Create a useable data structure for serializing."""
Create a useable data structure. You can pass this output filenames = self._titles.keys()
to the `SearchFrontend` to search the index. titles = self._titles.values()
""" fn2index = dict((f, i) for (i, f) in enumerate(filenames))
fns, titles = self._titles.keys(), self._titles.values() return dict(
fn2index = dict((f, i) for (i, f) in enumerate(fns)) filenames=filenames,
return [ titles=titles,
fns, terms=dict((k, [fn2index[fn] for fn in v])
titles,
dict((k, [fn2index[fn] for fn in v])
for (k, v) in self._mapping.iteritems()), for (k, v) in self._mapping.iteritems()),
] keywords=dict((k, (fn2index[v[0]],) + v[1:]) for k, v in
self.get_keyword_map().iteritems())
)
def prune(self, filenames): def prune(self, filenames):
"""Remove data for all filenames not in the list.""" """Remove data for all filenames not in the list."""
@@ -147,45 +157,6 @@ class IndexBuilder(object):
for word in word_re.findall(title): for word in word_re.findall(title):
add_term(word) add_term(word)
add_term(word, 'T')
for word in visitor.found_words: for word in visitor.found_words:
add_term(word) add_term(word)
class SearchFrontend(object):
"""
This class acts as a frontend for the search index. It can search
a searchindex as provided by `IndexBuilder`.
"""
def __init__(self, index):
self.filenames, self.titles, self.words = index
self._stemmer = Stemmer()
def query(self, required, excluded):
file_map = {}
for word in required:
if word not in self.words:
break
for fid in self.words[word]:
file_map.setdefault(fid, set()).add(word)
return sorted(((self.filenames[fid], self.titles[fid])
for fid, words in file_map.iteritems()
if len(words) == len(required) and not
any(fid in self.words.get(word, ()) for word in excluded)
), key=lambda x: x[1].lower())
def search(self, searchstring):
required = set()
excluded = set()
for word in searchstring.split():
if word.startswith('-'):
storage = excluded
word = word[1:]
else:
storage = required
storage.add(self._stemmer.stem(word))
return self.query(required, excluded)

View File

@@ -294,6 +294,7 @@ var Search = {
var excluded = []; var excluded = [];
var hlwords = []; var hlwords = [];
var tmp = query.split(/\s+/); var tmp = query.split(/\s+/);
var keyword = (tmp.length == 1) ? tmp[0] : null;
for (var i = 0; i < tmp.length; i++) { for (var i = 0; i < tmp.length; i++) {
// stem the word // stem the word
var word = stemmer.stemWord(tmp[i]).toLowerCase(); var word = stemmer.stemWord(tmp[i]).toLowerCase();
@@ -317,13 +318,22 @@ var Search = {
console.info('excluded: ', excluded); console.info('excluded: ', excluded);
// prepare search // prepare search
var filenames = this._index[0]; var filenames = this._index.filenames;
var titles = this._index[1]; var titles = this._index.titles;
var words = this._index[2]; var words = this._index.terms;
var fileMap = {}; var fileMap = {};
var files = null; var files = null;
var results = [];
var regularResults = [];
$('#search-progress').empty(); $('#search-progress').empty();
// lookup the keyword
if (keyword != null) {
var match = this._index.keywords[keyword];
if (match)
results.push([filenames[match[0]], titles[match[0]], match[2]]);
}
// perform the search on the required words // perform the search on the required words
for (var i = 0; i < searchwords.length; i++) { for (var i = 0; i < searchwords.length; i++) {
var word = searchwords[i]; var word = searchwords[i];
@@ -342,7 +352,6 @@ var Search = {
// now check if the files are in the correct // now check if the files are in the correct
// areas and if they don't contain excluded words // areas and if they don't contain excluded words
var results = [];
for (var file in fileMap) { for (var file in fileMap) {
var valid = true; var valid = true;
@@ -362,20 +371,23 @@ var Search = {
// if we have still a valid result we can add it // if we have still a valid result we can add it
// to the result list // to the result list
if (valid) if (valid)
results.push([filenames[file], titles[file]]); results.push([filenames[file], titles[file], null]);
} }
// delete unused variables in order to not waste // delete unused variables in order to not waste
// memory until list is retrieved completely // memory until list is retrieved completely
delete filenames, titles, words; delete filenames, titles, words;
// now sort the results by title // now sort the regular results by title
results.sort(function(a, b) { regularResults.sort(function(a, b) {
var left = a[1].toLowerCase(); var left = a[1].toLowerCase();
var right = b[1].toLowerCase(); var right = b[1].toLowerCase();
return (left > right) ? -1 : ((left < right) ? 1 : 0); return (left > right) ? -1 : ((left < right) ? 1 : 0);
}); });
// combine both
results = results.concat(regularResults);
// print the results // print the results
var resultCount = results.length; var resultCount = results.length;
function displayNextItem() { function displayNextItem() {
@@ -386,7 +398,8 @@ var Search = {
listItem.append($('<a/>').attr( listItem.append($('<a/>').attr(
'href', 'href',
item[0] + DOCUMENTATION_OPTIONS.FILE_SUFFIX + item[0] + DOCUMENTATION_OPTIONS.FILE_SUFFIX +
highlightstring).html(item[1])); highlightstring +
(item[2] ? '#' + item[2] : '')).html(item[1]));
$.get('_sources/' + item[0] + '.txt', function(data) { $.get('_sources/' + item[0] + '.txt', function(data) {
listItem.append($.makeSearchSummary(data, searchwords, hlwords)); listItem.append($.makeSearchSummary(data, searchwords, hlwords));
Search.output.append(listItem); Search.output.append(listItem);