From 6373729f1ddc7afac0ae927e7641697f8e0d8766 Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Tue, 23 Sep 2008 21:21:32 +0000 Subject: [PATCH] Improved search slightly by adding keyword based lookup --- sphinx/builder.py | 2 +- sphinx/search.py | 75 +++++++++++------------------------- sphinx/static/searchtools.js | 29 ++++++++++---- 3 files changed, 45 insertions(+), 61 deletions(-) diff --git a/sphinx/builder.py b/sphinx/builder.py index c478873fd..07e646518 100644 --- a/sphinx/builder.py +++ b/sphinx/builder.py @@ -386,7 +386,7 @@ class StandaloneHTMLBuilder(Builder): def prepare_writing(self, docnames): from sphinx.search import IndexBuilder - self.indexer = IndexBuilder() + self.indexer = IndexBuilder(self.env) self.load_indexer(docnames) self.docwriter = HTMLWriter(self) self.docsettings = OptionParser( diff --git a/sphinx/search.py b/sphinx/search.py index 7368f7cfe..9237ddcfd 100644 --- a/sphinx/search.py +++ b/sphinx/search.py @@ -87,7 +87,8 @@ class IndexBuilder(object): 'pickle': pickle } - def __init__(self): + def __init__(self, env): + self.env = env self._stemmer = Stemmer() # filename -> title self._titles = {} @@ -110,19 +111,28 @@ class IndexBuilder(object): format = self.formats[format] format.dump(self.freeze(), stream) + def get_keyword_map(self): + """Return a dict of all keywords.""" + rv = {} + for kw, (ref, _, _, _) in self.env.modules.iteritems(): + rv[kw] = (ref, 'module', 'module-' + kw) + for kw, (ref, ref_type) in self.env.descrefs.iteritems(): + rv[kw] = (ref, ref_type, kw) + return rv + def freeze(self): - """ - Create a useable data structure. You can pass this output - to the `SearchFrontend` to search the index. 
- """ - fns, titles = self._titles.keys(), self._titles.values() - fn2index = dict((f, i) for (i, f) in enumerate(fns)) - return [ - fns, - titles, - dict((k, [fn2index[fn] for fn in v]) - for (k, v) in self._mapping.iteritems()), - ] + """Create a useable data structure for serializing.""" + filenames = self._titles.keys() + titles = self._titles.values() + fn2index = dict((f, i) for (i, f) in enumerate(filenames)) + return dict( + filenames=filenames, + titles=titles, + terms=dict((k, [fn2index[fn] for fn in v]) + for (k, v) in self._mapping.iteritems()), + keywords=dict((k, (fn2index[v[0]],) + v[1:]) for k, v in + self.get_keyword_map().iteritems()) + ) def prune(self, filenames): """Remove data for all filenames not in the list.""" @@ -147,45 +157,6 @@ class IndexBuilder(object): for word in word_re.findall(title): add_term(word) - add_term(word, 'T') for word in visitor.found_words: add_term(word) - - -class SearchFrontend(object): - """ - This class acts as a frontend for the search index. It can search - a searchindex as provided by `IndexBuilder`. 
- """ - - def __init__(self, index): - self.filenames, self.titles, self.words = index - self._stemmer = Stemmer() - - def query(self, required, excluded): - file_map = {} - for word in required: - if word not in self.words: - break - for fid in self.words[word]: - file_map.setdefault(fid, set()).add(word) - - return sorted(((self.filenames[fid], self.titles[fid]) - for fid, words in file_map.iteritems() - if len(words) == len(required) and not - any(fid in self.words.get(word, ()) for word in excluded) - ), key=lambda x: x[1].lower()) - - def search(self, searchstring): - required = set() - excluded = set() - for word in searchstring.split(): - if word.startswith('-'): - storage = excluded - word = word[1:] - else: - storage = required - storage.add(self._stemmer.stem(word)) - - return self.query(required, excluded) diff --git a/sphinx/static/searchtools.js b/sphinx/static/searchtools.js index 77d8639a2..949201363 100644 --- a/sphinx/static/searchtools.js +++ b/sphinx/static/searchtools.js @@ -294,6 +294,7 @@ var Search = { var excluded = []; var hlwords = []; var tmp = query.split(/\s+/); + var keyword = (tmp.length == 1) ? 
tmp[0] : null; for (var i = 0; i < tmp.length; i++) { // stem the word var word = stemmer.stemWord(tmp[i]).toLowerCase(); @@ -317,13 +318,22 @@ var Search = { console.info('excluded: ', excluded); // prepare search - var filenames = this._index[0]; - var titles = this._index[1]; - var words = this._index[2]; + var filenames = this._index.filenames; + var titles = this._index.titles; + var words = this._index.terms; var fileMap = {}; var files = null; + var results = []; + var regularResults = []; $('#search-progress').empty(); + // lookup the keyword + if (keyword != null) { + var match = this._index.keywords[keyword]; + if (match) + results.push([filenames[match[0]], titles[match[0]], match[2]]); + } + // perform the search on the required words for (var i = 0; i < searchwords.length; i++) { var word = searchwords[i]; @@ -342,7 +352,6 @@ var Search = { // now check if the files are in the correct // areas and if the don't contain excluded words - var results = []; for (var file in fileMap) { var valid = true; @@ -362,20 +371,23 @@ var Search = { // if we have still a valid result we can add it // to the result list if (valid) - results.push([filenames[file], titles[file]]); + regularResults.push([filenames[file], titles[file], null]); } // delete unused variables in order to not waste // memory until list is retrieved completely delete filenames, titles, words; - // now sort the results by title - results.sort(function(a, b) { + // now sort the regular results by title + regularResults.sort(function(a, b) { var left = a[1].toLowerCase(); var right = b[1].toLowerCase(); return (left > right) ? -1 : ((left < right) ? 1 : 0); }); + // combine both + results = results.concat(regularResults); + // print the results var resultCount = results.length; function displayNextItem() { @@ -386,7 +398,8 @@ var Search = { listItem.append($('').attr( 'href', item[0] + DOCUMENTATION_OPTIONS.FILE_SUFFIX + - highlightstring).html(item[1])); + highlightstring + + (item[2] ? 
'#' + item[2] : '')).html(item[1])); $.get('_sources/' + item[0] + '.txt', function(data) { listItem.append($.makeSearchSummary(data, searchwords, hlwords)); Search.output.append(listItem);