Improved search slightly by adding keyword-based lookup

2025-02-25 18:55:22 -06:00 · 2008-09-23 21:21:32 +00:00
parent 449adba3b6
commit 6373729f1d
3 changed files with 45 additions and 61 deletions
--- a/sphinx/builder.py
+++ b/sphinx/builder.py
@@ -386,7 +386,7 @@ class StandaloneHTMLBuilder(Builder):
    def prepare_writing(self, docnames):
        from sphinx.search import IndexBuilder
-        self.indexer = IndexBuilder()
+        self.indexer = IndexBuilder(self.env)
        self.load_indexer(docnames)
        self.docwriter = HTMLWriter(self)
        self.docsettings = OptionParser(
--- a/sphinx/search.py
+++ b/sphinx/search.py
@@ -87,7 +87,8 @@ class IndexBuilder(object):
        'pickle':   pickle
    }
-    def __init__(self):
+    def __init__(self, env):
        self.env = env
        self._stemmer = Stemmer()
        # filename -> title
        self._titles = {}
@@ -110,19 +111,28 @@ class IndexBuilder(object):
            format = self.formats[format]
        format.dump(self.freeze(), stream)
    def get_keyword_map(self):
        """Collect every known keyword from the build environment.

        The result maps ``keyword -> (ref, type, anchor)``:

        * entries from ``self.env.modules`` get the type ``'module'`` and
          the anchor ``'module-' + keyword``;
        * entries from ``self.env.descrefs`` keep their own type and use
          the keyword itself as the anchor.

        ``descrefs`` is applied last, so it wins if a keyword appears in
        both tables.
        """
        keywords = {}
        # Module records are 4-tuples; only the first field is needed.
        for name, (target, _, _, _) in self.env.modules.iteritems():
            keywords[name] = (target, 'module', 'module-' + name)
        # Description references are (ref, ref_type) pairs.
        keywords.update(
            (name, (target, kind, name))
            for name, (target, kind) in self.env.descrefs.iteritems())
        return keywords
    def freeze(self):
-        """
+        """Create a useable data structure for serializing."""
-        Create a useable data structure. You can pass this output
+        filenames = self._titles.keys()
-        to the `SearchFrontend` to search the index.
+        titles = self._titles.values()
-        """
+        fn2index = dict((f, i) for (i, f) in enumerate(filenames))
-        fns, titles = self._titles.keys(), self._titles.values()
+        return dict(
-        fn2index = dict((f, i) for (i, f) in enumerate(fns))
+            filenames=filenames,
-        return [
+            titles=titles,
-            fns,
+            terms=dict((k, [fn2index[fn] for fn in v])
            titles,
            dict((k, [fn2index[fn] for fn in v])
                       for (k, v) in self._mapping.iteritems()),
-        ]
+            keywords=dict((k, (fn2index[v[0]],) + v[1:]) for k, v in
                          self.get_keyword_map().iteritems())
        )
    def prune(self, filenames):
        """Remove data for all filenames not in the list."""
@@ -147,45 +157,6 @@ class IndexBuilder(object):
        for word in word_re.findall(title):
            add_term(word)
            add_term(word, 'T')
        for word in visitor.found_words:
            add_term(word)
 class SearchFrontend(object):
    """
    Query interface on top of a search index produced by `IndexBuilder`.

    The index is a 3-item sequence: the filenames, the titles, and a
    mapping from stemmed word to the ids of the files containing it.
    File ids are used to index into both filenames and titles.
    """
    def __init__(self, index):
        filenames, titles, words = index
        self.filenames = filenames
        self.titles = titles
        self.words = words
        self._stemmer = Stemmer()
    def query(self, required, excluded):
        """
        Return ``(filename, title)`` pairs, sorted case-insensitively by
        title, for files that contain every word in `required` and none
        of the words in `excluded`.
        """
        hits = {}
        for term in required:
            # an unknown required word means no file can match them all
            if term not in self.words:
                break
            for doc_id in self.words[term]:
                hits.setdefault(doc_id, set()).add(term)
        wanted = len(required)
        matches = []
        for doc_id, found in hits.iteritems():
            if len(found) != wanted:
                continue
            if any(doc_id in self.words.get(term, ()) for term in excluded):
                continue
            matches.append((self.filenames[doc_id], self.titles[doc_id]))
        matches.sort(key=lambda pair: pair[1].lower())
        return matches
    def search(self, searchstring):
        """
        Split `searchstring` on whitespace, stem each word and run the
        query. Words prefixed with ``-`` are treated as excluded words.
        """
        wanted, unwanted = set(), set()
        for token in searchstring.split():
            negated = token.startswith('-')
            if negated:
                token = token[1:]
            stemmed = self._stemmer.stem(token)
            if negated:
                unwanted.add(stemmed)
            else:
                wanted.add(stemmed)
        return self.query(wanted, unwanted)
--- a/sphinx/static/searchtools.js
+++ b/sphinx/static/searchtools.js
@@ -294,6 +294,7 @@ var Search = {
    var excluded = [];
    var hlwords = [];
    var tmp = query.split(/\s+/);
    var keyword = (tmp.length == 1) ? tmp[0] : null;
    for (var i = 0; i < tmp.length; i++) {
      // stem the word
      var word = stemmer.stemWord(tmp[i]).toLowerCase();
@@ -317,13 +318,22 @@ var Search = {
    console.info('excluded: ', excluded);
    // prepare search
-    var filenames = this._index[0];
+    var filenames = this._index.filenames;
-    var titles = this._index[1];
+    var titles = this._index.titles;
-    var words = this._index[2];
+    var words = this._index.terms;
    var fileMap = {};
    var files = null;
    var results = [];
    var regularResults = [];
    $('#search-progress').empty();
    // lookup the keyword
    if (keyword != null) {
      var match = this._index.keywords[keyword];
      if (match)
        results.push([filenames[match[0]], titles[match[0]], match[2]]);
    }
    // perform the search on the required words
    for (var i = 0; i < searchwords.length; i++) {
      var word = searchwords[i];
@@ -342,7 +352,6 @@ var Search = {
    // now check if the files are in the correct
    // areas and if they don't contain excluded words
    var results = [];
    for (var file in fileMap) {
      var valid = true;
@@ -362,20 +371,23 @@ var Search = {
      // if we have still a valid result we can add it
      // to the result list
      if (valid)
-        results.push([filenames[file], titles[file]]);
+        results.push([filenames[file], titles[file], null]);
    }
    // delete unused variables in order to not waste
    // memory until list is retrieved completely
    delete filenames, titles, words;
-    // now sort the results by title
+    // now sort the regular results by title
-    results.sort(function(a, b) {
+    regularResults.sort(function(a, b) {
      var left = a[1].toLowerCase();
      var right = b[1].toLowerCase();
      return (left > right) ? -1 : ((left < right) ? 1 : 0);
    });
    // combine both
    results = results.concat(regularResults);
    // print the results
    var resultCount = results.length;
    function displayNextItem() {
@@ -386,7 +398,8 @@ var Search = {
        listItem.append($('<a/>').attr(
          'href',
          item[0] + DOCUMENTATION_OPTIONS.FILE_SUFFIX +
-          highlightstring).html(item[1]));
+          highlightstring +
          (item[2] ? '#' + item[2] : '')).html(item[1]));
        $.get('_sources/' + item[0] + '.txt', function(data) {
          listItem.append($.makeSearchSummary(data, searchwords, hlwords));
          Search.output.append(listItem);