search: support searching for (sub)titles (#10717)

Co-authored-by: Adam Turner <9087854+aa-turner@users.noreply.github.com>

commit 7da60f2353
parent 7473b05e0d
CHANGES | 2 ++

@@ -18,6 +18,8 @@ Features added
 * #10755: linkcheck: Check the source URL of raw directives that use the ``url``
   option.
 * #10781: Allow :rst:role:`ref` role to be used with definitions and fields.
+* #10717: HTML Search: Increase priority for full title and
+  subtitle matches in search results

 Bugs fixed
 ----------
sphinx/environment/__init__.py

@@ -59,7 +59,7 @@ if docutils.__version_info__[:2] <= (0, 17):

 # This is increased every time an environment attribute is added
 # or changed to properly invalidate pickle files.
-ENV_VERSION = 56
+ENV_VERSION = 57

 # config status
 CONFIG_OK = 1
sphinx/search/__init__.py

@@ -183,6 +183,7 @@ class WordCollector(nodes.NodeVisitor):
     def __init__(self, document: nodes.document, lang: SearchLanguage) -> None:
         super().__init__(document)
         self.found_words: List[str] = []
+        self.found_titles: List[Tuple[str, str]] = []
         self.found_title_words: List[str] = []
         self.lang = lang

@@ -213,7 +214,10 @@ class WordCollector(nodes.NodeVisitor):
         elif isinstance(node, nodes.Text):
             self.found_words.extend(self.lang.split(node.astext()))
         elif isinstance(node, nodes.title):
-            self.found_title_words.extend(self.lang.split(node.astext()))
+            title = node.astext()
+            ids = node.parent['ids']
+            self.found_titles.append((title, ids[0] if ids else None))
+            self.found_title_words.extend(self.lang.split(title))
         elif isinstance(node, Element) and self.is_meta_keywords(node):
            keywords = node['content']
            keywords = [keyword.strip() for keyword in keywords.split(',')]
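For context, a minimal sketch (plain docutils, made-up section name) of what the visitor now records per title node: the title text paired with the parent section's first id, which the search UI later uses as the URL fragment for deep-linking into the page.

    from docutils import nodes

    # Build a section the way docutils would for a document heading.
    section = nodes.section(ids=['section-title'])
    section += nodes.title(text='Section Title')

    title_node = section[0]
    ids = title_node.parent['ids']
    # What dispatch_visit appends to found_titles:
    print((title_node.astext(), ids[0] if ids else None))
    # ('Section Title', 'section-title')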
@@ -237,6 +241,7 @@ class IndexBuilder:
         self._mapping: Dict[str, Set[str]] = {}  # stemmed word -> set(docname)
         # stemmed words in titles -> set(docname)
         self._title_mapping: Dict[str, Set[str]] = {}
+        self._all_titles: Dict[str, List[Tuple[str, str]]] = {}  # docname -> all titles
         self._stem_cache: Dict[str, str] = {}  # word -> stemmed word
         self._objtypes: Dict[Tuple[str, str], int] = {}  # objtype -> index
         # objtype index -> (domain, type, objname (localized))
@@ -281,6 +286,11 @@ class IndexBuilder:
         index2fn = frozen['docnames']
         self._filenames = dict(zip(index2fn, frozen['filenames']))
         self._titles = dict(zip(index2fn, frozen['titles']))
+        self._all_titles = {}
+
+        for title, doc_tuples in frozen['alltitles'].items():
+            for doc, titleid in doc_tuples:
+                self._all_titles.setdefault(index2fn[doc], []).append((title, titleid))

         def load_terms(mapping: Dict[str, Any]) -> Dict[str, Set[str]]:
             rv = {}
@@ -364,9 +374,16 @@ class IndexBuilder:
         objects = self.get_objects(fn2index)  # populates _objtypes
         objtypes = {v: k[0] + ':' + k[1] for (k, v) in self._objtypes.items()}
         objnames = self._objnames
+
+        alltitles: Dict[str, List[Tuple[int, str]]] = {}
+        for docname, titlelist in self._all_titles.items():
+            for title, titleid in titlelist:
+                alltitles.setdefault(title.lower(), []).append((fn2index[docname], titleid))
+
         return dict(docnames=docnames, filenames=filenames, titles=titles, terms=terms,
                     objects=objects, objtypes=objtypes, objnames=objnames,
-                    titleterms=title_terms, envversion=self.env.version)
+                    titleterms=title_terms, envversion=self.env.version,
+                    alltitles=alltitles)

     def label(self) -> str:
         return "%s (code: %s)" % (self.lang.language_name, self.lang.lang)
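To make the round trip concrete, a small illustrative sketch (docnames and titles are made up) of the 'alltitles' entry this freeze step emits, and of how load() above inverts it back into the per-docname mapping:

    # 'alltitles' maps a lowercased title to (doc index, anchor id) pairs.
    frozen_alltitles = {
        'installation guide': [(0, 'installation-guide'), (2, 'installation-guide')],
    }
    index2fn = ['intro', 'usage', 'setup']  # position in 'docnames' -> docname

    # The inversion performed by load():
    all_titles = {}
    for title, doc_tuples in frozen_alltitles.items():
        for doc, titleid in doc_tuples:
            all_titles.setdefault(index2fn[doc], []).append((title, titleid))

    print(all_titles)
    # {'intro': [('installation guide', 'installation-guide')],
    #  'setup': [('installation guide', 'installation-guide')]}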
@@ -374,13 +391,16 @@ class IndexBuilder:
     def prune(self, docnames: Iterable[str]) -> None:
         """Remove data for all docnames not in the list."""
         new_titles = {}
+        new_alltitles = {}
         new_filenames = {}
         for docname in docnames:
             if docname in self._titles:
                 new_titles[docname] = self._titles[docname]
+                new_alltitles[docname] = self._all_titles[docname]
                 new_filenames[docname] = self._filenames[docname]
         self._titles = new_titles
         self._filenames = new_filenames
+        self._all_titles = new_alltitles
         for wordnames in self._mapping.values():
             wordnames.intersection_update(docnames)
         for wordnames in self._title_mapping.values():
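A toy illustration (made-up data) of what prune() now does for the title store: only documents that survive the rebuild keep their (title, id) entries, mirroring the existing handling of _titles and _filenames.

    titles = {'intro': 'Intro', 'removed': 'Removed'}
    all_titles = {'intro': [('Intro', 'intro')],
                  'removed': [('Removed', 'removed')]}
    docnames = ['intro']  # documents still present after the rebuild

    # Equivalent of the loop above, condensed:
    new_alltitles = {d: all_titles[d] for d in docnames if d in titles}
    print(new_alltitles)  # {'intro': [('Intro', 'intro')]}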
@@ -403,6 +423,8 @@ class IndexBuilder:
             return self._stem_cache[word]
         _filter = self.lang.word_filter

+        self._all_titles[docname] = visitor.found_titles
+
         for word in visitor.found_title_words:
             stemmed_word = stem(word)
             if _filter(stemmed_word):
sphinx/themes/basic/static/searchtools.js

@@ -237,6 +237,11 @@ const Search = {
    * execute search (requires search index to be loaded)
    */
   query: (query) => {
+    const filenames = Search._index.filenames;
+    const docNames = Search._index.docnames;
+    const titles = Search._index.titles;
+    const allTitles = Search._index.alltitles;
+
     // stem the search terms and add them to the correct list
     const stemmer = new Stemmer();
     const searchTerms = new Set();
@@ -272,6 +277,23 @@ const Search = {
     let results = [];
     _removeChildren(document.getElementById("search-progress"));

+    const queryLower = query.toLowerCase();
+    for (const [title, foundTitles] of Object.entries(allTitles)) {
+      if (title.includes(queryLower) && (queryLower.length >= title.length/2)) {
+        for (const [file, id] of foundTitles) {
+          let score = Math.round(100 * queryLower.length / title.length)
+          results.push([
+            docNames[file],
+            titles[file],
+            id !== null ? "#" + id : "",
+            null,
+            score,
+            filenames[file],
+          ]);
+        }
+      }
+    }
+
     // lookup as object
     objectTerms.forEach((term) =>
       results.push(...Search.performObjectSearch(term, objectTerms))
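As a worked example of the scoring rule just added (query and title are hypothetical): a partial title match only qualifies when the query covers at least half of the title, and the score is that coverage scaled to 0..100. The same arithmetic in Python:

    query = "instal"        # user input, lowercased
    title = "installation"  # a key in the alltitles index

    if query in title and len(query) >= len(title) / 2:
        score = round(100 * len(query) / len(title))
        print(score)  # 50, i.e. round(100 * 6 / 12)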
@@ -399,8 +421,8 @@ const Search = {
     // prepare search
     const terms = Search._index.terms;
     const titleTerms = Search._index.titleterms;
-    const docNames = Search._index.docnames;
     const filenames = Search._index.filenames;
+    const docNames = Search._index.docnames;
     const titles = Search._index.titles;

     const scoreMap = new Map();
tests/test_search.py

@@ -177,7 +177,8 @@ def test_IndexBuilder():
                  'non': [0, 1, 2, 3],
                  'test': [0, 1, 2, 3]},
        'titles': ('title1_1', 'title1_2', 'title2_1', 'title2_2'),
-       'titleterms': {'section_titl': [0, 1, 2, 3]}
+       'titleterms': {'section_titl': [0, 1, 2, 3]},
+       'alltitles': {'section_title': [(0, 'section-title'), (1, 'section-title'), (2, 'section-title'), (3, 'section-title')]}
    }
    assert index._objtypes == {('dummy1', 'objtype1'): 0, ('dummy2', 'objtype1'): 1}
    assert index._objnames == {0: ('dummy1', 'objtype1', 'objtype1'),
@@ -234,7 +235,8 @@ def test_IndexBuilder():
                  'non': [0, 1],
                  'test': [0, 1]},
        'titles': ('title1_2', 'title2_2'),
-       'titleterms': {'section_titl': [0, 1]}
+       'titleterms': {'section_titl': [0, 1]},
+       'alltitles': {'section_title': [(0, 'section-title'), (1, 'section-title')]}
    }
    assert index._objtypes == {('dummy1', 'objtype1'): 0, ('dummy2', 'objtype1'): 1}
    assert index._objnames == {0: ('dummy1', 'objtype1', 'objtype1'),