mirror of
https://github.com/sphinx-doc/sphinx.git
synced 2025-02-25 18:55:22 -06:00
HTML Search: omit anchor reference from document titles in the search index. (#12047)
This commit is contained in:
@@ -89,6 +89,9 @@ Bugs fixed
|
||||
* #12494: Fix invalid genindex.html file produced with translated docs
|
||||
(regression in 7.1.0).
|
||||
Patch by Nicolas Peugnet.
|
||||
* #11961: Omit anchor references from document title entries in the search index,
|
||||
removing duplication of search results.
|
||||
Patch by James Addison.
|
||||
|
||||
Testing
|
||||
-------
|
||||
|
||||
@@ -253,7 +253,7 @@ class BuildEnvironment:
|
||||
# search index data
|
||||
|
||||
# docname -> title
|
||||
self._search_index_titles: dict[str, str] = {}
|
||||
self._search_index_titles: dict[str, str | None] = {}
|
||||
# docname -> filename
|
||||
self._search_index_filenames: dict[str, str] = {}
|
||||
# stemmed words -> set(docname)
|
||||
@@ -261,7 +261,7 @@ class BuildEnvironment:
|
||||
# stemmed words in titles -> set(docname)
|
||||
self._search_index_title_mapping: dict[str, set[str]] = {}
|
||||
# docname -> all titles in document
|
||||
self._search_index_all_titles: dict[str, list[tuple[str, str]]] = {}
|
||||
self._search_index_all_titles: dict[str, list[tuple[str, str | None]]] = {}
|
||||
# docname -> list(index entry)
|
||||
self._search_index_index_entries: dict[str, list[tuple[str, str, str]]] = {}
|
||||
# objtype -> index
|
||||
|
||||
@@ -198,7 +198,7 @@ def _is_meta_keywords(
|
||||
@dataclasses.dataclass
|
||||
class WordStore:
|
||||
words: list[str] = dataclasses.field(default_factory=list)
|
||||
titles: list[tuple[str, str]] = dataclasses.field(default_factory=list)
|
||||
titles: list[tuple[str, str | None]] = dataclasses.field(default_factory=list)
|
||||
title_words: list[str] = dataclasses.field(default_factory=list)
|
||||
|
||||
|
||||
@@ -253,7 +253,7 @@ class IndexBuilder:
|
||||
def __init__(self, env: BuildEnvironment, lang: str, options: dict[str, str], scoring: str) -> None:
|
||||
self.env = env
|
||||
# docname -> title
|
||||
self._titles: dict[str, str] = env._search_index_titles
|
||||
self._titles: dict[str, str | None] = env._search_index_titles
|
||||
# docname -> filename
|
||||
self._filenames: dict[str, str] = env._search_index_filenames
|
||||
# stemmed words -> set(docname)
|
||||
@@ -261,7 +261,7 @@ class IndexBuilder:
|
||||
# stemmed words in titles -> set(docname)
|
||||
self._title_mapping: dict[str, set[str]] = env._search_index_title_mapping
|
||||
# docname -> all titles in document
|
||||
self._all_titles: dict[str, list[tuple[str, str]]] = env._search_index_all_titles
|
||||
self._all_titles: dict[str, list[tuple[str, str | None]]] = env._search_index_all_titles
|
||||
# docname -> list(index entry)
|
||||
self._index_entries: dict[str, list[tuple[str, str, str]]] = env._search_index_index_entries
|
||||
# objtype -> index
|
||||
@@ -369,6 +369,13 @@ class IndexBuilder:
|
||||
return rv
|
||||
|
||||
def get_terms(self, fn2index: dict[str, int]) -> tuple[dict[str, list[int] | int], dict[str, list[int] | int]]:
|
||||
"""
|
||||
Return a mapping of document and title terms to their corresponding sorted document IDs.
|
||||
|
||||
When a term is only found within a single document, then the value for that term will be
|
||||
an integer value. When a term is found within multiple documents, the value will be a list
|
||||
of integers.
|
||||
"""
|
||||
rvs: tuple[dict[str, list[int] | int], dict[str, list[int] | int]] = ({}, {})
|
||||
for rv, mapping in zip(rvs, (self._mapping, self._title_mapping)):
|
||||
for k, v in mapping.items():
|
||||
@@ -391,7 +398,7 @@ class IndexBuilder:
|
||||
objtypes = {v: k[0] + ':' + k[1] for (k, v) in self._objtypes.items()}
|
||||
objnames = self._objnames
|
||||
|
||||
alltitles: dict[str, list[tuple[int, str]]] = {}
|
||||
alltitles: dict[str, list[tuple[int, str | None]]] = {}
|
||||
for docname, titlelist in sorted(self._all_titles.items()):
|
||||
for title, titleid in titlelist:
|
||||
alltitles.setdefault(title, []).append((fn2index[docname], titleid))
|
||||
@@ -502,9 +509,10 @@ class IndexBuilder:
|
||||
elif isinstance(node, nodes.Text):
|
||||
word_store.words.extend(split(node.astext()))
|
||||
elif isinstance(node, nodes.title):
|
||||
title = node.astext()
|
||||
title, is_main_title = node.astext(), len(word_store.titles) == 0
|
||||
ids = node.parent['ids']
|
||||
word_store.titles.append((title, ids[0] if ids else None))
|
||||
title_node_id = None if is_main_title else ids[0] if ids else None
|
||||
word_store.titles.append((title, title_node_id))
|
||||
word_store.title_words.extend(split(title))
|
||||
for child in node.children:
|
||||
_visit_nodes(child)
|
||||
|
||||
@@ -1 +1 @@
|
||||
Search.setIndex({"alltitles": {"Main Page": [[0, "main-page"]]}, "docnames": ["index"], "envversion": {"sphinx": 61, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2}, "filenames": ["index.rst"], "indexentries": {}, "objects": {}, "objnames": {}, "objtypes": {}, "terms": {"At": 0, "adjac": 0, "all": 0, "an": 0, "appear": 0, "applic": 0, "ar": 0, "built": 0, "can": 0, "check": 0, "contain": 0, "do": 0, "document": 0, "doesn": 0, "each": 0, "fixtur": 0, "format": 0, "function": 0, "futur": 0, "html": 0, "i": 0, "includ": 0, "match": 0, "messag": 0, "multipl": 0, "multiterm": 0, "order": 0, "other": 0, "output": 0, "perform": 0, "perhap": 0, "phrase": 0, "project": 0, "queri": 0, "requir": 0, "same": 0, "search": 0, "successfulli": 0, "support": 0, "t": 0, "term": 0, "test": 0, "thi": 0, "time": 0, "us": 0, "when": 0, "write": 0}, "titles": ["Main Page"], "titleterms": {"main": 0, "page": 0}})
|
||||
Search.setIndex({"alltitles": {"Main Page": [[0, null]]}, "docnames": ["index"], "envversion": {"sphinx": 61, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2}, "filenames": ["index.rst"], "indexentries": {}, "objects": {}, "objnames": {}, "objtypes": {}, "terms": {"At": 0, "adjac": 0, "all": 0, "an": 0, "appear": 0, "applic": 0, "ar": 0, "built": 0, "can": 0, "check": 0, "contain": 0, "do": 0, "document": 0, "doesn": 0, "each": 0, "fixtur": 0, "format": 0, "function": 0, "futur": 0, "html": 0, "i": 0, "includ": 0, "match": 0, "messag": 0, "multipl": 0, "multiterm": 0, "order": 0, "other": 0, "output": 0, "perform": 0, "perhap": 0, "phrase": 0, "project": 0, "queri": 0, "requir": 0, "same": 0, "search": 0, "successfulli": 0, "support": 0, "t": 0, "term": 0, "test": 0, "thi": 0, "time": 0, "us": 0, "when": 0, "write": 0}, "titles": ["Main Page"], "titleterms": {"main": 0, "page": 0}})
|
||||
@@ -1 +1 @@
|
||||
Search.setIndex({"alltitles": {"sphinx_utils module": [[0, "sphinx-utils-module"]]}, "docnames": ["index"], "envversion": {"sphinx": 61, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2}, "filenames": ["index.rst"], "indexentries": {}, "objects": {}, "objnames": {}, "objtypes": {}, "terms": {"also": 0, "ar": 0, "built": 0, "confirm": 0, "document": 0, "function": 0, "html": 0, "i": 0, "includ": 0, "input": 0, "javascript": 0, "known": 0, "match": 0, "partial": 0, "possibl": 0, "prefix": 0, "project": 0, "provid": 0, "restructuredtext": 0, "sampl": 0, "search": 0, "should": 0, "thi": 0, "titl": 0, "us": 0, "when": 0}, "titles": ["sphinx_utils module"], "titleterms": {"modul": 0, "sphinx_util": 0}})
|
||||
Search.setIndex({"alltitles": {"sphinx_utils module": [[0, null]]}, "docnames": ["index"], "envversion": {"sphinx": 61, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2}, "filenames": ["index.rst"], "indexentries": {}, "objects": {}, "objnames": {}, "objtypes": {}, "terms": {"also": 0, "ar": 0, "built": 0, "confirm": 0, "document": 0, "function": 0, "html": 0, "i": 0, "includ": 0, "input": 0, "javascript": 0, "known": 0, "match": 0, "partial": 0, "possibl": 0, "prefix": 0, "project": 0, "provid": 0, "restructuredtext": 0, "sampl": 0, "search": 0, "should": 0, "thi": 0, "titl": 0, "us": 0, "when": 0}, "titles": ["sphinx_utils module"], "titleterms": {"modul": 0, "sphinx_util": 0}})
|
||||
@@ -70,21 +70,12 @@ describe('Basic html theme search', function() {
|
||||
|
||||
searchParameters = Search._parseQuery('main page');
|
||||
|
||||
// fixme: duplicate result due to https://github.com/sphinx-doc/sphinx/issues/11961
|
||||
hits = [
|
||||
[
|
||||
'index',
|
||||
'Main Page',
|
||||
'',
|
||||
null,
|
||||
15,
|
||||
'index.rst'
|
||||
],
|
||||
[
|
||||
'index',
|
||||
'Main Page',
|
||||
'#main-page',
|
||||
null,
|
||||
100,
|
||||
'index.rst'
|
||||
]
|
||||
|
||||
@@ -71,6 +71,9 @@ section_title
|
||||
|
||||
.. test that comments are not indexed: boson
|
||||
|
||||
another_title
|
||||
=============
|
||||
|
||||
test that non-comments are indexed: fermion
|
||||
'''
|
||||
|
||||
@@ -168,6 +171,10 @@ def test_IndexBuilder():
|
||||
'docname2_1': 'title2_1', 'docname2_2': 'title2_2'}
|
||||
assert index._filenames == {'docname1_1': 'filename1_1', 'docname1_2': 'filename1_2',
|
||||
'docname2_1': 'filename2_1', 'docname2_2': 'filename2_2'}
|
||||
# note: element iteration order (sort order) is important when the index
|
||||
# is frozen (serialized) during build -- however, the _mapping-related
|
||||
# dictionaries below may be iterated in arbitrary order by Python at
|
||||
# runtime.
|
||||
assert index._mapping == {
|
||||
'ar': {'docname1_1', 'docname1_2', 'docname2_1', 'docname2_2'},
|
||||
'fermion': {'docname1_1', 'docname1_2', 'docname2_1', 'docname2_2'},
|
||||
@@ -176,7 +183,10 @@ def test_IndexBuilder():
|
||||
'index': {'docname1_1', 'docname1_2', 'docname2_1', 'docname2_2'},
|
||||
'test': {'docname1_1', 'docname1_2', 'docname2_1', 'docname2_2'},
|
||||
}
|
||||
assert index._title_mapping == {'section_titl': {'docname1_1', 'docname1_2', 'docname2_1', 'docname2_2'}}
|
||||
assert index._title_mapping == {
|
||||
'another_titl': {'docname1_1', 'docname1_2', 'docname2_1', 'docname2_2'},
|
||||
'section_titl': {'docname1_1', 'docname1_2', 'docname2_1', 'docname2_2'},
|
||||
}
|
||||
assert index._objtypes == {}
|
||||
assert index._objnames == {}
|
||||
|
||||
@@ -196,8 +206,14 @@ def test_IndexBuilder():
|
||||
'non': [0, 1, 2, 3],
|
||||
'test': [0, 1, 2, 3]},
|
||||
'titles': ('title1_1', 'title1_2', 'title2_1', 'title2_2'),
|
||||
'titleterms': {'section_titl': [0, 1, 2, 3]},
|
||||
'alltitles': {'section_title': [(0, 'section-title'), (1, 'section-title'), (2, 'section-title'), (3, 'section-title')]},
|
||||
'titleterms': {
|
||||
'another_titl': [0, 1, 2, 3],
|
||||
'section_titl': [0, 1, 2, 3],
|
||||
},
|
||||
'alltitles': {
|
||||
'another_title': [(0, 'another-title'), (1, 'another-title'), (2, 'another-title'), (3, 'another-title')],
|
||||
'section_title': [(0, None), (1, None), (2, None), (3, None)],
|
||||
},
|
||||
'indexentries': {},
|
||||
}
|
||||
assert index._objtypes == {('dummy1', 'objtype1'): 0, ('dummy2', 'objtype1'): 1}
|
||||
@@ -238,7 +254,10 @@ def test_IndexBuilder():
|
||||
'index': {'docname1_2', 'docname2_2'},
|
||||
'test': {'docname1_2', 'docname2_2'},
|
||||
}
|
||||
assert index._title_mapping == {'section_titl': {'docname1_2', 'docname2_2'}}
|
||||
assert index._title_mapping == {
|
||||
'another_titl': {'docname1_2', 'docname2_2'},
|
||||
'section_titl': {'docname1_2', 'docname2_2'},
|
||||
}
|
||||
assert index._objtypes == {('dummy1', 'objtype1'): 0, ('dummy2', 'objtype1'): 1}
|
||||
assert index._objnames == {0: ('dummy1', 'objtype1', 'objtype1'), 1: ('dummy2', 'objtype1', 'objtype1')}
|
||||
|
||||
@@ -257,8 +276,14 @@ def test_IndexBuilder():
|
||||
'non': [0, 1],
|
||||
'test': [0, 1]},
|
||||
'titles': ('title1_2', 'title2_2'),
|
||||
'titleterms': {'section_titl': [0, 1]},
|
||||
'alltitles': {'section_title': [(0, 'section-title'), (1, 'section-title')]},
|
||||
'titleterms': {
|
||||
'another_titl': [0, 1],
|
||||
'section_titl': [0, 1],
|
||||
},
|
||||
'alltitles': {
|
||||
'another_title': [(0, 'another-title'), (1, 'another-title')],
|
||||
'section_title': [(0, None), (1, None)],
|
||||
},
|
||||
'indexentries': {},
|
||||
}
|
||||
assert index._objtypes == {('dummy1', 'objtype1'): 0, ('dummy2', 'objtype1'): 1}
|
||||
@@ -347,7 +372,8 @@ def assert_is_sorted(item, path: str):
|
||||
assert_is_sorted(value, f'{path}.{key}')
|
||||
elif isinstance(item, list):
|
||||
if not is_title_tuple_type(item) and path not in lists_not_to_sort:
|
||||
assert item == sorted(item), f'{err_path} is not sorted'
|
||||
# sort nulls last; http://stackoverflow.com/questions/19868767/
|
||||
assert item == sorted(item, key=lambda x: (x is None, x)), f'{err_path} is not sorted'
|
||||
for i, child in enumerate(item):
|
||||
assert_is_sorted(child, f'{path}[{i}]')
|
||||
|
||||
|
||||
Reference in New Issue
Block a user