mirror of
https://github.com/sphinx-doc/sphinx.git
synced 2025-02-25 18:55:22 -06:00
search: support searching for (sub)titles (#10717)
Co-authored-by: Adam Turner <9087854+aa-turner@users.noreply.github.com>
This commit is contained in:
parent
7473b05e0d
commit
7da60f2353
2
CHANGES
2
CHANGES
@ -18,6 +18,8 @@ Features added
|
|||||||
* #10755: linkcheck: Check the source URL of raw directives that use the ``url``
|
* #10755: linkcheck: Check the source URL of raw directives that use the ``url``
|
||||||
option.
|
option.
|
||||||
* #10781: Allow :rst:role:`ref` role to be used with definitions and fields.
|
* #10781: Allow :rst:role:`ref` role to be used with definitions and fields.
|
||||||
|
* #10717: HTML Search: Increase priority for full title and
|
||||||
|
subtitle matches in search results.
|
||||||
|
|
||||||
Bugs fixed
|
Bugs fixed
|
||||||
----------
|
----------
|
||||||
|
@ -59,7 +59,7 @@ if docutils.__version_info__[:2] <= (0, 17):
|
|||||||
|
|
||||||
# This is increased every time an environment attribute is added
|
# This is increased every time an environment attribute is added
|
||||||
# or changed to properly invalidate pickle files.
|
# or changed to properly invalidate pickle files.
|
||||||
ENV_VERSION = 56
|
ENV_VERSION = 57
|
||||||
|
|
||||||
# config status
|
# config status
|
||||||
CONFIG_OK = 1
|
CONFIG_OK = 1
|
||||||
|
@ -183,6 +183,7 @@ class WordCollector(nodes.NodeVisitor):
|
|||||||
def __init__(self, document: nodes.document, lang: SearchLanguage) -> None:
|
def __init__(self, document: nodes.document, lang: SearchLanguage) -> None:
|
||||||
super().__init__(document)
|
super().__init__(document)
|
||||||
self.found_words: List[str] = []
|
self.found_words: List[str] = []
|
||||||
|
self.found_titles: List[Tuple[str, str]] = []
|
||||||
self.found_title_words: List[str] = []
|
self.found_title_words: List[str] = []
|
||||||
self.lang = lang
|
self.lang = lang
|
||||||
|
|
||||||
@ -213,7 +214,10 @@ class WordCollector(nodes.NodeVisitor):
|
|||||||
elif isinstance(node, nodes.Text):
|
elif isinstance(node, nodes.Text):
|
||||||
self.found_words.extend(self.lang.split(node.astext()))
|
self.found_words.extend(self.lang.split(node.astext()))
|
||||||
elif isinstance(node, nodes.title):
|
elif isinstance(node, nodes.title):
|
||||||
self.found_title_words.extend(self.lang.split(node.astext()))
|
title = node.astext()
|
||||||
|
ids = node.parent['ids']
|
||||||
|
self.found_titles.append((title, ids[0] if ids else None))
|
||||||
|
self.found_title_words.extend(self.lang.split(title))
|
||||||
elif isinstance(node, Element) and self.is_meta_keywords(node):
|
elif isinstance(node, Element) and self.is_meta_keywords(node):
|
||||||
keywords = node['content']
|
keywords = node['content']
|
||||||
keywords = [keyword.strip() for keyword in keywords.split(',')]
|
keywords = [keyword.strip() for keyword in keywords.split(',')]
|
||||||
@ -237,6 +241,7 @@ class IndexBuilder:
|
|||||||
self._mapping: Dict[str, Set[str]] = {} # stemmed word -> set(docname)
|
self._mapping: Dict[str, Set[str]] = {} # stemmed word -> set(docname)
|
||||||
# stemmed words in titles -> set(docname)
|
# stemmed words in titles -> set(docname)
|
||||||
self._title_mapping: Dict[str, Set[str]] = {}
|
self._title_mapping: Dict[str, Set[str]] = {}
|
||||||
|
self._all_titles: Dict[str, List[Tuple[str, str]]] = {} # docname -> all titles
|
||||||
self._stem_cache: Dict[str, str] = {} # word -> stemmed word
|
self._stem_cache: Dict[str, str] = {} # word -> stemmed word
|
||||||
self._objtypes: Dict[Tuple[str, str], int] = {} # objtype -> index
|
self._objtypes: Dict[Tuple[str, str], int] = {} # objtype -> index
|
||||||
# objtype index -> (domain, type, objname (localized))
|
# objtype index -> (domain, type, objname (localized))
|
||||||
@ -281,6 +286,11 @@ class IndexBuilder:
|
|||||||
index2fn = frozen['docnames']
|
index2fn = frozen['docnames']
|
||||||
self._filenames = dict(zip(index2fn, frozen['filenames']))
|
self._filenames = dict(zip(index2fn, frozen['filenames']))
|
||||||
self._titles = dict(zip(index2fn, frozen['titles']))
|
self._titles = dict(zip(index2fn, frozen['titles']))
|
||||||
|
self._all_titles = {}
|
||||||
|
|
||||||
|
for title, doc_tuples in frozen['alltitles'].items():
|
||||||
|
for doc, titleid in doc_tuples:
|
||||||
|
self._all_titles.setdefault(index2fn[doc], []).append((title, titleid))
|
||||||
|
|
||||||
def load_terms(mapping: Dict[str, Any]) -> Dict[str, Set[str]]:
|
def load_terms(mapping: Dict[str, Any]) -> Dict[str, Set[str]]:
|
||||||
rv = {}
|
rv = {}
|
||||||
@ -364,9 +374,16 @@ class IndexBuilder:
|
|||||||
objects = self.get_objects(fn2index) # populates _objtypes
|
objects = self.get_objects(fn2index) # populates _objtypes
|
||||||
objtypes = {v: k[0] + ':' + k[1] for (k, v) in self._objtypes.items()}
|
objtypes = {v: k[0] + ':' + k[1] for (k, v) in self._objtypes.items()}
|
||||||
objnames = self._objnames
|
objnames = self._objnames
|
||||||
|
|
||||||
|
alltitles: Dict[str, List[Tuple[int, str]]] = {}
|
||||||
|
for docname, titlelist in self._all_titles.items():
|
||||||
|
for title, titleid in titlelist:
|
||||||
|
alltitles.setdefault(title.lower(), []).append((fn2index[docname], titleid))
|
||||||
|
|
||||||
return dict(docnames=docnames, filenames=filenames, titles=titles, terms=terms,
|
return dict(docnames=docnames, filenames=filenames, titles=titles, terms=terms,
|
||||||
objects=objects, objtypes=objtypes, objnames=objnames,
|
objects=objects, objtypes=objtypes, objnames=objnames,
|
||||||
titleterms=title_terms, envversion=self.env.version)
|
titleterms=title_terms, envversion=self.env.version,
|
||||||
|
alltitles=alltitles)
|
||||||
|
|
||||||
def label(self) -> str:
|
def label(self) -> str:
|
||||||
return "%s (code: %s)" % (self.lang.language_name, self.lang.lang)
|
return "%s (code: %s)" % (self.lang.language_name, self.lang.lang)
|
||||||
@ -374,13 +391,16 @@ class IndexBuilder:
|
|||||||
def prune(self, docnames: Iterable[str]) -> None:
|
def prune(self, docnames: Iterable[str]) -> None:
|
||||||
"""Remove data for all docnames not in the list."""
|
"""Remove data for all docnames not in the list."""
|
||||||
new_titles = {}
|
new_titles = {}
|
||||||
|
new_alltitles = {}
|
||||||
new_filenames = {}
|
new_filenames = {}
|
||||||
for docname in docnames:
|
for docname in docnames:
|
||||||
if docname in self._titles:
|
if docname in self._titles:
|
||||||
new_titles[docname] = self._titles[docname]
|
new_titles[docname] = self._titles[docname]
|
||||||
|
new_alltitles[docname] = self._all_titles[docname]
|
||||||
new_filenames[docname] = self._filenames[docname]
|
new_filenames[docname] = self._filenames[docname]
|
||||||
self._titles = new_titles
|
self._titles = new_titles
|
||||||
self._filenames = new_filenames
|
self._filenames = new_filenames
|
||||||
|
self._all_titles = new_alltitles
|
||||||
for wordnames in self._mapping.values():
|
for wordnames in self._mapping.values():
|
||||||
wordnames.intersection_update(docnames)
|
wordnames.intersection_update(docnames)
|
||||||
for wordnames in self._title_mapping.values():
|
for wordnames in self._title_mapping.values():
|
||||||
@ -403,6 +423,8 @@ class IndexBuilder:
|
|||||||
return self._stem_cache[word]
|
return self._stem_cache[word]
|
||||||
_filter = self.lang.word_filter
|
_filter = self.lang.word_filter
|
||||||
|
|
||||||
|
self._all_titles[docname] = visitor.found_titles
|
||||||
|
|
||||||
for word in visitor.found_title_words:
|
for word in visitor.found_title_words:
|
||||||
stemmed_word = stem(word)
|
stemmed_word = stem(word)
|
||||||
if _filter(stemmed_word):
|
if _filter(stemmed_word):
|
||||||
|
@ -237,6 +237,11 @@ const Search = {
|
|||||||
* execute search (requires search index to be loaded)
|
* execute search (requires search index to be loaded)
|
||||||
*/
|
*/
|
||||||
query: (query) => {
|
query: (query) => {
|
||||||
|
const filenames = Search._index.filenames;
|
||||||
|
const docNames = Search._index.docnames;
|
||||||
|
const titles = Search._index.titles;
|
||||||
|
const allTitles = Search._index.alltitles;
|
||||||
|
|
||||||
// stem the search terms and add them to the correct list
|
// stem the search terms and add them to the correct list
|
||||||
const stemmer = new Stemmer();
|
const stemmer = new Stemmer();
|
||||||
const searchTerms = new Set();
|
const searchTerms = new Set();
|
||||||
@ -272,6 +277,23 @@ const Search = {
|
|||||||
let results = [];
|
let results = [];
|
||||||
_removeChildren(document.getElementById("search-progress"));
|
_removeChildren(document.getElementById("search-progress"));
|
||||||
|
|
||||||
|
const queryLower = query.toLowerCase();
|
||||||
|
for (const [title, foundTitles] of Object.entries(allTitles)) {
|
||||||
|
if (title.includes(queryLower) && (queryLower.length >= title.length/2)) {
|
||||||
|
for (const [file, id] of foundTitles) {
|
||||||
|
let score = Math.round(100 * queryLower.length / title.length)
|
||||||
|
results.push([
|
||||||
|
docNames[file],
|
||||||
|
titles[file],
|
||||||
|
id !== null ? "#" + id : "",
|
||||||
|
null,
|
||||||
|
score,
|
||||||
|
filenames[file],
|
||||||
|
]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// lookup as object
|
// lookup as object
|
||||||
objectTerms.forEach((term) =>
|
objectTerms.forEach((term) =>
|
||||||
results.push(...Search.performObjectSearch(term, objectTerms))
|
results.push(...Search.performObjectSearch(term, objectTerms))
|
||||||
@ -399,8 +421,8 @@ const Search = {
|
|||||||
// prepare search
|
// prepare search
|
||||||
const terms = Search._index.terms;
|
const terms = Search._index.terms;
|
||||||
const titleTerms = Search._index.titleterms;
|
const titleTerms = Search._index.titleterms;
|
||||||
const docNames = Search._index.docnames;
|
|
||||||
const filenames = Search._index.filenames;
|
const filenames = Search._index.filenames;
|
||||||
|
const docNames = Search._index.docnames;
|
||||||
const titles = Search._index.titles;
|
const titles = Search._index.titles;
|
||||||
|
|
||||||
const scoreMap = new Map();
|
const scoreMap = new Map();
|
||||||
|
@ -177,7 +177,8 @@ def test_IndexBuilder():
|
|||||||
'non': [0, 1, 2, 3],
|
'non': [0, 1, 2, 3],
|
||||||
'test': [0, 1, 2, 3]},
|
'test': [0, 1, 2, 3]},
|
||||||
'titles': ('title1_1', 'title1_2', 'title2_1', 'title2_2'),
|
'titles': ('title1_1', 'title1_2', 'title2_1', 'title2_2'),
|
||||||
'titleterms': {'section_titl': [0, 1, 2, 3]}
|
'titleterms': {'section_titl': [0, 1, 2, 3]},
|
||||||
|
'alltitles': {'section_title': [(0, 'section-title'), (1, 'section-title'), (2, 'section-title'), (3, 'section-title')]}
|
||||||
}
|
}
|
||||||
assert index._objtypes == {('dummy1', 'objtype1'): 0, ('dummy2', 'objtype1'): 1}
|
assert index._objtypes == {('dummy1', 'objtype1'): 0, ('dummy2', 'objtype1'): 1}
|
||||||
assert index._objnames == {0: ('dummy1', 'objtype1', 'objtype1'),
|
assert index._objnames == {0: ('dummy1', 'objtype1', 'objtype1'),
|
||||||
@ -234,7 +235,8 @@ def test_IndexBuilder():
|
|||||||
'non': [0, 1],
|
'non': [0, 1],
|
||||||
'test': [0, 1]},
|
'test': [0, 1]},
|
||||||
'titles': ('title1_2', 'title2_2'),
|
'titles': ('title1_2', 'title2_2'),
|
||||||
'titleterms': {'section_titl': [0, 1]}
|
'titleterms': {'section_titl': [0, 1]},
|
||||||
|
'alltitles': {'section_title': [(0, 'section-title'), (1, 'section-title')]}
|
||||||
}
|
}
|
||||||
assert index._objtypes == {('dummy1', 'objtype1'): 0, ('dummy2', 'objtype1'): 1}
|
assert index._objtypes == {('dummy1', 'objtype1'): 0, ('dummy2', 'objtype1'): 1}
|
||||||
assert index._objnames == {0: ('dummy1', 'objtype1', 'objtype1'),
|
assert index._objnames == {0: ('dummy1', 'objtype1', 'objtype1'),
|
||||||
|
Loading…
Reference in New Issue
Block a user