mirror of
https://github.com/sphinx-doc/sphinx.git
synced 2025-02-25 18:55:22 -06:00
search index: don't stem words that would be excluded from the search index only after stemming #1529
This commit is contained in:
parent
01797faede
commit
ba64f54e4e
@ -381,14 +381,20 @@ class IndexBuilder(object):
|
||||
_filter = self.lang.word_filter
|
||||
|
||||
for word in visitor.found_title_words:
|
||||
word = stem(word)
|
||||
if _filter(word):
|
||||
stemmed_word = stem(word)
|
||||
if _filter(stemmed_word):
|
||||
self._title_mapping.setdefault(stemmed_word, set()).add(filename)
|
||||
elif _filter(word): # stemmer must not remove words from search index
|
||||
self._title_mapping.setdefault(word, set()).add(filename)
|
||||
|
||||
for word in visitor.found_words:
|
||||
word = stem(word)
|
||||
if word not in self._title_mapping and _filter(word):
|
||||
self._mapping.setdefault(word, set()).add(filename)
|
||||
stemmed_word = stem(word)
|
||||
# again, stemmer must not remove words from search index
|
||||
if not _filter(stemmed_word) and _filter(word):
|
||||
stemmed_word = word
|
||||
if stemmed_word not in self._title_mapping and _filter(stemmed_word):
|
||||
self._mapping.setdefault(stemmed_word, set()).add(filename)
|
||||
|
||||
|
||||
def context_for_searchtool(self):
|
||||
return dict(
|
||||
|
0
sphinx/search/test
Normal file
0
sphinx/search/test
Normal file
@ -5,4 +5,10 @@ meta keywords
|
||||
:keywords lang=en: findthiskey, thistoo, notgerman
|
||||
:keywords: thisonetoo
|
||||
:keywords lang=de: onlygerman, onlytoogerman
|
||||
:description: thisnoteither
|
||||
:description: thisnoteither
|
||||
|
||||
Stemmer
|
||||
=======
|
||||
|
||||
zfs
|
||||
findthisstemmedkey
|
@ -58,6 +58,7 @@ def assert_lang_agnostic_key_words(searchindex):
|
||||
assert 'thisnoteith' not in searchindex
|
||||
assert 'thisonetoo' in searchindex
|
||||
|
||||
|
||||
@with_app(testroot='search')
|
||||
def test_meta_keys_are_handled_for_language_en(app, status, warning):
|
||||
os.remove(app.outdir / 'searchindex.js')
|
||||
@ -68,6 +69,7 @@ def test_meta_keys_are_handled_for_language_en(app, status, warning):
|
||||
assert 'onlygerman' not in searchindex
|
||||
assert 'thistoo' in searchindex
|
||||
|
||||
|
||||
@with_app(testroot='search', confoverrides={'html_search_language': 'de'})
|
||||
def test_meta_keys_are_handled_for_language_de(app, status, warning):
|
||||
app.builder.build_all()
|
||||
@ -75,4 +77,17 @@ def test_meta_keys_are_handled_for_language_de(app, status, warning):
|
||||
assert_lang_agnostic_key_words(searchindex)
|
||||
assert 'onlygerman' in searchindex
|
||||
assert 'notgerman' not in searchindex
|
||||
assert 'onlytoogerman' in searchindex
|
||||
assert 'onlytoogerman' in searchindex
|
||||
|
||||
|
||||
@with_app(testroot='search')
|
||||
def test_stemmer_does_not_remove_short_words(app, status, warning):
|
||||
app.builder.build_all()
|
||||
searchindex = (app.outdir / 'searchindex.js').text()
|
||||
assert 'zfs' in searchindex
|
||||
|
||||
|
||||
@with_app(testroot='search')
|
||||
def test_stemmer(app, status, warning):
|
||||
searchindex = (app.outdir / 'searchindex.js').text()
|
||||
assert 'findthisstemmedkei' in searchindex
|
||||
|
Loading…
Reference in New Issue
Block a user