search index: don't stem words that would be excluded from the search index only after stemming #1529

This commit is contained in:
Timotheus Kampik 2016-06-04 17:21:11 +02:00
parent 01797faede
commit ba64f54e4e
4 changed files with 34 additions and 7 deletions

View File

@ -381,14 +381,20 @@ class IndexBuilder(object):
_filter = self.lang.word_filter
for word in visitor.found_title_words:
word = stem(word)
if _filter(word):
stemmed_word = stem(word)
if _filter(stemmed_word):
self._title_mapping.setdefault(stemmed_word, set()).add(filename)
elif _filter(word): # stemmer must not remove words from search index
self._title_mapping.setdefault(word, set()).add(filename)
for word in visitor.found_words:
word = stem(word)
if word not in self._title_mapping and _filter(word):
self._mapping.setdefault(word, set()).add(filename)
stemmed_word = stem(word)
# again, stemmer must not remove words from search index
if not _filter(stemmed_word) and _filter(word):
stemmed_word = word
if stemmed_word not in self._title_mapping and _filter(stemmed_word):
self._mapping.setdefault(stemmed_word, set()).add(filename)
def context_for_searchtool(self):
return dict(

0
sphinx/search/test Normal file
View File

View File

@ -5,4 +5,10 @@ meta keywords
:keywords lang=en: findthiskey, thistoo, notgerman
:keywords: thisonetoo
:keywords lang=de: onlygerman, onlytoogerman
:description: thisnoteither
:description: thisnoteither
Stemmer
=======
zfs
findthisstemmedkey

View File

@ -58,6 +58,7 @@ def assert_lang_agnostic_key_words(searchindex):
assert 'thisnoteith' not in searchindex
assert 'thisonetoo' in searchindex
@with_app(testroot='search')
def test_meta_keys_are_handled_for_language_en(app, status, warning):
os.remove(app.outdir / 'searchindex.js')
@ -68,6 +69,7 @@ def test_meta_keys_are_handled_for_language_en(app, status, warning):
assert 'onlygerman' not in searchindex
assert 'thistoo' in searchindex
@with_app(testroot='search', confoverrides={'html_search_language': 'de'})
def test_meta_keys_are_handled_for_language_de(app, status, warning):
app.builder.build_all()
@ -75,4 +77,17 @@ def test_meta_keys_are_handled_for_language_de(app, status, warning):
assert_lang_agnostic_key_words(searchindex)
assert 'onlygerman' in searchindex
assert 'notgerman' not in searchindex
assert 'onlytoogerman' in searchindex
assert 'onlytoogerman' in searchindex
@with_app(testroot='search')
def test_stemmer_does_not_remove_short_words(app, status, warning):
    """Check that the short word 'zfs' is present in the built search index.

    Regression test for #1529: a word must not disappear from the index
    merely because of what the stemmer does to it.
    """
    app.builder.build_all()
    index_text = (app.outdir / 'searchindex.js').text()
    assert 'zfs' in index_text
@with_app(testroot='search')
def test_stemmer(app, status, warning):
    """Check that a regular word is stored in the index in its stemmed form.

    The search test root contains the word 'findthisstemmedkey'; the index
    is expected to contain its stem, 'findthisstemmedkei'.
    """
    # Build explicitly so this test does not rely on another test having
    # left a searchindex.js behind in the output directory.
    app.builder.build_all()
    searchindex = (app.outdir / 'searchindex.js').text()
    assert 'findthisstemmedkei' in searchindex