mirror of
https://github.com/sphinx-doc/sphinx.git
synced 2025-02-25 18:55:22 -06:00
Merge pull request #2621 from TimKam/fix-stemming-removes-short-words-from-search-results
Fix: stemming removes short words from search results
This commit is contained in:
commit
b58b6014c9
@ -386,14 +386,19 @@ class IndexBuilder(object):
|
||||
_filter = self.lang.word_filter
|
||||
|
||||
for word in visitor.found_title_words:
|
||||
word = stem(word)
|
||||
if _filter(word):
|
||||
stemmed_word = stem(word)
|
||||
if _filter(stemmed_word):
|
||||
self._title_mapping.setdefault(stemmed_word, set()).add(docname)
|
||||
elif _filter(word): # stemmer must not remove words from search index
|
||||
self._title_mapping.setdefault(word, set()).add(docname)
|
||||
|
||||
for word in visitor.found_words:
|
||||
word = stem(word)
|
||||
if word not in self._title_mapping and _filter(word):
|
||||
self._mapping.setdefault(word, set()).add(docname)
|
||||
stemmed_word = stem(word)
|
||||
# again, stemmer must not remove words from search index
|
||||
if not _filter(stemmed_word) and _filter(word):
|
||||
stemmed_word = word
|
||||
if stemmed_word not in self._title_mapping and _filter(stemmed_word):
|
||||
self._mapping.setdefault(stemmed_word, set()).add(docname)
|
||||
|
||||
def context_for_searchtool(self):
|
||||
return dict(
|
||||
|
0
sphinx/search/test
Normal file
0
sphinx/search/test
Normal file
@ -159,6 +159,10 @@ var Search = {
|
||||
}
|
||||
// stem the word
|
||||
var word = stemmer.stemWord(tmp[i].toLowerCase());
|
||||
// prevent stemmer from cutting a word down to fewer than three chars
|
||||
if(word.length < 3 && tmp[i].length >= 3) {
|
||||
word = tmp[i];
|
||||
}
|
||||
var toAppend;
|
||||
// select the correct list
|
||||
if (word[0] == '-') {
|
||||
|
@ -5,4 +5,10 @@ meta keywords
|
||||
:keywords lang=en: findthiskey, thistoo, notgerman
|
||||
:keywords: thisonetoo
|
||||
:keywords lang=de: onlygerman, onlytoogerman
|
||||
:description: thisnoteither
|
||||
:description: thisnoteither
|
||||
|
||||
Stemmer
|
||||
=======
|
||||
|
||||
zfs
|
||||
findthisstemmedkey
|
@ -92,3 +92,16 @@ def test_meta_keys_are_handled_for_language_de(app, status, warning):
|
||||
assert is_registered_term(searchindex, 'onlygerman')
|
||||
assert not is_registered_term(searchindex, 'notgerman')
|
||||
assert is_registered_term(searchindex, 'onlytoogerman')
|
||||
|
||||
|
||||
@with_app(testroot='search')
def test_stemmer_does_not_remove_short_words(app, status, warning):
    """Check that the short word 'zfs' is still present in the search index.

    Builds the 'search' test root and looks for the literal text 'zfs'
    in the generated ``searchindex.js``.
    """
    app.builder.build_all()
    index_path = app.outdir / 'searchindex.js'
    index_text = index_path.text()
    assert 'zfs' in index_text
|
||||
|
||||
|
||||
@with_app(testroot='search')
def test_stemmer(app, status, warning):
    """Check that indexed words are stored in their stemmed form.

    The 'search' test root contains the word 'findthisstemmedkey'; the
    generated ``searchindex.js`` is expected to contain the stemmed
    spelling 'findthisstemmedkei'.
    """
    # Build explicitly so the test does not depend on another test having
    # produced searchindex.js beforehand.
    app.builder.build_all()
    searchindex = (app.outdir / 'searchindex.js').text()
    assert 'findthisstemmedkei' in searchindex
|
||||
|
Loading…
Reference in New Issue
Block a user