Fix #3039: English stemmer returns wrong word if the word is capitalized

This commit is contained in:
Takeshi KOMIYA
2016-10-17 21:09:42 +09:00
parent daee1fc51d
commit e04fe845c7
4 changed files with 8 additions and 3 deletions

View File

@@ -54,6 +54,7 @@ Bugs fixed
* #3003: literal blocks in footnotes are not supported by LaTeX
* #3047: spacing before footnote in pdf output is not coherent and allows breaks
* #3045: HTML search index creator should ignore "raw" content if not html
* #3039: English stemmer returns wrong word if the word is capitalized
Testing
--------

View File

@@ -242,4 +242,4 @@ class SearchEnglish(SearchLanguage):
self.stemmer = Stemmer()
def stem(self, word):
return self.stemmer.stem(word)
return self.stemmer.stem(word.lower())

View File

@@ -15,6 +15,8 @@ findthisstemmedkey
textinheading
International
.. toctree::
tocitem

View File

@@ -103,8 +103,10 @@ def test_stemmer_does_not_remove_short_words(app, status, warning):
@with_app(testroot='search')
def test_stemmer(app, status, warning):
searchindex = (app.outdir / 'searchindex.js').text()
assert 'findthisstemmedkei' in searchindex
searchindex = jsload(app.outdir / 'searchindex.js')
print(searchindex)
assert is_registered_term(searchindex, 'findthisstemmedkei')
assert is_registered_term(searchindex, 'intern')
@with_app(testroot='search')