Move stemmer classes to sphinx.util.stemmer

This commit is contained in:
Takeshi KOMIYA 2016-12-16 00:16:24 +09:00
parent 4e50a82675
commit d94e6a331b
4 changed files with 57 additions and 54 deletions

View File

@ -10,13 +10,7 @@
"""
from sphinx.search import SearchLanguage
try:
from Stemmer import Stemmer as PyStemmer
PYSTEMMER = True
except ImportError:
from sphinx.util.stemmer import PorterStemmer
PYSTEMMER = False
from sphinx.util.stemmer import get_stemmer
english_stopwords = set("""
a and are as at
@ -225,25 +219,7 @@ class SearchEnglish(SearchLanguage):
def init(self, options):
# type: (Dict) -> None
if PYSTEMMER:
class Stemmer(object):
def __init__(self):
# type: () -> None
self.stemmer = PyStemmer('porter')
def stem(self, word):
# type: (unicode) -> unicode
return self.stemmer.stemWord(word)
else:
class Stemmer(PorterStemmer):
"""All those porter stemmer implementations look hideous;
make at least the stem method nicer.
"""
def stem(self, word):
# type: (unicode) -> unicode
return PorterStemmer.stem(self, word, 0, len(word) - 1)
self.stemmer = Stemmer()
self.stemmer = get_stemmer()
def stem(self, word):
# type: (unicode) -> unicode

View File

@ -13,13 +13,7 @@ import os
import re
from sphinx.search import SearchLanguage
try:
from Stemmer import Stemmer as PyStemmer
PYSTEMMER = True
except ImportError:
from sphinx.util.stemmer import PorterStemmer
PYSTEMMER = False
from sphinx.util.stemmer import get_stemmer
try:
import jieba
@ -244,25 +238,7 @@ class SearchChinese(SearchLanguage):
if dict_path and os.path.isfile(dict_path):
jieba.set_dictionary(dict_path)
if PYSTEMMER:
class Stemmer(object):
def __init__(self):
# type: () -> None
self.stemmer = PyStemmer('porter')
def stem(self, word):
# type: (unicode) -> unicode
return self.stemmer.stemWord(word)
else:
class Stemmer(PorterStemmer):
"""All those porter stemmer implementations look hideous;
make at least the stem method nicer.
"""
def stem(self, word):
# type: (unicode) -> unicode
return PorterStemmer.stem(self, word, 0, len(word) - 1)
self.stemmer = Stemmer()
self.stemmer = get_stemmer()
def split(self, input):
# type: (unicode) -> List[unicode]

View File

@ -0,0 +1,51 @@
# -*- coding: utf-8 -*-
"""
sphinx.util.stemmer
~~~~~~~~~~~~~~~~~~~
Word stemming utilities for Sphinx.
:copyright: Copyright 2007-2016 by the Sphinx team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
from sphinx.util.stemmer.porter import PorterStemmer
try:
from Stemmer import Stemmer as _PyStemmer
PYSTEMMER = True
except ImportError:
PYSTEMMER = False
class BaseStemmer(object):
    """Common interface shared by the stemmer classes in this module."""

    def stem(self, word):
        # type: (unicode) -> unicode
        """Return the stem of *word*; subclasses must override this method.

        :raises NotImplementedError: always, in this base class.
        """
        # ``NotImplemented`` is the rich-comparison sentinel, not an exception
        # class; raising it fails with a TypeError instead of the intended
        # "not implemented" error.
        raise NotImplementedError()
class PyStemmer(BaseStemmer):
    """Stemmer that delegates to a ``_PyStemmer`` object built for 'porter'."""

    def __init__(self):
        # type: () -> None
        backend = _PyStemmer('porter')
        self.stemmer = backend

    def stem(self, word):
        # type: (unicode) -> unicode
        """Return the result of ``stemWord(word)`` on the wrapped stemmer."""
        stemmed = self.stemmer.stemWord(word)
        return stemmed
class StandardStemmer(BaseStemmer, PorterStemmer):
    """Porter stemmer exposing a single-argument ``stem`` method.

    ``PorterStemmer.stem`` is called here with the word plus a start and an
    end index; this subclass supplies both so callers only pass the word.
    """

    def stem(self, word):
        # type: (unicode) -> unicode
        """Stem *word* using index 0 through ``len(word) - 1``."""
        first = 0
        last = len(word) - 1
        return PorterStemmer.stem(self, word, first, last)
def get_stemmer():
    # type: () -> BaseStemmer
    """Return a new stemmer instance.

    ``PyStemmer`` is used when the module-level ``PYSTEMMER`` flag is true,
    otherwise ``StandardStemmer``.
    """
    stemmer_class = PyStemmer if PYSTEMMER else StandardStemmer
    return stemmer_class()

View File

@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
"""
sphinx.util.stemmer
~~~~~~~~~~~~~~~~~~~
sphinx.util.stemmer.porter
~~~~~~~~~~~~~~~~~~~~~~~~~~
Porter Stemming Algorithm