From d94e6a331b33b2fa11a7d8cb0afa1db2794bb8a5 Mon Sep 17 00:00:00 2001 From: Takeshi KOMIYA Date: Fri, 16 Dec 2016 00:16:24 +0900 Subject: [PATCH] Move stemmer classes to sphinx.util.stemmer --- sphinx/search/en.py | 28 +--------- sphinx/search/zh.py | 28 +--------- sphinx/util/stemmer/__init__.py | 51 +++++++++++++++++++ sphinx/util/{stemmer.py => stemmer/porter.py} | 4 +- 4 files changed, 57 insertions(+), 54 deletions(-) create mode 100644 sphinx/util/stemmer/__init__.py rename sphinx/util/{stemmer.py => stemmer/porter.py} (99%) diff --git a/sphinx/search/en.py b/sphinx/search/en.py index 22d4e5acb..c6658ffdc 100644 --- a/sphinx/search/en.py +++ b/sphinx/search/en.py @@ -10,13 +10,7 @@ """ from sphinx.search import SearchLanguage - -try: - from Stemmer import Stemmer as PyStemmer - PYSTEMMER = True -except ImportError: - from sphinx.util.stemmer import PorterStemmer - PYSTEMMER = False +from sphinx.util.stemmer import get_stemmer english_stopwords = set(""" a and are as at @@ -225,25 +219,7 @@ class SearchEnglish(SearchLanguage): def init(self, options): # type: (Dict) -> None - if PYSTEMMER: - class Stemmer(object): - def __init__(self): - # type: () -> None - self.stemmer = PyStemmer('porter') - - def stem(self, word): - # type: (unicode) -> unicode - return self.stemmer.stemWord(word) - else: - class Stemmer(PorterStemmer): - """All those porter stemmer implementations look hideous; - make at least the stem method nicer. 
- """ - def stem(self, word): - # type: (unicode) -> unicode - return PorterStemmer.stem(self, word, 0, len(word) - 1) - - self.stemmer = Stemmer() + self.stemmer = get_stemmer() def stem(self, word): # type: (unicode) -> unicode diff --git a/sphinx/search/zh.py b/sphinx/search/zh.py index bd4787506..520dd6493 100644 --- a/sphinx/search/zh.py +++ b/sphinx/search/zh.py @@ -13,13 +13,7 @@ import os import re from sphinx.search import SearchLanguage - -try: - from Stemmer import Stemmer as PyStemmer - PYSTEMMER = True -except ImportError: - from sphinx.util.stemmer import PorterStemmer - PYSTEMMER = False +from sphinx.util.stemmer import get_stemmer try: import jieba @@ -244,25 +238,7 @@ class SearchChinese(SearchLanguage): if dict_path and os.path.isfile(dict_path): jieba.set_dictionary(dict_path) - if PYSTEMMER: - class Stemmer(object): - def __init__(self): - # type: () -> None - self.stemmer = PyStemmer('porter') - - def stem(self, word): - # type: (unicode) -> unicode - return self.stemmer.stemWord(word) - else: - class Stemmer(PorterStemmer): - """All those porter stemmer implementations look hideous; - make at least the stem method nicer. - """ - def stem(self, word): - # type: (unicode) -> unicode - return PorterStemmer.stem(self, word, 0, len(word) - 1) - - self.stemmer = Stemmer() + self.stemmer = get_stemmer() def split(self, input): # type: (unicode) -> List[unicode] diff --git a/sphinx/util/stemmer/__init__.py b/sphinx/util/stemmer/__init__.py new file mode 100644 index 000000000..ae9f76f1b --- /dev/null +++ b/sphinx/util/stemmer/__init__.py @@ -0,0 +1,51 @@ +# -*- coding: utf-8 -*- +""" + sphinx.util.stemmer + ~~~~~~~~~~~~~~~~~~~ + + Word stemming utilities for Sphinx. + + :copyright: Copyright 2007-2016 by the Sphinx team, see AUTHORS. + :license: BSD, see LICENSE for details. 
+"""
+
+from sphinx.util.stemmer.porter import PorterStemmer
+
+try:
+    from Stemmer import Stemmer as _PyStemmer
+    PYSTEMMER = True
+except ImportError:
+    PYSTEMMER = False
+
+
+class BaseStemmer(object):
+    def stem(self, word):
+        # type: (unicode) -> unicode
+        raise NotImplementedError()  # abstract hook: subclasses must override
+
+
+class PyStemmer(BaseStemmer):
+    def __init__(self):
+        # type: () -> None
+        self.stemmer = _PyStemmer('porter')
+
+    def stem(self, word):
+        # type: (unicode) -> unicode
+        return self.stemmer.stemWord(word)
+
+
+class StandardStemmer(BaseStemmer, PorterStemmer):
+    """All those porter stemmer implementations look hideous;
+    make at least the stem method nicer.
+    """
+    def stem(self, word):
+        # type: (unicode) -> unicode
+        return PorterStemmer.stem(self, word, 0, len(word) - 1)
+
+
+def get_stemmer():
+    # type: () -> BaseStemmer
+    if PYSTEMMER:
+        return PyStemmer()
+    else:
+        return StandardStemmer()
diff --git a/sphinx/util/stemmer.py b/sphinx/util/stemmer/porter.py
similarity index 99%
rename from sphinx/util/stemmer.py
rename to sphinx/util/stemmer/porter.py
index 47fc41e87..7cff74b6c 100644
--- a/sphinx/util/stemmer.py
+++ b/sphinx/util/stemmer/porter.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 """
-    sphinx.util.stemmer
-    ~~~~~~~~~~~~~~~~~~~
+    sphinx.util.stemmer.porter
+    ~~~~~~~~~~~~~~~~~~~~~~~~~~
 
     Porter Stemming Algorithm