mirror of
https://github.com/sphinx-doc/sphinx.git
synced 2025-02-25 18:55:22 -06:00
Move stemmer classes to sphinx.util.stemmer
This commit is contained in:
parent
4e50a82675
commit
d94e6a331b
@ -10,13 +10,7 @@
|
||||
"""
|
||||
|
||||
from sphinx.search import SearchLanguage
|
||||
|
||||
try:
|
||||
from Stemmer import Stemmer as PyStemmer
|
||||
PYSTEMMER = True
|
||||
except ImportError:
|
||||
from sphinx.util.stemmer import PorterStemmer
|
||||
PYSTEMMER = False
|
||||
from sphinx.util.stemmer import get_stemmer
|
||||
|
||||
english_stopwords = set("""
|
||||
a and are as at
|
||||
@ -225,25 +219,7 @@ class SearchEnglish(SearchLanguage):
|
||||
|
||||
def init(self, options):
|
||||
# type: (Dict) -> None
|
||||
if PYSTEMMER:
|
||||
class Stemmer(object):
|
||||
def __init__(self):
|
||||
# type: () -> None
|
||||
self.stemmer = PyStemmer('porter')
|
||||
|
||||
def stem(self, word):
|
||||
# type: (unicode) -> unicode
|
||||
return self.stemmer.stemWord(word)
|
||||
else:
|
||||
class Stemmer(PorterStemmer):
|
||||
"""All those porter stemmer implementations look hideous;
|
||||
make at least the stem method nicer.
|
||||
"""
|
||||
def stem(self, word):
|
||||
# type: (unicode) -> unicode
|
||||
return PorterStemmer.stem(self, word, 0, len(word) - 1)
|
||||
|
||||
self.stemmer = Stemmer()
|
||||
self.stemmer = get_stemmer()
|
||||
|
||||
def stem(self, word):
|
||||
# type: (unicode) -> unicode
|
||||
|
@ -13,13 +13,7 @@ import os
|
||||
import re
|
||||
|
||||
from sphinx.search import SearchLanguage
|
||||
|
||||
try:
|
||||
from Stemmer import Stemmer as PyStemmer
|
||||
PYSTEMMER = True
|
||||
except ImportError:
|
||||
from sphinx.util.stemmer import PorterStemmer
|
||||
PYSTEMMER = False
|
||||
from sphinx.util.stemmer import get_stemmer
|
||||
|
||||
try:
|
||||
import jieba
|
||||
@ -244,25 +238,7 @@ class SearchChinese(SearchLanguage):
|
||||
if dict_path and os.path.isfile(dict_path):
|
||||
jieba.set_dictionary(dict_path)
|
||||
|
||||
if PYSTEMMER:
|
||||
class Stemmer(object):
|
||||
def __init__(self):
|
||||
# type: () -> None
|
||||
self.stemmer = PyStemmer('porter')
|
||||
|
||||
def stem(self, word):
|
||||
# type: (unicode) -> unicode
|
||||
return self.stemmer.stemWord(word)
|
||||
else:
|
||||
class Stemmer(PorterStemmer):
|
||||
"""All those porter stemmer implementations look hideous;
|
||||
make at least the stem method nicer.
|
||||
"""
|
||||
def stem(self, word):
|
||||
# type: (unicode) -> unicode
|
||||
return PorterStemmer.stem(self, word, 0, len(word) - 1)
|
||||
|
||||
self.stemmer = Stemmer()
|
||||
self.stemmer = get_stemmer()
|
||||
|
||||
def split(self, input):
|
||||
# type: (unicode) -> List[unicode]
|
||||
|
51
sphinx/util/stemmer/__init__.py
Normal file
51
sphinx/util/stemmer/__init__.py
Normal file
@ -0,0 +1,51 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
sphinx.util.stemmer
|
||||
~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Word stemming utilities for Sphinx.
|
||||
|
||||
:copyright: Copyright 2007-2016 by the Sphinx team, see AUTHORS.
|
||||
:license: BSD, see LICENSE for details.
|
||||
"""
|
||||
|
||||
from sphinx.util.stemmer.porter import PorterStemmer
|
||||
|
||||
try:
|
||||
from Stemmer import Stemmer as _PyStemmer
|
||||
PYSTEMMER = True
|
||||
except ImportError:
|
||||
PYSTEMMER = False
|
||||
|
||||
|
||||
class BaseStemmer(object):
    """Common interface for the stemmer implementations in this module.

    Subclasses override :meth:`stem`; the base implementation only signals
    that no stemming behaviour has been provided.
    """

    def stem(self, word):
        # type: (unicode) -> unicode
        """Return the stem of *word*.

        :param word: the word to reduce to its stem
        :raises NotImplementedError: always, in this base class
        """
        # ``NotImplemented`` is the rich-comparison sentinel, not an exception
        # class; raising it fails with a TypeError instead of the intended
        # "abstract method" error, so raise NotImplementedError.
        raise NotImplementedError()
|
||||
|
||||
|
||||
class PyStemmer(BaseStemmer):
    """Stemmer that delegates to the ``Stemmer`` extension module."""

    def __init__(self):
        # type: () -> None
        # Create the 'porter' algorithm object once; stem() reuses it.
        porter = _PyStemmer('porter')
        self.stemmer = porter

    def stem(self, word):
        # type: (unicode) -> unicode
        """Return the result of the wrapped stemmer's ``stemWord`` for *word*."""
        stemmed = self.stemmer.stemWord(word)
        return stemmed
|
||||
|
||||
|
||||
class StandardStemmer(BaseStemmer, PorterStemmer):
    """Stemmer built on ``PorterStemmer`` with a one-argument ``stem``.

    ``PorterStemmer.stem`` takes explicit start and end indexes; this
    wrapper fills them in so callers pass only the word.
    """

    def stem(self, word):
        # type: (unicode) -> unicode
        """Return the Porter stem of the whole of *word*."""
        first = 0
        last = len(word) - 1
        # Name PorterStemmer explicitly: BaseStemmer is listed first among the
        # bases, so its stem() would otherwise be found before this one.
        return PorterStemmer.stem(self, word, first, last)
|
||||
|
||||
|
||||
def get_stemmer():
    # type: () -> BaseStemmer
    """Return a new stemmer instance.

    A ``PyStemmer`` is returned when the ``PYSTEMMER`` flag is true,
    otherwise a ``StandardStemmer``.
    """
    stemmer_class = PyStemmer if PYSTEMMER else StandardStemmer
    return stemmer_class()
|
@ -1,7 +1,7 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
sphinx.util.stemmer
|
||||
~~~~~~~~~~~~~~~~~~~
|
||||
sphinx.util.stemmer.porter
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Porter Stemming Algorithm
|
||||
|
Loading…
Reference in New Issue
Block a user