Mirror of https://github.com/sphinx-doc/sphinx.git
refactor: Use PEP-526 based variable annotation (sphinx.search)
Parent: d27bdce2d7
Commit: 94dc1d78a4
--- a/sphinx/search/__init__.py
+++ b/sphinx/search/__init__.py
@@ -53,11 +53,11 @@ class SearchLanguage:
     This class is used to preprocess search word which Sphinx HTML readers
     type, before searching index. Default implementation does nothing.
     """
-    lang = None                 # type: str
-    language_name = None        # type: str
-    stopwords = set()           # type: Set[str]
-    js_splitter_code = None     # type: str
-    js_stemmer_rawcode = None   # type: str
+    lang: str = None
+    language_name: str = None
+    stopwords: Set[str] = set()
+    js_splitter_code: str = None
+    js_stemmer_rawcode: str = None
     js_stemmer_code = """
 /**
  * Dummy stemmer for languages without stemming rules.
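
For readers comparing the two styles: the removed "# type:" comments are PEP 484
type comments, visible only to static checkers, while PEP 526 annotations are
also recorded at runtime in the class's __annotations__ mapping. Note that
lang: str = None keeps the imprecise type of the old comment (the default is
None, so a strict checker would prefer Optional[str]). A minimal sketch, with a
hypothetical Demo class that is not part of Sphinx:

from typing import Set

class Demo:
    # old style: PEP 484 type comment, leaves no runtime trace
    lang = None  # type: str
    # new style: PEP 526 annotation, recorded at runtime
    stopwords: Set[str] = set()

print(Demo.__annotations__)  # {'stopwords': typing.Set[str]}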
@@ -124,7 +124,7 @@ def parse_stop_word(source: str) -> Set[str]:
 
     * http://snowball.tartarus.org/algorithms/finnish/stop.txt
     """
-    result = set()  # type: Set[str]
+    result: Set[str] = set()
     for line in source.splitlines():
         line = line.split('|')[0]  # remove comment
         result.update(line.split())
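
parse_stop_word consumes the Snowball stop-word format referenced in the
docstring: '|' starts a comment and words are whitespace-separated. A
self-contained sketch; the function body follows the hunk above (the return
statement is implied by the signature), and the input words are made up:

from typing import Set

def parse_stop_word(source: str) -> Set[str]:
    result: Set[str] = set()
    for line in source.splitlines():
        line = line.split('|')[0]  # remove comment
        result.update(line.split())
    return result

source = """\
olla   | to be
ja     | and
"""
print(parse_stop_word(source))  # {'olla', 'ja'} (set order varies)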
@@ -132,7 +132,7 @@ def parse_stop_word(source: str) -> Set[str]:
 
 
 # maps language name to module.class or directly a class
-languages = {
+languages: Dict[str, Any] = {
     'da': 'sphinx.search.da.SearchDanish',
     'de': 'sphinx.search.de.SearchGerman',
     'en': SearchEnglish,
@@ -150,7 +150,7 @@ languages = {
     'sv': 'sphinx.search.sv.SearchSwedish',
     'tr': 'sphinx.search.tr.SearchTurkish',
     'zh': 'sphinx.search.zh.SearchChinese',
-}   # type: Dict[str, Any]
+}
 
 
 class _JavaScriptIndex:
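
The Dict[str, Any] annotation reflects that entries are either a class object
(SearchEnglish) or a dotted import path. A sketch of how the string form is
resolved, mirroring the import_module/getattr code in the IndexBuilder hunk
further down; the empty options dict is only for illustration:

from importlib import import_module

entry = languages['tr']                  # 'sphinx.search.tr.SearchTurkish'
if isinstance(entry, str):
    module, classname = entry.rsplit('.', 1)
    entry = getattr(import_module(module), classname)
search_lang = entry({})                  # instantiate with empty options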
@@ -189,8 +189,8 @@ class WordCollector(nodes.NodeVisitor):
 
     def __init__(self, document: nodes.document, lang: SearchLanguage) -> None:
         super().__init__(document)
-        self.found_words = []  # type: List[str]
-        self.found_title_words = []  # type: List[str]
+        self.found_words: List[str] = []
+        self.found_title_words: List[str] = []
         self.lang = lang
 
     def is_meta_keywords(self, node: addnodes.meta) -> bool:
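
WordCollector is a docutils NodeVisitor, so the annotated lists fill up during
a tree walk. A hypothetical usage sketch, where document stands in for a parsed
doctree and lang for a SearchLanguage instance:

visitor = WordCollector(document, lang)
document.walk(visitor)                  # visit_* methods append as nodes pass by
print(visitor.found_words)
print(visitor.found_title_words)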
@@ -238,29 +238,24 @@ class IndexBuilder:
 
     def __init__(self, env: BuildEnvironment, lang: str, options: Dict, scoring: str) -> None:
         self.env = env
-        self._titles = {}           # type: Dict[str, str]
-        # docname -> title
-        self._filenames = {}        # type: Dict[str, str]
-        # docname -> filename
-        self._mapping = {}          # type: Dict[str, Set[str]]
-        # stemmed word -> set(docname)
-        self._title_mapping = {}    # type: Dict[str, Set[str]]
-        # stemmed words in titles -> set(docname)
-        self._stem_cache = {}       # type: Dict[str, str]
-        # word -> stemmed word
-        self._objtypes = {}         # type: Dict[Tuple[str, str], int]
-        # objtype -> index
-        self._objnames = {}         # type: Dict[int, Tuple[str, str, str]]
-        # objtype index -> (domain, type, objname (localized))
-        lang_class = languages.get(lang)    # type: Type[SearchLanguage]
-        # add language-specific SearchLanguage instance
+        self._titles: Dict[str, str] = {}           # docname -> title
+        self._filenames: Dict[str, str] = {}        # docname -> filename
+        self._mapping: Dict[str, Set[str]] = {}     # stemmed word -> set(docname)
+        # stemmed words in titles -> set(docname)
+        self._title_mapping: Dict[str, Set[str]] = {}
+        self._stem_cache: Dict[str, str] = {}       # word -> stemmed word
+        self._objtypes: Dict[Tuple[str, str], int] = {}     # objtype -> index
+        # objtype index -> (domain, type, objname (localized))
+        self._objnames: Dict[int, Tuple[str, str, str]] = {}
+        # add language-specific SearchLanguage instance
+        lang_class: Type[SearchLanguage] = languages.get(lang)
 
         # fallback; try again with language-code
         if lang_class is None and '_' in lang:
             lang_class = languages.get(lang.split('_')[0])
 
         if lang_class is None:
-            self.lang = SearchEnglish(options)  # type: SearchLanguage
+            self.lang: SearchLanguage = SearchEnglish(options)
         elif isinstance(lang_class, str):
             module, classname = lang_class.rsplit('.', 1)
             lang_class = getattr(import_module(module), classname)
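
The fallback branch lets a region-qualified language code reuse the base
language's search class. A short illustration of the lookup order, assuming the
languages table shown above:

lang = 'de_DE'
lang_class = languages.get(lang)        # None: no exact entry for 'de_DE'
if lang_class is None and '_' in lang:
    lang_class = languages.get(lang.split('_')[0])  # entry for 'de'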
@@ -310,7 +305,7 @@ class IndexBuilder:
 
     def get_objects(self, fn2index: Dict[str, int]
                     ) -> Dict[str, Dict[str, Tuple[int, int, int, str]]]:
-        rv = {}  # type: Dict[str, Dict[str, Tuple[int, int, int, str]]]
+        rv: Dict[str, Dict[str, Tuple[int, int, int, str]]] = {}
         otypes = self._objtypes
         onames = self._objnames
         for domainname, domain in sorted(self.env.domains.items()):
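
The fn2index parameter maps each docname to the integer slot it occupies in the
serialized search index, which keeps the emitted JavaScript compact. A hedged
sketch of building such a mapping; the docnames are invented:

docnames = sorted(['intro', 'usage/installation', 'usage/quickstart'])
fn2index = {docname: i for i, docname in enumerate(docnames)}
# {'intro': 0, 'usage/installation': 1, 'usage/quickstart': 2}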
@@ -346,7 +341,7 @@ class IndexBuilder:
         return rv
 
     def get_terms(self, fn2index: Dict) -> Tuple[Dict[str, List[str]], Dict[str, List[str]]]:
-        rvs = {}, {}  # type: Tuple[Dict[str, List[str]], Dict[str, List[str]]]
+        rvs: Tuple[Dict[str, List[str]], Dict[str, List[str]]] = ({}, {})
         for rv, mapping in zip(rvs, (self._mapping, self._title_mapping)):
             for k, v in mapping.items():
                 if len(v) == 1:
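
The tuple annotation types both result dicts at once; zip then pairs each with
its source mapping, so a single loop fills the word index and the title index.
A self-contained sketch with made-up data:

from typing import Dict, List, Set, Tuple

mapping: Dict[str, Set[str]] = {'search': {'intro', 'usage'}}
title_mapping: Dict[str, Set[str]] = {'install': {'usage'}}
rvs: Tuple[Dict[str, List[str]], Dict[str, List[str]]] = ({}, {})
for rv, m in zip(rvs, (mapping, title_mapping)):
    for word, docnames in m.items():
        rv[word] = sorted(docnames)
print(rvs)  # ({'search': ['intro', 'usage']}, {'install': ['usage']})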
--- a/sphinx/search/ja.py
+++ b/sphinx/search/ja.py
@@ -54,8 +54,8 @@ class BaseSplitter:
 class MecabSplitter(BaseSplitter):
     def __init__(self, options: Dict) -> None:
         super().__init__(options)
-        self.ctypes_libmecab = None  # type: Any
-        self.ctypes_mecab = None     # type: Any
+        self.ctypes_libmecab: Any = None
+        self.ctypes_mecab: Any = None
         if not native_module:
             self.init_ctypes(options)
         else:
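
The Any annotations are used because these attributes start as None and only
hold ctypes handles when the native MeCab binding is unavailable. A generic
sketch of that pattern; LibHolder and the library path are hypothetical:

import ctypes
from typing import Any

class LibHolder:
    def __init__(self) -> None:
        self.lib: Any = None            # None until a shared library is loaded

    def load(self, path: str) -> None:
        self.lib = ctypes.CDLL(path)    # e.g. '/usr/lib/libmecab.so.2'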
--- a/sphinx/search/ro.py
+++ b/sphinx/search/ro.py
@@ -19,7 +19,7 @@ class SearchRomanian(SearchLanguage):
     lang = 'ro'
     language_name = 'Romanian'
     js_stemmer_rawcode = 'romanian-stemmer.js'
-    stopwords = set()  # type: Set[str]
+    stopwords: Set[str] = set()
 
     def init(self, options: Dict) -> None:
         self.stemmer = snowballstemmer.stemmer('romanian')
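
The same one-line stopwords change repeats in each language module; only the
class-level default is annotated, while the stemmer itself is created in
init(). A hedged usage sketch, assuming the snowballstemmer package is
installed and an arbitrary Romanian word:

import snowballstemmer

stemmer = snowballstemmer.stemmer('romanian')
print(stemmer.stemWord('copiilor'))   # prints the stemmed form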
--- a/sphinx/search/tr.py
+++ b/sphinx/search/tr.py
@@ -19,7 +19,7 @@ class SearchTurkish(SearchLanguage):
     lang = 'tr'
     language_name = 'Turkish'
     js_stemmer_rawcode = 'turkish-stemmer.js'
-    stopwords = set()  # type: Set[str]
+    stopwords: Set[str] = set()
 
     def init(self, options: Dict) -> None:
         self.stemmer = snowballstemmer.stemmer('turkish')
--- a/sphinx/search/zh.py
+++ b/sphinx/search/zh.py
@@ -230,7 +230,7 @@ class SearchChinese(SearchLanguage):
     js_stemmer_code = js_porter_stemmer
     stopwords = english_stopwords
     latin1_letters = re.compile(r'[a-zA-Z0-9_]+')
-    latin_terms = []  # type: List[str]
+    latin_terms: List[str] = []
 
     def init(self, options: Dict) -> None:
         if JIEBA:
@@ -241,7 +241,7 @@ class SearchChinese(SearchLanguage):
         self.stemmer = get_stemmer()
 
     def split(self, input: str) -> List[str]:
-        chinese = []  # type: List[str]
+        chinese: List[str] = []
         if JIEBA:
             chinese = list(jieba.cut_for_search(input))
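
jieba's search mode yields overlapping segments tuned for indexing, which is
why split materializes them into the annotated list. A small sketch, assuming
the jieba package is installed; the sample sentence is arbitrary:

from typing import List

import jieba

chinese: List[str] = list(jieba.cut_for_search('结巴分词支持搜索引擎模式'))
print(chinese)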