Mirror of https://github.com/sphinx-doc/sphinx.git
refactor: Use PEP-526 based variable annotation (sphinx.search)
parent d27bdce2d7
commit 94dc1d78a4
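
The change is mechanical throughout: PEP 484 "# type:" comments become PEP 526
variable annotations, with no intended change in behaviour. A minimal sketch of
the before/after pattern, using one attribute from the diff below:

    from typing import Set

    # before: a type comment; mypy reads it, the interpreter ignores it
    stopwords = set()  # type: Set[str]

    # after: a PEP 526 annotation; mypy reads it, and it is also
    # recorded in __annotations__ at runtime
    stopwords: Set[str] = set()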
--- a/sphinx/search/__init__.py
+++ b/sphinx/search/__init__.py
@@ -53,11 +53,11 @@ class SearchLanguage:
     This class is used to preprocess search word which Sphinx HTML readers
     type, before searching index. Default implementation does nothing.
     """
-    lang = None                 # type: str
-    language_name = None        # type: str
-    stopwords = set()           # type: Set[str]
-    js_splitter_code = None     # type: str
-    js_stemmer_rawcode = None   # type: str
+    lang: str = None
+    language_name: str = None
+    stopwords: Set[str] = set()
+    js_splitter_code: str = None
+    js_stemmer_rawcode: str = None
     js_stemmer_code = """
 /**
  * Dummy stemmer for languages without stemming rules.
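
One practical difference from type comments: class-level PEP 526 annotations
are recorded in the class's __annotations__ mapping and can be inspected at
runtime. A standalone sketch (not Sphinx code) of what the annotated
SearchLanguage attributes now expose:

    from typing import Set

    class SearchLanguage:
        lang: str = None
        stopwords: Set[str] = set()

    print(SearchLanguage.__annotations__)
    # {'lang': <class 'str'>, 'stopwords': typing.Set[str]}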
@@ -124,7 +124,7 @@ def parse_stop_word(source: str) -> Set[str]:
 
     * http://snowball.tartarus.org/algorithms/finnish/stop.txt
     """
-    result = set()  # type: Set[str]
+    result: Set[str] = set()
     for line in source.splitlines():
         line = line.split('|')[0]  # remove comment
         result.update(line.split())
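
For context, parse_stop_word consumes word lists in the snowball format
referenced in the docstring, where everything after a '|' is a comment. A
hypothetical usage sketch with a Finnish-style excerpt:

    source = """\
    | forms of 'to be' (comment-only line, ignored)
    olla   | to be
    olen   | I am
    on     | he/she is
    """
    print(parse_stop_word(source))
    # e.g. {'olla', 'olen', 'on'} (set ordering varies)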
@@ -132,7 +132,7 @@ def parse_stop_word(source: str) -> Set[str]:
 
 
 # maps language name to module.class or directly a class
-languages = {
+languages: Dict[str, Any] = {
     'da': 'sphinx.search.da.SearchDanish',
     'de': 'sphinx.search.de.SearchGerman',
     'en': SearchEnglish,
@@ -150,7 +150,7 @@ languages = {
     'sv': 'sphinx.search.sv.SearchSwedish',
     'tr': 'sphinx.search.tr.SearchTurkish',
     'zh': 'sphinx.search.zh.SearchChinese',
-}   # type: Dict[str, Any]
+}
 
 
 class _JavaScriptIndex:
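
The Dict[str, Any] annotation (moved from the closing brace up to the name,
where PEP 526 requires it) reflects that values are either a SearchLanguage
subclass or a dotted import path; IndexBuilder.__init__ below resolves the
string form lazily. A condensed sketch of that resolution logic, assuming the
languages mapping above (the real code also retries with the bare language
code and falls back to SearchEnglish):

    from importlib import import_module

    def resolve_language(lang: str):
        # entry is either a class already, a dotted path, or None
        lang_class = languages.get(lang)
        if isinstance(lang_class, str):
            # lazy import: 'sphinx.search.da.SearchDanish' -> SearchDanish
            module, classname = lang_class.rsplit('.', 1)
            lang_class = getattr(import_module(module), classname)
        return lang_class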
@@ -189,8 +189,8 @@ class WordCollector(nodes.NodeVisitor):
 
     def __init__(self, document: nodes.document, lang: SearchLanguage) -> None:
         super().__init__(document)
-        self.found_words = []           # type: List[str]
-        self.found_title_words = []     # type: List[str]
+        self.found_words: List[str] = []
+        self.found_title_words: List[str] = []
         self.lang = lang
 
     def is_meta_keywords(self, node: addnodes.meta) -> bool:
@@ -238,29 +238,24 @@ class IndexBuilder:
 
     def __init__(self, env: BuildEnvironment, lang: str, options: Dict, scoring: str) -> None:
         self.env = env
-        self._titles = {}           # type: Dict[str, str]
-                                    # docname -> title
-        self._filenames = {}        # type: Dict[str, str]
-                                    # docname -> filename
-        self._mapping = {}          # type: Dict[str, Set[str]]
-                                    # stemmed word -> set(docname)
-        self._title_mapping = {}    # type: Dict[str, Set[str]]
-                                    # stemmed words in titles -> set(docname)
-        self._stem_cache = {}       # type: Dict[str, str]
-                                    # word -> stemmed word
-        self._objtypes = {}         # type: Dict[Tuple[str, str], int]
-                                    # objtype -> index
-        self._objnames = {}         # type: Dict[int, Tuple[str, str, str]]
-                                    # objtype index -> (domain, type, objname (localized))
-        lang_class = languages.get(lang)    # type: Type[SearchLanguage]
-                                            # add language-specific SearchLanguage instance
+        self._titles: Dict[str, str] = {}           # docname -> title
+        self._filenames: Dict[str, str] = {}        # docname -> filename
+        self._mapping: Dict[str, Set[str]] = {}     # stemmed word -> set(docname)
+        # stemmed words in titles -> set(docname)
+        self._title_mapping: Dict[str, Set[str]] = {}
+        self._stem_cache: Dict[str, str] = {}       # word -> stemmed word
+        self._objtypes: Dict[Tuple[str, str], int] = {}     # objtype -> index
+        # objtype index -> (domain, type, objname (localized))
+        self._objnames: Dict[int, Tuple[str, str, str]] = {}
+        # add language-specific SearchLanguage instance
+        lang_class: Type[SearchLanguage] = languages.get(lang)
 
         # fallback; try again with language-code
         if lang_class is None and '_' in lang:
             lang_class = languages.get(lang.split('_')[0])
 
         if lang_class is None:
-            self.lang = SearchEnglish(options)  # type: SearchLanguage
+            self.lang: SearchLanguage = SearchEnglish(options)
         elif isinstance(lang_class, str):
             module, classname = lang_class.rsplit('.', 1)
             lang_class = getattr(import_module(module), classname)
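
Worth noting: the conversion is deliberately faithful rather than corrective.
lang: str = None keeps the type from the old comment (and
self.lang: SearchLanguage = SearchEnglish(options) annotates an instance
attribute inside a branch, which PEP 526 permits); under mypy's
no_implicit_optional setting, a None default would strictly need:

    from typing import Optional

    lang: Optional[str] = None  # what a None default strictly requires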
@@ -310,7 +305,7 @@ class IndexBuilder:
 
     def get_objects(self, fn2index: Dict[str, int]
                     ) -> Dict[str, Dict[str, Tuple[int, int, int, str]]]:
-        rv = {}  # type: Dict[str, Dict[str, Tuple[int, int, int, str]]]
+        rv: Dict[str, Dict[str, Tuple[int, int, int, str]]] = {}
         otypes = self._objtypes
         onames = self._objnames
         for domainname, domain in sorted(self.env.domains.items()):
@@ -346,7 +341,7 @@ class IndexBuilder:
         return rv
 
     def get_terms(self, fn2index: Dict) -> Tuple[Dict[str, List[str]], Dict[str, List[str]]]:
-        rvs = {}, {}  # type: Tuple[Dict[str, List[str]], Dict[str, List[str]]]
+        rvs: Tuple[Dict[str, List[str]], Dict[str, List[str]]] = ({}, {})
        for rv, mapping in zip(rvs, (self._mapping, self._title_mapping)):
            for k, v in mapping.items():
                if len(v) == 1:
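
One subtlety in the get_terms change: the tuple gains parentheses because, on
the older Python versions Sphinx supported at the time, the right-hand side of
an annotated assignment could not be a bare tuple (the grammar was only
relaxed in Python 3.8):

    from typing import Dict, Tuple

    # rvs: Tuple[Dict, Dict] = {}, {}   # SyntaxError before Python 3.8
    rvs: Tuple[Dict, Dict] = ({}, {})   # parenthesized form parses everywhere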
--- a/sphinx/search/ja.py
+++ b/sphinx/search/ja.py
@@ -54,8 +54,8 @@ class BaseSplitter:
 class MecabSplitter(BaseSplitter):
     def __init__(self, options: Dict) -> None:
         super().__init__(options)
-        self.ctypes_libmecab = None  # type: Any
-        self.ctypes_mecab = None     # type: Any
+        self.ctypes_libmecab: Any = None
+        self.ctypes_mecab: Any = None
         if not native_module:
             self.init_ctypes(options)
         else:
--- a/sphinx/search/ro.py
+++ b/sphinx/search/ro.py
@@ -19,7 +19,7 @@ class SearchRomanian(SearchLanguage):
     lang = 'ro'
     language_name = 'Romanian'
     js_stemmer_rawcode = 'romanian-stemmer.js'
-    stopwords = set()  # type: Set[str]
+    stopwords: Set[str] = set()
 
     def init(self, options: Dict) -> None:
         self.stemmer = snowballstemmer.stemmer('romanian')
--- a/sphinx/search/tr.py
+++ b/sphinx/search/tr.py
@@ -19,7 +19,7 @@ class SearchTurkish(SearchLanguage):
     lang = 'tr'
     language_name = 'Turkish'
     js_stemmer_rawcode = 'turkish-stemmer.js'
-    stopwords = set()  # type: Set[str]
+    stopwords: Set[str] = set()
 
     def init(self, options: Dict) -> None:
         self.stemmer = snowballstemmer.stemmer('turkish')
--- a/sphinx/search/zh.py
+++ b/sphinx/search/zh.py
@@ -230,7 +230,7 @@ class SearchChinese(SearchLanguage):
     js_stemmer_code = js_porter_stemmer
     stopwords = english_stopwords
     latin1_letters = re.compile(r'[a-zA-Z0-9_]+')
-    latin_terms = []  # type: List[str]
+    latin_terms: List[str] = []
 
     def init(self, options: Dict) -> None:
         if JIEBA:
@@ -241,7 +241,7 @@ class SearchChinese(SearchLanguage):
         self.stemmer = get_stemmer()
 
     def split(self, input: str) -> List[str]:
-        chinese = []  # type: List[str]
+        chinese: List[str] = []
         if JIEBA:
             chinese = list(jieba.cut_for_search(input))
 