refactor: Use PEP-526 based variable annotation (sphinx.search)

Takeshi KOMIYA 2021-03-27 00:38:32 +09:00
parent d27bdce2d7
commit 94dc1d78a4
5 changed files with 30 additions and 35 deletions
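The diff below replaces comment-style type hints ("# type: ..."), which only
static type checkers read, with PEP 526 inline variable annotations. A minimal
sketch of the two equivalent spellings, reusing names from the diff
(illustrative only, not code from the commit):

    from typing import Dict, List

    # Before: the type lives in a trailing comment.
    found_words = []  # type: List[str]
    languages = {}    # type: Dict[str, str]

    # After (PEP 526): the annotation is part of the assignment itself.
    found_words: List[str] = []
    languages: Dict[str, str] = {}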

sphinx/search/__init__.py

@@ -53,11 +53,11 @@ class SearchLanguage:
     This class is used to preprocess search word which Sphinx HTML readers
     type, before searching index. Default implementation does nothing.
     """
-    lang = None  # type: str
-    language_name = None  # type: str
-    stopwords = set()  # type: Set[str]
-    js_splitter_code = None  # type: str
-    js_stemmer_rawcode = None  # type: str
+    lang: str = None
+    language_name: str = None
+    stopwords: Set[str] = set()
+    js_splitter_code: str = None
+    js_stemmer_rawcode: str = None
     js_stemmer_code = """
 /**
  * Dummy stemmer for languages without stemming rules.
@@ -124,7 +124,7 @@ def parse_stop_word(source: str) -> Set[str]:
     * http://snowball.tartarus.org/algorithms/finnish/stop.txt
     """
-    result = set()  # type: Set[str]
+    result: Set[str] = set()
     for line in source.splitlines():
         line = line.split('|')[0]  # remove comment
         result.update(line.split())
@@ -132,7 +132,7 @@ def parse_stop_word(source: str) -> Set[str]:
 # maps language name to module.class or directly a class
-languages = {
+languages: Dict[str, Any] = {
     'da': 'sphinx.search.da.SearchDanish',
     'de': 'sphinx.search.de.SearchGerman',
     'en': SearchEnglish,
@@ -150,7 +150,7 @@ languages = {
     'sv': 'sphinx.search.sv.SearchSwedish',
     'tr': 'sphinx.search.tr.SearchTurkish',
     'zh': 'sphinx.search.zh.SearchChinese',
-}  # type: Dict[str, Any]
+}


 class _JavaScriptIndex:
@@ -189,8 +189,8 @@ class WordCollector(nodes.NodeVisitor):
     def __init__(self, document: nodes.document, lang: SearchLanguage) -> None:
         super().__init__(document)
-        self.found_words = []  # type: List[str]
-        self.found_title_words = []  # type: List[str]
+        self.found_words: List[str] = []
+        self.found_title_words: List[str] = []
         self.lang = lang

     def is_meta_keywords(self, node: addnodes.meta) -> bool:
@@ -238,29 +238,24 @@ class IndexBuilder:
     def __init__(self, env: BuildEnvironment, lang: str, options: Dict, scoring: str) -> None:
         self.env = env
-        self._titles = {}  # type: Dict[str, str]
-        # docname -> title
-        self._filenames = {}  # type: Dict[str, str]
-        # docname -> filename
-        self._mapping = {}  # type: Dict[str, Set[str]]
-        # stemmed word -> set(docname)
-        self._title_mapping = {}  # type: Dict[str, Set[str]]
-        # stemmed words in titles -> set(docname)
-        self._stem_cache = {}  # type: Dict[str, str]
-        # word -> stemmed word
-        self._objtypes = {}  # type: Dict[Tuple[str, str], int]
-        # objtype -> index
-        self._objnames = {}  # type: Dict[int, Tuple[str, str, str]]
-        # objtype index -> (domain, type, objname (localized))
-        lang_class = languages.get(lang)  # type: Type[SearchLanguage]
-        # add language-specific SearchLanguage instance
+        self._titles: Dict[str, str] = {}  # docname -> title
+        self._filenames: Dict[str, str] = {}  # docname -> filename
+        self._mapping: Dict[str, Set[str]] = {}  # stemmed word -> set(docname)
+        # stemmed words in titles -> set(docname)
+        self._title_mapping: Dict[str, Set[str]] = {}
+        self._stem_cache: Dict[str, str] = {}  # word -> stemmed word
+        self._objtypes: Dict[Tuple[str, str], int] = {}  # objtype -> index
+        # objtype index -> (domain, type, objname (localized))
+        self._objnames: Dict[int, Tuple[str, str, str]] = {}
+        # add language-specific SearchLanguage instance
+        lang_class: Type[SearchLanguage] = languages.get(lang)

         # fallback; try again with language-code
         if lang_class is None and '_' in lang:
             lang_class = languages.get(lang.split('_')[0])

         if lang_class is None:
-            self.lang = SearchEnglish(options)  # type: SearchLanguage
+            self.lang: SearchLanguage = SearchEnglish(options)
         elif isinstance(lang_class, str):
             module, classname = lang_class.rsplit('.', 1)
             lang_class = getattr(import_module(module), classname)
@@ -310,7 +305,7 @@ class IndexBuilder:
     def get_objects(self, fn2index: Dict[str, int]
                     ) -> Dict[str, Dict[str, Tuple[int, int, int, str]]]:
-        rv = {}  # type: Dict[str, Dict[str, Tuple[int, int, int, str]]]
+        rv: Dict[str, Dict[str, Tuple[int, int, int, str]]] = {}
         otypes = self._objtypes
         onames = self._objnames
         for domainname, domain in sorted(self.env.domains.items()):
@@ -346,7 +341,7 @@ class IndexBuilder:
         return rv

     def get_terms(self, fn2index: Dict) -> Tuple[Dict[str, List[str]], Dict[str, List[str]]]:
-        rvs = {}, {}  # type: Tuple[Dict[str, List[str]], Dict[str, List[str]]]
+        rvs: Tuple[Dict[str, List[str]], Dict[str, List[str]]] = ({}, {})
         for rv, mapping in zip(rvs, (self._mapping, self._title_mapping)):
             for k, v in mapping.items():
                 if len(v) == 1:

sphinx/search/ja.py

@@ -54,8 +54,8 @@ class BaseSplitter:
 class MecabSplitter(BaseSplitter):
     def __init__(self, options: Dict) -> None:
         super().__init__(options)
-        self.ctypes_libmecab = None  # type: Any
-        self.ctypes_mecab = None  # type: Any
+        self.ctypes_libmecab: Any = None
+        self.ctypes_mecab: Any = None
         if not native_module:
             self.init_ctypes(options)
         else:

sphinx/search/ro.py

@@ -19,7 +19,7 @@ class SearchRomanian(SearchLanguage):
     lang = 'ro'
     language_name = 'Romanian'
     js_stemmer_rawcode = 'romanian-stemmer.js'
-    stopwords = set()  # type: Set[str]
+    stopwords: Set[str] = set()

     def init(self, options: Dict) -> None:
         self.stemmer = snowballstemmer.stemmer('romanian')

sphinx/search/tr.py

@@ -19,7 +19,7 @@ class SearchTurkish(SearchLanguage):
     lang = 'tr'
     language_name = 'Turkish'
     js_stemmer_rawcode = 'turkish-stemmer.js'
-    stopwords = set()  # type: Set[str]
+    stopwords: Set[str] = set()

     def init(self, options: Dict) -> None:
         self.stemmer = snowballstemmer.stemmer('turkish')

sphinx/search/zh.py

@@ -230,7 +230,7 @@ class SearchChinese(SearchLanguage):
     js_stemmer_code = js_porter_stemmer
     stopwords = english_stopwords
     latin1_letters = re.compile(r'[a-zA-Z0-9_]+')
-    latin_terms = []  # type: List[str]
+    latin_terms: List[str] = []

     def init(self, options: Dict) -> None:
         if JIEBA:
@@ -241,7 +241,7 @@ class SearchChinese(SearchLanguage):
         self.stemmer = get_stemmer()

     def split(self, input: str) -> List[str]:
-        chinese = []  # type: List[str]
+        chinese: List[str] = []
         if JIEBA:
             chinese = list(jieba.cut_for_search(input))
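
One practical difference between the two styles, shown as a minimal sketch
under standard CPython semantics (not code from the commit): PEP 526
annotations on module- and class-level names are evaluated and stored in
__annotations__, so they are introspectable at runtime, whereas "# type:"
comments are invisible outside static checkers.

    from typing import Set

    class SearchLanguage:
        lang: str = None
        stopwords: Set[str] = set()

    print(SearchLanguage.__annotations__)
    # {'lang': <class 'str'>, 'stopwords': typing.Set[str]}

Annotations on instance attributes inside methods (like self._titles in
IndexBuilder.__init__ above) are not evaluated or stored at runtime; they
serve static checkers only.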