refactor: Use PEP-526 based variable annotation (sphinx.search)

This commit is contained in:
Takeshi KOMIYA 2021-03-27 00:38:32 +09:00
parent d27bdce2d7
commit 94dc1d78a4
5 changed files with 30 additions and 35 deletions

View File

@@ -53,11 +53,11 @@ class SearchLanguage:
This class is used to preprocess search word which Sphinx HTML readers This class is used to preprocess search word which Sphinx HTML readers
type, before searching index. Default implementation does nothing. type, before searching index. Default implementation does nothing.
""" """
lang = None # type: str lang: str = None
language_name = None # type: str language_name: str = None
stopwords = set() # type: Set[str] stopwords: Set[str] = set()
js_splitter_code = None # type: str js_splitter_code: str = None
js_stemmer_rawcode = None # type: str js_stemmer_rawcode: str = None
js_stemmer_code = """ js_stemmer_code = """
/** /**
* Dummy stemmer for languages without stemming rules. * Dummy stemmer for languages without stemming rules.
@@ -124,7 +124,7 @@ def parse_stop_word(source: str) -> Set[str]:
* http://snowball.tartarus.org/algorithms/finnish/stop.txt * http://snowball.tartarus.org/algorithms/finnish/stop.txt
""" """
result = set() # type: Set[str] result: Set[str] = set()
for line in source.splitlines(): for line in source.splitlines():
line = line.split('|')[0] # remove comment line = line.split('|')[0] # remove comment
result.update(line.split()) result.update(line.split())
@@ -132,7 +132,7 @@ def parse_stop_word(source: str) -> Set[str]:
# maps language name to module.class or directly a class # maps language name to module.class or directly a class
languages = { languages: Dict[str, Any] = {
'da': 'sphinx.search.da.SearchDanish', 'da': 'sphinx.search.da.SearchDanish',
'de': 'sphinx.search.de.SearchGerman', 'de': 'sphinx.search.de.SearchGerman',
'en': SearchEnglish, 'en': SearchEnglish,
@@ -150,7 +150,7 @@ languages = {
'sv': 'sphinx.search.sv.SearchSwedish', 'sv': 'sphinx.search.sv.SearchSwedish',
'tr': 'sphinx.search.tr.SearchTurkish', 'tr': 'sphinx.search.tr.SearchTurkish',
'zh': 'sphinx.search.zh.SearchChinese', 'zh': 'sphinx.search.zh.SearchChinese',
} # type: Dict[str, Any] }
class _JavaScriptIndex: class _JavaScriptIndex:
@@ -189,8 +189,8 @@ class WordCollector(nodes.NodeVisitor):
def __init__(self, document: nodes.document, lang: SearchLanguage) -> None: def __init__(self, document: nodes.document, lang: SearchLanguage) -> None:
super().__init__(document) super().__init__(document)
self.found_words = [] # type: List[str] self.found_words: List[str] = []
self.found_title_words = [] # type: List[str] self.found_title_words: List[str] = []
self.lang = lang self.lang = lang
def is_meta_keywords(self, node: addnodes.meta) -> bool: def is_meta_keywords(self, node: addnodes.meta) -> bool:
@@ -238,29 +238,24 @@ class IndexBuilder:
def __init__(self, env: BuildEnvironment, lang: str, options: Dict, scoring: str) -> None: def __init__(self, env: BuildEnvironment, lang: str, options: Dict, scoring: str) -> None:
self.env = env self.env = env
self._titles = {} # type: Dict[str, str] self._titles: Dict[str, str] = {} # docname -> title
# docname -> title self._filenames: Dict[str, str] = {} # docname -> filename
self._filenames = {} # type: Dict[str, str] self._mapping: Dict[str, Set[str]] = {} # stemmed word -> set(docname)
# docname -> filename # stemmed words in titles -> set(docname)
self._mapping = {} # type: Dict[str, Set[str]] self._title_mapping: Dict[str, Set[str]] = {}
# stemmed word -> set(docname) self._stem_cache: Dict[str, str] = {} # word -> stemmed word
self._title_mapping = {} # type: Dict[str, Set[str]] self._objtypes: Dict[Tuple[str, str], int] = {} # objtype -> index
# stemmed words in titles -> set(docname) # objtype index -> (domain, type, objname (localized))
self._stem_cache = {} # type: Dict[str, str] self._objnames: Dict[int, Tuple[str, str, str]] = {}
# word -> stemmed word # add language-specific SearchLanguage instance
self._objtypes = {} # type: Dict[Tuple[str, str], int] lang_class: Type[SearchLanguage] = languages.get(lang)
# objtype -> index
self._objnames = {} # type: Dict[int, Tuple[str, str, str]]
# objtype index -> (domain, type, objname (localized))
lang_class = languages.get(lang) # type: Type[SearchLanguage]
# add language-specific SearchLanguage instance
# fallback; try again with language-code # fallback; try again with language-code
if lang_class is None and '_' in lang: if lang_class is None and '_' in lang:
lang_class = languages.get(lang.split('_')[0]) lang_class = languages.get(lang.split('_')[0])
if lang_class is None: if lang_class is None:
self.lang = SearchEnglish(options) # type: SearchLanguage self.lang: SearchLanguage = SearchEnglish(options)
elif isinstance(lang_class, str): elif isinstance(lang_class, str):
module, classname = lang_class.rsplit('.', 1) module, classname = lang_class.rsplit('.', 1)
lang_class = getattr(import_module(module), classname) lang_class = getattr(import_module(module), classname)
@@ -310,7 +305,7 @@ class IndexBuilder:
def get_objects(self, fn2index: Dict[str, int] def get_objects(self, fn2index: Dict[str, int]
) -> Dict[str, Dict[str, Tuple[int, int, int, str]]]: ) -> Dict[str, Dict[str, Tuple[int, int, int, str]]]:
rv = {} # type: Dict[str, Dict[str, Tuple[int, int, int, str]]] rv: Dict[str, Dict[str, Tuple[int, int, int, str]]] = {}
otypes = self._objtypes otypes = self._objtypes
onames = self._objnames onames = self._objnames
for domainname, domain in sorted(self.env.domains.items()): for domainname, domain in sorted(self.env.domains.items()):
@@ -346,7 +341,7 @@ class IndexBuilder:
return rv return rv
def get_terms(self, fn2index: Dict) -> Tuple[Dict[str, List[str]], Dict[str, List[str]]]: def get_terms(self, fn2index: Dict) -> Tuple[Dict[str, List[str]], Dict[str, List[str]]]:
rvs = {}, {} # type: Tuple[Dict[str, List[str]], Dict[str, List[str]]] rvs: Tuple[Dict[str, List[str]], Dict[str, List[str]]] = ({}, {})
for rv, mapping in zip(rvs, (self._mapping, self._title_mapping)): for rv, mapping in zip(rvs, (self._mapping, self._title_mapping)):
for k, v in mapping.items(): for k, v in mapping.items():
if len(v) == 1: if len(v) == 1:

View File

@@ -54,8 +54,8 @@ class BaseSplitter:
class MecabSplitter(BaseSplitter): class MecabSplitter(BaseSplitter):
def __init__(self, options: Dict) -> None: def __init__(self, options: Dict) -> None:
super().__init__(options) super().__init__(options)
self.ctypes_libmecab = None # type: Any self.ctypes_libmecab: Any = None
self.ctypes_mecab = None # type: Any self.ctypes_mecab: Any = None
if not native_module: if not native_module:
self.init_ctypes(options) self.init_ctypes(options)
else: else:

View File

@@ -19,7 +19,7 @@ class SearchRomanian(SearchLanguage):
lang = 'ro' lang = 'ro'
language_name = 'Romanian' language_name = 'Romanian'
js_stemmer_rawcode = 'romanian-stemmer.js' js_stemmer_rawcode = 'romanian-stemmer.js'
stopwords = set() # type: Set[str] stopwords: Set[str] = set()
def init(self, options: Dict) -> None: def init(self, options: Dict) -> None:
self.stemmer = snowballstemmer.stemmer('romanian') self.stemmer = snowballstemmer.stemmer('romanian')

View File

@@ -19,7 +19,7 @@ class SearchTurkish(SearchLanguage):
lang = 'tr' lang = 'tr'
language_name = 'Turkish' language_name = 'Turkish'
js_stemmer_rawcode = 'turkish-stemmer.js' js_stemmer_rawcode = 'turkish-stemmer.js'
stopwords = set() # type: Set[str] stopwords: Set[str] = set()
def init(self, options: Dict) -> None: def init(self, options: Dict) -> None:
self.stemmer = snowballstemmer.stemmer('turkish') self.stemmer = snowballstemmer.stemmer('turkish')

View File

@@ -230,7 +230,7 @@ class SearchChinese(SearchLanguage):
js_stemmer_code = js_porter_stemmer js_stemmer_code = js_porter_stemmer
stopwords = english_stopwords stopwords = english_stopwords
latin1_letters = re.compile(r'[a-zA-Z0-9_]+') latin1_letters = re.compile(r'[a-zA-Z0-9_]+')
latin_terms = [] # type: List[str] latin_terms: List[str] = []
def init(self, options: Dict) -> None: def init(self, options: Dict) -> None:
if JIEBA: if JIEBA:
@@ -241,7 +241,7 @@ class SearchChinese(SearchLanguage):
self.stemmer = get_stemmer() self.stemmer = get_stemmer()
def split(self, input: str) -> List[str]: def split(self, input: str) -> List[str]:
chinese = [] # type: List[str] chinese: List[str] = []
if JIEBA: if JIEBA:
chinese = list(jieba.cut_for_search(input)) chinese = list(jieba.cut_for_search(input))