From 4ec7fdf24b2c51bf6c3b22979b30829f4d3b910a Mon Sep 17 00:00:00 2001 From: Takeshi KOMIYA Date: Wed, 25 Dec 2019 00:52:47 +0900 Subject: [PATCH] Migrate to py3 style type annotation: sphinx.search --- sphinx/search/__init__.py | 85 ++++++++++++++------------------------- sphinx/search/da.py | 12 ++---- sphinx/search/de.py | 12 ++---- sphinx/search/en.py | 12 ++---- sphinx/search/es.py | 12 ++---- sphinx/search/fi.py | 12 ++---- sphinx/search/fr.py | 12 ++---- sphinx/search/hu.py | 12 ++---- sphinx/search/it.py | 12 ++---- sphinx/search/ja.py | 58 +++++++++----------------- sphinx/search/nl.py | 12 ++---- sphinx/search/no.py | 12 ++---- sphinx/search/pt.py | 12 ++---- sphinx/search/ro.py | 13 +++--- sphinx/search/ru.py | 12 ++---- sphinx/search/sv.py | 13 +++--- sphinx/search/tr.py | 13 +++--- sphinx/search/zh.py | 18 +++------ 18 files changed, 116 insertions(+), 228 deletions(-) diff --git a/sphinx/search/__init__.py b/sphinx/search/__init__.py index 22ba14d40..54a1aad92 100644 --- a/sphinx/search/__init__.py +++ b/sphinx/search/__init__.py @@ -13,21 +13,21 @@ import re import warnings from importlib import import_module from os import path +from typing import Any, Dict, IO, Iterable, List, Tuple, Set from docutils import nodes +from docutils.nodes import Node from sphinx import addnodes from sphinx import package_dir from sphinx.deprecation import RemovedInSphinx40Warning +from sphinx.environment import BuildEnvironment from sphinx.search.jssplitter import splitter_code from sphinx.util import jsdump, rpartition if False: # For type annotation - from typing import Any, Dict, IO, Iterable, List, Tuple, Set # NOQA from typing import Type # for python3.5.1 - from docutils import nodes # NOQA - from sphinx.environment import BuildEnvironment # NOQA class SearchLanguage: @@ -69,19 +69,16 @@ var Stemmer = function() { _word_re = re.compile(r'(?u)\w+') - def __init__(self, options): - # type: (Dict) -> None + def __init__(self, options: Dict) -> None: self.options = options self.init(options) - def init(self, options): - # type: (Dict) -> None + def init(self, options: Dict) -> None: """ Initialize the class with the options the user has given. """ - def split(self, input): - # type: (str) -> List[str] + def split(self, input: str) -> List[str]: """ This method splits a sentence into words. Default splitter splits input at white spaces, which should be enough for most languages except CJK @@ -89,8 +86,7 @@ var Stemmer = function() { """ return self._word_re.findall(input) - def stem(self, word): - # type: (str) -> str + def stem(self, word: str) -> str: """ This method implements stemming algorithm of the Python version. @@ -103,8 +99,7 @@ var Stemmer = function() { """ return word - def word_filter(self, word): - # type: (str) -> bool + def word_filter(self, word: str) -> bool: """ Return true if the target word should be registered in the search index. This method is called after stemming. @@ -121,8 +116,7 @@ var Stemmer = function() { from sphinx.search.en import SearchEnglish -def parse_stop_word(source): - # type: (str) -> Set[str] +def parse_stop_word(source: str) -> Set[str]: """ parse snowball style word list like this: @@ -166,24 +160,20 @@ class _JavaScriptIndex: PREFIX = 'Search.setIndex(' SUFFIX = ')' - def dumps(self, data): - # type: (Any) -> str + def dumps(self, data: Any) -> str: return self.PREFIX + jsdump.dumps(data) + self.SUFFIX - def loads(self, s): - # type: (str) -> Any + def loads(self, s: str) -> Any: data = s[len(self.PREFIX):-len(self.SUFFIX)] if not data or not s.startswith(self.PREFIX) or not \ s.endswith(self.SUFFIX): raise ValueError('invalid data') return jsdump.loads(data) - def dump(self, data, f): - # type: (Any, IO) -> None + def dump(self, data: Any, f: IO) -> None: f.write(self.dumps(data)) - def load(self, f): - # type: (IO) -> Any + def load(self, f: IO) -> Any: return self.loads(f.read()) @@ -195,15 +185,13 @@ class WordCollector(nodes.NodeVisitor): A special visitor that collects words for the `IndexBuilder`. """ - def __init__(self, document, lang): - # type: (nodes.document, SearchLanguage) -> None + def __init__(self, document: nodes.document, lang: SearchLanguage) -> None: super().__init__(document) self.found_words = [] # type: List[str] self.found_title_words = [] # type: List[str] self.lang = lang - def is_meta_keywords(self, node, nodetype=None): - # type: (addnodes.meta, Any) -> bool + def is_meta_keywords(self, node: addnodes.meta, nodetype: Any = None) -> bool: if nodetype is not None: warnings.warn('"nodetype" argument for WordCollector.is_meta_keywords() ' 'is deprecated.', RemovedInSphinx40Warning) @@ -217,8 +205,7 @@ class WordCollector(nodes.NodeVisitor): return False - def dispatch_visit(self, node): - # type: (nodes.Node) -> None + def dispatch_visit(self, node: Node) -> None: if isinstance(node, nodes.comment): raise nodes.SkipNode elif isinstance(node, nodes.raw): @@ -251,8 +238,7 @@ class IndexBuilder: 'pickle': pickle } - def __init__(self, env, lang, options, scoring): - # type: (BuildEnvironment, str, Dict, str) -> None + def __init__(self, env: BuildEnvironment, lang: str, options: Dict, scoring: str) -> None: self.env = env self._titles = {} # type: Dict[str, str] # docname -> title @@ -292,8 +278,7 @@ class IndexBuilder: self.js_scorer_code = '' self.js_splitter_code = splitter_code - def load(self, stream, format): - # type: (IO, Any) -> None + def load(self, stream: IO, format: Any) -> None: """Reconstruct from frozen data.""" if isinstance(format, str): format = self.formats[format] @@ -306,8 +291,7 @@ class IndexBuilder: self._filenames = dict(zip(index2fn, frozen['filenames'])) self._titles = dict(zip(index2fn, frozen['titles'])) - def load_terms(mapping): - # type: (Dict[str, Any]) -> Dict[str, Set[str]] + def load_terms(mapping: Dict[str, Any]) -> Dict[str, Set[str]]: rv = {} for k, v in mapping.items(): if isinstance(v, int): @@ -320,15 +304,14 @@ class IndexBuilder: self._title_mapping = load_terms(frozen['titleterms']) # no need to load keywords/objtypes - def dump(self, stream, format): - # type: (IO, Any) -> None + def dump(self, stream: IO, format: Any) -> None: """Dump the frozen index to a stream.""" if isinstance(format, str): format = self.formats[format] format.dump(self.freeze(), stream) - def get_objects(self, fn2index): - # type: (Dict[str, int]) -> Dict[str, Dict[str, Tuple[int, int, int, str]]] + def get_objects(self, fn2index: Dict[str, int] + ) -> Dict[str, Dict[str, Tuple[int, int, int, str]]]: rv = {} # type: Dict[str, Dict[str, Tuple[int, int, int, str]]] otypes = self._objtypes onames = self._objnames @@ -364,8 +347,7 @@ class IndexBuilder: pdict[name] = (fn2index[docname], typeindex, prio, shortanchor) return rv - def get_terms(self, fn2index): - # type: (Dict) -> Tuple[Dict[str, List[str]], Dict[str, List[str]]] + def get_terms(self, fn2index: Dict) -> Tuple[Dict[str, List[str]], Dict[str, List[str]]]: rvs = {}, {} # type: Tuple[Dict[str, List[str]], Dict[str, List[str]]] for rv, mapping in zip(rvs, (self._mapping, self._title_mapping)): for k, v in mapping.items(): @@ -377,8 +359,7 @@ class IndexBuilder: rv[k] = sorted([fn2index[fn] for fn in v if fn in fn2index]) return rvs - def freeze(self): - # type: () -> Dict[str, Any] + def freeze(self) -> Dict[str, Any]: """Create a usable data structure for serializing.""" docnames, titles = zip(*sorted(self._titles.items())) filenames = [self._filenames.get(docname) for docname in docnames] @@ -392,12 +373,10 @@ class IndexBuilder: objects=objects, objtypes=objtypes, objnames=objnames, titleterms=title_terms, envversion=self.env.version) - def label(self): - # type: () -> str + def label(self) -> str: return "%s (code: %s)" % (self.lang.language_name, self.lang.lang) - def prune(self, docnames): - # type: (Iterable[str]) -> None + def prune(self, docnames: Iterable[str]) -> None: """Remove data for all docnames not in the list.""" new_titles = {} new_filenames = {} @@ -412,8 +391,7 @@ class IndexBuilder: for wordnames in self._title_mapping.values(): wordnames.intersection_update(docnames) - def feed(self, docname, filename, title, doctree): - # type: (str, str, str, nodes.document) -> None + def feed(self, docname: str, filename: str, title: str, doctree: nodes.document) -> None: """Feed a doctree to the index.""" self._titles[docname] = title self._filenames[docname] = filename @@ -422,8 +400,7 @@ class IndexBuilder: doctree.walk(visitor) # memoize self.lang.stem - def stem(word): - # type: (str) -> str + def stem(word: str) -> str: try: return self._stem_cache[word] except KeyError: @@ -447,8 +424,7 @@ class IndexBuilder: if _filter(stemmed_word) and not already_indexed: self._mapping.setdefault(stemmed_word, set()).add(docname) - def context_for_searchtool(self): - # type: () -> Dict[str, Any] + def context_for_searchtool(self) -> Dict[str, Any]: return { 'search_language_stemming_code': self.lang.js_stemmer_code, 'search_language_stop_words': jsdump.dumps(sorted(self.lang.stopwords)), @@ -456,8 +432,7 @@ class IndexBuilder: 'search_word_splitter_code': self.js_splitter_code, } - def get_js_stemmer_rawcode(self): - # type: () -> str + def get_js_stemmer_rawcode(self) -> str: if self.lang.js_stemmer_rawcode: return path.join(package_dir, 'search', 'non-minified-js', self.lang.js_stemmer_rawcode) diff --git a/sphinx/search/da.py b/sphinx/search/da.py index 228fdf086..b04679e10 100644 --- a/sphinx/search/da.py +++ b/sphinx/search/da.py @@ -8,13 +8,11 @@ :license: BSD, see LICENSE for details. """ -from sphinx.search import SearchLanguage, parse_stop_word +from typing import Dict import snowballstemmer -if False: - # For type annotation - from typing import Any # NOQA +from sphinx.search import SearchLanguage, parse_stop_word danish_stopwords = parse_stop_word(''' @@ -128,10 +126,8 @@ class SearchDanish(SearchLanguage): js_stemmer_code = js_stemmer stopwords = danish_stopwords - def init(self, options): - # type: (Any) -> None + def init(self, options: Dict) -> None: self.stemmer = snowballstemmer.stemmer('danish') - def stem(self, word): - # type: (str) -> str + def stem(self, word: str) -> str: return self.stemmer.stemWord(word.lower()) diff --git a/sphinx/search/de.py b/sphinx/search/de.py index 5413e0732..ae1827bf9 100644 --- a/sphinx/search/de.py +++ b/sphinx/search/de.py @@ -8,13 +8,11 @@ :license: BSD, see LICENSE for details. """ -from sphinx.search import SearchLanguage, parse_stop_word +from typing import Dict import snowballstemmer -if False: - # For type annotation - from typing import Any # NOQA +from sphinx.search import SearchLanguage, parse_stop_word german_stopwords = parse_stop_word(''' @@ -311,10 +309,8 @@ class SearchGerman(SearchLanguage): js_stemmer_code = js_stemmer stopwords = german_stopwords - def init(self, options): - # type: (Any) -> None + def init(self, options: Dict) -> None: self.stemmer = snowballstemmer.stemmer('german') - def stem(self, word): - # type: (str) -> str + def stem(self, word: str) -> str: return self.stemmer.stemWord(word.lower()) diff --git a/sphinx/search/en.py b/sphinx/search/en.py index fa8e1f66b..1fabef78d 100644 --- a/sphinx/search/en.py +++ b/sphinx/search/en.py @@ -8,13 +8,11 @@ :license: BSD, see LICENSE for details. """ +from typing import Dict + from sphinx.search import SearchLanguage from sphinx.util.stemmer import get_stemmer -if False: - # For type annotation - from typing import Dict # NOQA - english_stopwords = set(""" a and are as at be but by @@ -220,10 +218,8 @@ class SearchEnglish(SearchLanguage): js_stemmer_code = js_porter_stemmer stopwords = english_stopwords - def init(self, options): - # type: (Dict) -> None + def init(self, options: Dict) -> None: self.stemmer = get_stemmer() - def stem(self, word): - # type: (str) -> str + def stem(self, word: str) -> str: return self.stemmer.stem(word.lower()) diff --git a/sphinx/search/es.py b/sphinx/search/es.py index c6f0dae9c..1009961c8 100644 --- a/sphinx/search/es.py +++ b/sphinx/search/es.py @@ -8,13 +8,11 @@ :license: BSD, see LICENSE for details. """ -from sphinx.search import SearchLanguage, parse_stop_word +from typing import Dict import snowballstemmer -if False: - # For type annotation - from typing import Any # NOQA +from sphinx.search import SearchLanguage, parse_stop_word spanish_stopwords = parse_stop_word(''' @@ -371,10 +369,8 @@ class SearchSpanish(SearchLanguage): js_stemmer_code = js_stemmer stopwords = spanish_stopwords - def init(self, options): - # type: (Any) -> None + def init(self, options: Dict) -> None: self.stemmer = snowballstemmer.stemmer('spanish') - def stem(self, word): - # type: (str) -> str + def stem(self, word: str) -> str: return self.stemmer.stemWord(word.lower()) diff --git a/sphinx/search/fi.py b/sphinx/search/fi.py index b8ff1d1f8..67bee89fe 100644 --- a/sphinx/search/fi.py +++ b/sphinx/search/fi.py @@ -8,13 +8,11 @@ :license: BSD, see LICENSE for details. """ -from sphinx.search import SearchLanguage, parse_stop_word +from typing import Dict import snowballstemmer -if False: - # For type annotation - from typing import Any # NOQA +from sphinx.search import SearchLanguage, parse_stop_word finnish_stopwords = parse_stop_word(''' @@ -121,10 +119,8 @@ class SearchFinnish(SearchLanguage): js_stemmer_code = js_stemmer stopwords = finnish_stopwords - def init(self, options): - # type: (Any) -> None + def init(self, options: Dict) -> None: self.stemmer = snowballstemmer.stemmer('finnish') - def stem(self, word): - # type: (str) -> str + def stem(self, word: str) -> str: return self.stemmer.stemWord(word.lower()) diff --git a/sphinx/search/fr.py b/sphinx/search/fr.py index 0848843f3..b15271888 100644 --- a/sphinx/search/fr.py +++ b/sphinx/search/fr.py @@ -8,13 +8,11 @@ :license: BSD, see LICENSE for details. """ -from sphinx.search import SearchLanguage, parse_stop_word +from typing import Dict import snowballstemmer -if False: - # For type annotation - from typing import Any # NOQA +from sphinx.search import SearchLanguage, parse_stop_word french_stopwords = parse_stop_word(''' @@ -207,10 +205,8 @@ class SearchFrench(SearchLanguage): js_stemmer_code = js_stemmer stopwords = french_stopwords - def init(self, options): - # type: (Any) -> None + def init(self, options: Dict) -> None: self.stemmer = snowballstemmer.stemmer('french') - def stem(self, word): - # type: (str) -> str + def stem(self, word: str) -> str: return self.stemmer.stemWord(word.lower()) diff --git a/sphinx/search/hu.py b/sphinx/search/hu.py index 973475cb3..085773383 100644 --- a/sphinx/search/hu.py +++ b/sphinx/search/hu.py @@ -8,13 +8,11 @@ :license: BSD, see LICENSE for details. """ -from sphinx.search import SearchLanguage, parse_stop_word +from typing import Dict import snowballstemmer -if False: - # For type annotation - from typing import Any # NOQA +from sphinx.search import SearchLanguage, parse_stop_word hungarian_stopwords = parse_stop_word(''' @@ -235,10 +233,8 @@ class SearchHungarian(SearchLanguage): js_stemmer_code = js_stemmer stopwords = hungarian_stopwords - def init(self, options): - # type: (Any) -> None + def init(self, options: Dict) -> None: self.stemmer = snowballstemmer.stemmer('hungarian') - def stem(self, word): - # type: (str) -> str + def stem(self, word: str) -> str: return self.stemmer.stemWord(word.lower()) diff --git a/sphinx/search/it.py b/sphinx/search/it.py index 41039818b..e76cd99dd 100644 --- a/sphinx/search/it.py +++ b/sphinx/search/it.py @@ -8,13 +8,11 @@ :license: BSD, see LICENSE for details. """ -from sphinx.search import SearchLanguage, parse_stop_word +from typing import Dict import snowballstemmer -if False: - # For type annotation - from typing import Any # NOQA +from sphinx.search import SearchLanguage, parse_stop_word italian_stopwords = parse_stop_word(''' @@ -324,10 +322,8 @@ class SearchItalian(SearchLanguage): js_stemmer_code = js_stemmer stopwords = italian_stopwords - def init(self, options): - # type: (Any) -> None + def init(self, options: Dict) -> None: self.stemmer = snowballstemmer.stemmer('italian') - def stem(self, word): - # type: (str) -> str + def stem(self, word: str) -> str: return self.stemmer.stemWord(word.lower()) diff --git a/sphinx/search/ja.py b/sphinx/search/ja.py index 0c11af74d..a7b9f9684 100644 --- a/sphinx/search/ja.py +++ b/sphinx/search/ja.py @@ -20,6 +20,7 @@ import os import re import sys import warnings +from typing import Any, Dict, List try: import MeCab @@ -38,21 +39,13 @@ from sphinx.errors import SphinxError, ExtensionError from sphinx.search import SearchLanguage from sphinx.util import import_object -if False: - # For type annotation - from typing import Any, Dict, List # NOQA - class BaseSplitter: - - def __init__(self, options): - # type: (Dict) -> None + def __init__(self, options: Dict) -> None: self.options = options - def split(self, input): - # type: (str) -> List[str] + def split(self, input: str) -> List[str]: """ - :param str input: :return: :rtype: list[str] @@ -61,8 +54,7 @@ class BaseSplitter: class MecabSplitter(BaseSplitter): - def __init__(self, options): - # type: (Dict) -> None + def __init__(self, options: Dict) -> None: super().__init__(options) self.ctypes_libmecab = None # type: Any self.ctypes_mecab = None # type: Any @@ -72,8 +64,7 @@ class MecabSplitter(BaseSplitter): self.init_native(options) self.dict_encode = options.get('dic_enc', 'utf-8') - def split(self, input): - # type: (str) -> List[str] + def split(self, input: str) -> List[str]: if native_module: result = self.native.parse(input) else: @@ -81,16 +72,14 @@ class MecabSplitter(BaseSplitter): self.ctypes_mecab, input.encode(self.dict_encode)) return result.split(' ') - def init_native(self, options): - # type: (Dict) -> None + def init_native(self, options: Dict) -> None: param = '-Owakati' dict = options.get('dict') if dict: param += ' -d %s' % dict self.native = MeCab.Tagger(param) - def init_ctypes(self, options): - # type: (Dict) -> None + def init_ctypes(self, options: Dict) -> None: import ctypes.util lib = options.get('lib') @@ -126,8 +115,7 @@ class MecabSplitter(BaseSplitter): if self.ctypes_mecab is None: raise SphinxError('mecab initialization failed') - def __del__(self): - # type: () -> None + def __del__(self) -> None: if self.ctypes_libmecab: self.ctypes_libmecab.mecab_destroy(self.ctypes_mecab) @@ -135,21 +123,18 @@ MeCabBinder = MecabSplitter # keep backward compatibility until Sphinx-1.6 class JanomeSplitter(BaseSplitter): - def __init__(self, options): - # type: (Dict) -> None + def __init__(self, options: Dict) -> None: super().__init__(options) self.user_dict = options.get('user_dic') self.user_dict_enc = options.get('user_dic_enc', 'utf8') self.init_tokenizer() - def init_tokenizer(self): - # type: () -> None + def init_tokenizer(self) -> None: if not janome_module: raise RuntimeError('Janome is not available') self.tokenizer = janome.tokenizer.Tokenizer(udic=self.user_dict, udic_enc=self.user_dict_enc) - def split(self, input): - # type: (str) -> List[str] + def split(self, input: str) -> List[str]: result = ' '.join(token.surface for token in self.tokenizer.tokenize(input)) return result.split(' ') @@ -425,23 +410,20 @@ class DefaultSplitter(BaseSplitter): '郎': 1082, '1': -270, 'E1': 306, 'ル': -673, 'ン': -496} # ctype_ - def ctype_(self, char): - # type: (str) -> str + def ctype_(self, char: str) -> str: for pattern, value in self.patterns_.items(): if pattern.match(char): return value return 'O' # ts_ - def ts_(self, dict, key): - # type: (Dict[str, int], str) -> int + def ts_(self, dict: Dict[str, int], key: str) -> int: if key in dict: return dict[key] return 0 # segment - def split(self, input): - # type: (str) -> List[str] + def split(self, input: str) -> List[str]: if not input: return [] @@ -549,8 +531,7 @@ class SearchJapanese(SearchLanguage): 'janome': 'sphinx.search.ja.JanomeSplitter', } - def init(self, options): - # type: (Dict) -> None + def init(self, options: Dict) -> None: type = options.get('type', 'sphinx.search.ja.DefaultSplitter') if type in self.splitters: dotted_path = self.splitters[type] @@ -565,14 +546,11 @@ class SearchJapanese(SearchLanguage): raise ExtensionError("Splitter module %r can't be imported" % dotted_path) - def split(self, input): - # type: (str) -> List[str] + def split(self, input: str) -> List[str]: return self.splitter.split(input) - def word_filter(self, stemmed_word): - # type: (str) -> bool + def word_filter(self, stemmed_word: str) -> bool: return len(stemmed_word) > 1 - def stem(self, word): - # type: (str) -> str + def stem(self, word: str) -> str: return word diff --git a/sphinx/search/nl.py b/sphinx/search/nl.py index 076c190b2..0e2e2ef23 100644 --- a/sphinx/search/nl.py +++ b/sphinx/search/nl.py @@ -8,13 +8,11 @@ :license: BSD, see LICENSE for details. """ -from sphinx.search import SearchLanguage, parse_stop_word +from typing import Dict import snowballstemmer -if False: - # For type annotation - from typing import Any # NOQA +from sphinx.search import SearchLanguage, parse_stop_word dutch_stopwords = parse_stop_word(''' @@ -135,10 +133,8 @@ class SearchDutch(SearchLanguage): js_stemmer_code = js_stemmer stopwords = dutch_stopwords - def init(self, options): - # type: (Any) -> None + def init(self, options: Dict) -> None: self.stemmer = snowballstemmer.stemmer('dutch') - def stem(self, word): - # type: (str) -> str + def stem(self, word: str) -> str: return self.stemmer.stemWord(word.lower()) diff --git a/sphinx/search/no.py b/sphinx/search/no.py index 106c6b670..68c1ac207 100644 --- a/sphinx/search/no.py +++ b/sphinx/search/no.py @@ -8,13 +8,11 @@ :license: BSD, see LICENSE for details. """ -from sphinx.search import SearchLanguage, parse_stop_word +from typing import Dict import snowballstemmer -if False: - # For type annotation - from typing import Any # NOQA +from sphinx.search import SearchLanguage, parse_stop_word norwegian_stopwords = parse_stop_word(''' @@ -210,10 +208,8 @@ class SearchNorwegian(SearchLanguage): js_stemmer_code = js_stemmer stopwords = norwegian_stopwords - def init(self, options): - # type: (Any) -> None + def init(self, options: Dict) -> None: self.stemmer = snowballstemmer.stemmer('norwegian') - def stem(self, word): - # type: (str) -> str + def stem(self, word: str) -> str: return self.stemmer.stemWord(word.lower()) diff --git a/sphinx/search/pt.py b/sphinx/search/pt.py index 143759387..2538511f7 100644 --- a/sphinx/search/pt.py +++ b/sphinx/search/pt.py @@ -8,13 +8,11 @@ :license: BSD, see LICENSE for details. """ -from sphinx.search import SearchLanguage, parse_stop_word +from typing import Dict import snowballstemmer -if False: - # For type annotation - from typing import Any # NOQA +from sphinx.search import SearchLanguage, parse_stop_word portuguese_stopwords = parse_stop_word(''' @@ -270,10 +268,8 @@ class SearchPortuguese(SearchLanguage): js_stemmer_code = js_stemmer stopwords = portuguese_stopwords - def init(self, options): - # type: (Any) -> None + def init(self, options: Dict) -> None: self.stemmer = snowballstemmer.stemmer('portuguese') - def stem(self, word): - # type: (str) -> str + def stem(self, word: str) -> str: return self.stemmer.stemWord(word.lower()) diff --git a/sphinx/search/ro.py b/sphinx/search/ro.py index e385d6f01..cfae772c9 100644 --- a/sphinx/search/ro.py +++ b/sphinx/search/ro.py @@ -8,13 +8,12 @@ :license: BSD, see LICENSE for details. """ -from sphinx.search import SearchLanguage +from typing import Dict, Set import snowballstemmer -if False: - # For type annotation - from typing import Dict, Set # NOQA +from sphinx.search import SearchLanguage + js_stemmer = """ var JSX={};(function(j){function l(b,e){var a=function(){};a.prototype=e.prototype;var c=new a;for(var d in b){b[d].prototype=c}}function L(c,b){for(var a in b.prototype)if(b.prototype.hasOwnProperty(a))c.prototype[a]=b.prototype[a]}function h(a,b,d){function c(a,b,c){delete a[b];a[b]=c;return c}Object.defineProperty(a,b,{get:function(){return c(a,b,d())},set:function(d){c(a,b,d)},enumerable:true,configurable:true})}function M(a,b,c){return a[b]=a[b]/c|0}var E=parseInt;var C=parseFloat;function N(a){return a!==a}var A=isFinite;var z=encodeURIComponent;var y=decodeURIComponent;var x=encodeURI;var w=decodeURI;var u=Object.prototype.toString;var D=Object.prototype.hasOwnProperty;function k(){}j.require=function(b){var a=r[b];return a!==undefined?a:null};j.profilerIsRunning=function(){return k.getResults!=null};j.getProfileResults=function(){return(k.getResults||function(){return{}})()};j.postProfileResults=function(a,b){if(k.postResults==null)throw new Error('profiler has not been turned on');return k.postResults(a,b)};j.resetProfileResults=function(){if(k.resetResults==null)throw new Error('profiler has not been turned on');return k.resetResults()};j.DEBUG=false;function t(){};l([t],Error);function a(a,b,c){this.F=a.length;this.K=a;this.L=b;this.I=c;this.H=null;this.P=null};l([a],Object);function n(){};l([n],Object);function g(){var a;var b;var c;this.G={};a=this.E='';b=this._=0;c=this.A=a.length;this.D=0;this.B=b;this.C=c};l([g],n);function v(a,b){a.E=b.E;a._=b._;a.A=b.A;a.D=b.D;a.B=b.B;a.C=b.C};function d(b,d,c,e){var a;if(b._>=b.A){return false}a=b.E.charCodeAt(b._);if(a>e||a>>3]&1<<(a&7))===0){return false}b._++;return true};function e(a,d,c,e){var b;if(a._>=a.A){return false}b=a.E.charCodeAt(a._);if(b>e||b>>3]&1<<(b&7))===0){a._++;return true}return false};function p(a,d,c,e){var b;if(a._<=a.D){return false}b=a.E.charCodeAt(a._-1);if(b>e||b>>3]&1<<(b&7))===0){a._--;return true}return false};function m(a,b,d){var c;if(a.A-a._>>1);h=0;c=g0){break}if(d===b){break}if(l){break}l=true}}while(true){a=m[b];if(g>=a.F){f._=e+a.F|0;if(a.H==null){return a.I}o=a.H(a.P);f._=e+a.F|0;if(o){return a.I}}b=a.L;if(b<0){return 0}}return-1};function f(d,m,p){var b;var g;var e;var n;var f;var k;var l;var i;var h;var c;var a;var j;var o;b=0;g=p;e=d._;n=d.D;f=0;k=0;l=false;while(true){i=b+(g-b>>1);h=0;c=f=0;j--){if(e-c===n){h=-1;break}h=d.E.charCodeAt(e-1-c)-a.K.charCodeAt(j);if(h!==0){break}c++}if(h<0){g=i;k=c}else{b=i;f=c}if(g-b<=1){if(b>0){break}if(g===b){break}if(l){break}l=true}}while(true){a=m[b];if(f>=a.F){d._=e-a.F|0;if(a.H==null){return a.I}o=a.H(d);d._=e-a.F|0;if(o){return a.I}}b=a.L;if(b<0){return 0}}return-1};function s(a,b,d,e){var c;c=e.length-(d-b);a.E=a.E.slice(0,b)+e+a.E.slice(d);a.A+=c|0;if(a._>=d){a._+=c|0}else if(a._>b){a._=b}return c|0};function c(a,f){var b;var c;var d;var e;b=false;if((c=a.B)<0||c>(d=a.C)||d>(e=a.A)||e>a.E.length?false:true){s(a,a.B,a.C,f);b=true}return b};g.prototype.J=function(){return false};g.prototype.b=function(b){var a;var c;var d;var e;a=this.G['.'+b];if(a==null){c=this.E=b;d=this._=0;e=this.A=c.length;this.D=0;this.B=d;this.C=e;this.J();a=this.E;this.G['.'+b]=a}return a};g.prototype.stemWord=g.prototype.b;g.prototype.c=function(e){var d;var b;var c;var a;var f;var g;var h;d=[];for(b=0;b=this.A){break d}this._++}continue b}this._=i;break b}return true};b.prototype.r_prelude=b.prototype.W;function G(a){var j;var e;var k;var f;var g;var h;var i;var l;b:while(true){j=a._;f=true;d:while(f===true){f=false;e:while(true){e=a._;g=true;a:while(g===true){g=false;if(!d(a,b.g_v,97,259)){break a}a.B=a._;h=true;f:while(h===true){h=false;k=a._;i=true;c:while(i===true){i=false;if(!m(a,1,'u')){break c}a.C=a._;if(!d(a,b.g_v,97,259)){break c}if(!c(a,'U')){return false}break f}a._=k;if(!m(a,1,'i')){break a}a.C=a._;if(!d(a,b.g_v,97,259)){break a}if(!c(a,'I')){return false}}a._=e;break e}l=a._=e;if(l>=a.A){break d}a._++}continue b}a._=j;break b}return true};b.prototype.U=function(){var u;var w;var x;var y;var t;var l;var f;var g;var h;var i;var c;var j;var k;var a;var m;var n;var o;var p;var q;var r;var s;var v;this.I_pV=s=this.A;this.I_p1=s;this.I_p2=s;u=this._;l=true;a:while(l===true){l=false;f=true;g:while(f===true){f=false;w=this._;g=true;b:while(g===true){g=false;if(!d(this,b.g_v,97,259)){break b}h=true;f:while(h===true){h=false;x=this._;i=true;c:while(i===true){i=false;if(!e(this,b.g_v,97,259)){break c}d:while(true){c=true;e:while(c===true){c=false;if(!d(this,b.g_v,97,259)){break e}break d}if(this._>=this.A){break c}this._++}break f}this._=x;if(!d(this,b.g_v,97,259)){break b}c:while(true){j=true;d:while(j===true){j=false;if(!e(this,b.g_v,97,259)){break d}break c}if(this._>=this.A){break b}this._++}}break g}this._=w;if(!e(this,b.g_v,97,259)){break a}k=true;c:while(k===true){k=false;y=this._;a=true;b:while(a===true){a=false;if(!e(this,b.g_v,97,259)){break b}e:while(true){m=true;d:while(m===true){m=false;if(!d(this,b.g_v,97,259)){break d}break e}if(this._>=this.A){break b}this._++}break c}this._=y;if(!d(this,b.g_v,97,259)){break a}if(this._>=this.A){break a}this._++}}this.I_pV=this._}v=this._=u;t=v;n=true;a:while(n===true){n=false;b:while(true){o=true;c:while(o===true){o=false;if(!d(this,b.g_v,97,259)){break c}break b}if(this._>=this.A){break a}this._++}b:while(true){p=true;c:while(p===true){p=false;if(!e(this,b.g_v,97,259)){break c}break b}if(this._>=this.A){break a}this._++}this.I_p1=this._;b:while(true){q=true;c:while(q===true){q=false;if(!d(this,b.g_v,97,259)){break c}break b}if(this._>=this.A){break a}this._++}c:while(true){r=true;b:while(r===true){r=false;if(!e(this,b.g_v,97,259)){break b}break c}if(this._>=this.A){break a}this._++}this.I_p2=this._}this._=t;return true};b.prototype.r_mark_regions=b.prototype.U;function H(a){var x;var y;var z;var u;var v;var l;var f;var g;var h;var i;var j;var k;var c;var m;var n;var o;var p;var q;var r;var s;var t;var w;a.I_pV=t=a.A;a.I_p1=t;a.I_p2=t;x=a._;l=true;a:while(l===true){l=false;f=true;g:while(f===true){f=false;y=a._;g=true;b:while(g===true){g=false;if(!d(a,b.g_v,97,259)){break b}h=true;f:while(h===true){h=false;z=a._;i=true;c:while(i===true){i=false;if(!e(a,b.g_v,97,259)){break c}d:while(true){j=true;e:while(j===true){j=false;if(!d(a,b.g_v,97,259)){break e}break d}if(a._>=a.A){break c}a._++}break f}a._=z;if(!d(a,b.g_v,97,259)){break b}c:while(true){k=true;d:while(k===true){k=false;if(!e(a,b.g_v,97,259)){break d}break c}if(a._>=a.A){break b}a._++}}break g}a._=y;if(!e(a,b.g_v,97,259)){break a}c=true;c:while(c===true){c=false;u=a._;m=true;b:while(m===true){m=false;if(!e(a,b.g_v,97,259)){break b}e:while(true){n=true;d:while(n===true){n=false;if(!d(a,b.g_v,97,259)){break d}break e}if(a._>=a.A){break b}a._++}break c}a._=u;if(!d(a,b.g_v,97,259)){break a}if(a._>=a.A){break a}a._++}}a.I_pV=a._}w=a._=x;v=w;o=true;a:while(o===true){o=false;b:while(true){p=true;c:while(p===true){p=false;if(!d(a,b.g_v,97,259)){break c}break b}if(a._>=a.A){break a}a._++}b:while(true){q=true;c:while(q===true){q=false;if(!e(a,b.g_v,97,259)){break c}break b}if(a._>=a.A){break a}a._++}a.I_p1=a._;b:while(true){r=true;c:while(r===true){r=false;if(!d(a,b.g_v,97,259)){break c}break b}if(a._>=a.A){break a}a._++}c:while(true){s=true;b:while(s===true){s=false;if(!e(a,b.g_v,97,259)){break b}break c}if(a._>=a.A){break a}a._++}a.I_p2=a._}a._=v;return true};b.prototype.V=function(){var a;var e;var d;b:while(true){e=this._;d=true;a:while(d===true){d=false;this.B=this._;a=q(this,b.a_0,3);if(a===0){break a}this.C=this._;switch(a){case 0:break a;case 1:if(!c(this,'i')){return false}break;case 2:if(!c(this,'u')){return false}break;case 3:if(this._>=this.A){break a}this._++;break}continue b}this._=e;break b}return true};b.prototype.r_postlude=b.prototype.V;function I(a){var d;var f;var e;b:while(true){f=a._;e=true;a:while(e===true){e=false;a.B=a._;d=q(a,b.a_0,3);if(d===0){break a}a.C=a._;switch(d){case 0:break a;case 1:if(!c(a,'i')){return false}break;case 2:if(!c(a,'u')){return false}break;case 3:if(a._>=a.A){break a}a._++;break}continue b}a._=f;break b}return true};b.prototype.S=function(){return!(this.I_pV<=this._)?false:true};b.prototype.r_RV=b.prototype.S;b.prototype.Q=function(){return!(this.I_p1<=this._)?false:true};b.prototype.r_R1=b.prototype.Q;b.prototype.R=function(){return!(this.I_p2<=this._)?false:true};b.prototype.r_R2=b.prototype.R;b.prototype.Y=function(){var a;var e;var d;var g;this.C=this._;a=f(this,b.a_1,16);if(a===0){return false}this.B=g=this._;if(!(!(this.I_p1<=g)?false:true)){return false}switch(a){case 0:return false;case 1:if(!c(this,'')){return false}break;case 2:if(!c(this,'a')){return false}break;case 3:if(!c(this,'e')){return false}break;case 4:if(!c(this,'i')){return false}break;case 5:e=this.A-this._;d=true;a:while(d===true){d=false;if(!i(this,2,'ab')){break a}return false}this._=this.A-e;if(!c(this,'i')){return false}break;case 6:if(!c(this,'at')){return false}break;case 7:if(!c(this,'aţi')){return false}break}return true};b.prototype.r_step_0=b.prototype.Y;function J(a){var d;var g;var e;var h;a.C=a._;d=f(a,b.a_1,16);if(d===0){return false}a.B=h=a._;if(!(!(a.I_p1<=h)?false:true)){return false}switch(d){case 0:return false;case 1:if(!c(a,'')){return false}break;case 2:if(!c(a,'a')){return false}break;case 3:if(!c(a,'e')){return false}break;case 4:if(!c(a,'i')){return false}break;case 5:g=a.A-a._;e=true;a:while(e===true){e=false;if(!i(a,2,'ab')){break a}return false}a._=a.A-g;if(!c(a,'i')){return false}break;case 6:if(!c(a,'at')){return false}break;case 7:if(!c(a,'aţi')){return false}break}return true};b.prototype.T=function(){var a;var d;var e;var g;d=this.A-(e=this._);this.C=e;a=f(this,b.a_2,46);if(a===0){return false}this.B=g=this._;if(!(!(this.I_p1<=g)?false:true)){return false}switch(a){case 0:return false;case 1:if(!c(this,'abil')){return false}break;case 2:if(!c(this,'ibil')){return false}break;case 3:if(!c(this,'iv')){return false}break;case 4:if(!c(this,'ic')){return false}break;case 5:if(!c(this,'at')){return false}break;case 6:if(!c(this,'it')){return false}break}this.B_standard_suffix_removed=true;this._=this.A-d;return true};b.prototype.r_combo_suffix=b.prototype.T;function o(a){var d;var e;var g;var h;e=a.A-(g=a._);a.C=g;d=f(a,b.a_2,46);if(d===0){return false}a.B=h=a._;if(!(!(a.I_p1<=h)?false:true)){return false}switch(d){case 0:return false;case 1:if(!c(a,'abil')){return false}break;case 2:if(!c(a,'ibil')){return false}break;case 3:if(!c(a,'iv')){return false}break;case 4:if(!c(a,'ic')){return false}break;case 5:if(!c(a,'at')){return false}break;case 6:if(!c(a,'it')){return false}break}a.B_standard_suffix_removed=true;a._=a.A-e;return true};b.prototype.X=function(){var a;var e;var d;var g;this.B_standard_suffix_removed=false;a:while(true){e=this.A-this._;d=true;b:while(d===true){d=false;if(!o(this)){break b}continue a}this._=this.A-e;break a}this.C=this._;a=f(this,b.a_3,62);if(a===0){return false}this.B=g=this._;if(!(!(this.I_p2<=g)?false:true)){return false}switch(a){case 0:return false;case 1:if(!c(this,'')){return false}break;case 2:if(!i(this,1,'ţ')){return false}this.B=this._;if(!c(this,'t')){return false}break;case 3:if(!c(this,'ist')){return false}break}this.B_standard_suffix_removed=true;return true};b.prototype.r_standard_suffix=b.prototype.X;function K(a){var d;var g;var e;var h;a.B_standard_suffix_removed=false;a:while(true){g=a.A-a._;e=true;b:while(e===true){e=false;if(!o(a)){break b}continue a}a._=a.A-g;break a}a.C=a._;d=f(a,b.a_3,62);if(d===0){return false}a.B=h=a._;if(!(!(a.I_p2<=h)?false:true)){return false}switch(d){case 0:return false;case 1:if(!c(a,'')){return false}break;case 2:if(!i(a,1,'ţ')){return false}a.B=a._;if(!c(a,'t')){return false}break;case 3:if(!c(a,'ist')){return false}break}a.B_standard_suffix_removed=true;return true};b.prototype.Z=function(){var d;var h;var a;var j;var e;var g;var k;var l;var m;h=this.A-(k=this._);if(k None + def init(self, options: Dict) -> None: self.stemmer = snowballstemmer.stemmer('romanian') - def stem(self, word): - # type: (str) -> str + def stem(self, word: str) -> str: return self.stemmer.stemWord(word.lower()) diff --git a/sphinx/search/ru.py b/sphinx/search/ru.py index 8719ef1d2..9c0e30394 100644 --- a/sphinx/search/ru.py +++ b/sphinx/search/ru.py @@ -8,13 +8,11 @@ :license: BSD, see LICENSE for details. """ -from sphinx.search import SearchLanguage, parse_stop_word +from typing import Dict import snowballstemmer -if False: - # For type annotation - from typing import Any # NOQA +from sphinx.search import SearchLanguage, parse_stop_word russian_stopwords = parse_stop_word(''' @@ -259,10 +257,8 @@ class SearchRussian(SearchLanguage): js_stemmer_code = js_stemmer stopwords = russian_stopwords - def init(self, options): - # type: (Any) -> None + def init(self, options: Dict) -> None: self.stemmer = snowballstemmer.stemmer('russian') - def stem(self, word): - # type: (str) -> str + def stem(self, word: str) -> str: return self.stemmer.stemWord(word.lower()) diff --git a/sphinx/search/sv.py b/sphinx/search/sv.py index cfdd15f92..4af7f7835 100644 --- a/sphinx/search/sv.py +++ b/sphinx/search/sv.py @@ -8,13 +8,12 @@ :license: BSD, see LICENSE for details. """ -from sphinx.search import SearchLanguage, parse_stop_word +from typing import Dict import snowballstemmer -if False: - # For type annotation - from typing import Any +from sphinx.search import SearchLanguage, parse_stop_word + swedish_stopwords = parse_stop_word(''' | source: http://snowball.tartarus.org/algorithms/swedish/stop.txt @@ -147,10 +146,8 @@ class SearchSwedish(SearchLanguage): js_stemmer_code = js_stemmer stopwords = swedish_stopwords - def init(self, options): - # type: (Any) -> None + def init(self, options: Dict) -> None: self.stemmer = snowballstemmer.stemmer('swedish') - def stem(self, word): - # type: (str) -> str + def stem(self, word: str) -> str: return self.stemmer.stemWord(word.lower()) diff --git a/sphinx/search/tr.py b/sphinx/search/tr.py index ba2cdf951..8db42f676 100644 --- a/sphinx/search/tr.py +++ b/sphinx/search/tr.py @@ -8,13 +8,12 @@ :license: BSD, see LICENSE for details. """ -from sphinx.search import SearchLanguage +from typing import Dict, Set import snowballstemmer -if False: - # For type annotation - from typing import Dict, Set # NOQA +from sphinx.search import SearchLanguage + js_stemmer = """ var JSX={};(function(q){function r(b,e){var a=function(){};a.prototype=e.prototype;var c=new a;for(var d in b){b[d].prototype=c}}function Q(c,b){for(var a in b.prototype)if(b.prototype.hasOwnProperty(a))c.prototype[a]=b.prototype[a]}function j(a,b,d){function c(a,b,c){delete a[b];a[b]=c;return c}Object.defineProperty(a,b,{get:function(){return c(a,b,d())},set:function(d){c(a,b,d)},enumerable:true,configurable:true})}function R(a,b,c){return a[b]=a[b]/c|0}var M=parseInt;var K=parseFloat;function P(a){return a!==a}var A=isFinite;var G=encodeURIComponent;var F=decodeURIComponent;var E=encodeURI;var D=decodeURI;var C=Object.prototype.toString;var H=Object.prototype.hasOwnProperty;function p(){}q.require=function(b){var a=y[b];return a!==undefined?a:null};q.profilerIsRunning=function(){return p.getResults!=null};q.getProfileResults=function(){return(p.getResults||function(){return{}})()};q.postProfileResults=function(a,b){if(p.postResults==null)throw new Error('profiler has not been turned on');return p.postResults(a,b)};q.resetProfileResults=function(){if(p.resetResults==null)throw new Error('profiler has not been turned on');return p.resetResults()};q.DEBUG=false;function I(){};r([I],Error);function d(a,b,c){this.G=a.length;this.A_=a;this.D_=b;this.J=c;this.I=null;this.E_=null};r([d],Object);function u(){};r([u],Object);function m(){var a;var b;var c;this.F={};a=this.E='';b=this._=0;c=this.A=a.length;this.D=0;this.B=b;this.C=c};r([m],u);function B(a,b){a.E=b.E;a._=b._;a.A=b.A;a.D=b.D;a.B=b.B;a.C=b.C};function v(b,d,c,e){var a;if(b._>=b.A){return false}a=b.E.charCodeAt(b._);if(a>e||a>>3]&1<<(a&7))===0){return false}b._++;return true};function f(b,d,c,e){var a;if(b._<=b.D){return false}a=b.E.charCodeAt(b._-1);if(a>e||a>>3]&1<<(a&7))===0){return false}b._--;return true};function t(a,d,c,e){var b;if(a._<=a.D){return false}b=a.E.charCodeAt(a._-1);if(b>e||b>>3]&1<<(b&7))===0){a._--;return true}return false};function s(a,b,d){var c;if(a.A-a._>1);h=0;c=f=0;j--){if(e-c===n){h=-1;break}h=d.E.charCodeAt(e-1-c)-a.A_.charCodeAt(j);if(h!==0){break}c++}if(h<0){g=i;k=c}else{b=i;f=c}if(g-b<=1){if(b>0){break}if(g===b){break}if(l){break}l=true}}while(true){a=m[b];if(f>=a.G){d._=e-a.G|0;if(a.I==null){return a.J}o=a.I(d);d._=e-a.G|0;if(o){return a.J}}b=a.D_;if(b<0){return 0}}return-1};function n(a,b,d,e){var c;c=e.length-(d-b);a.E=a.E.slice(0,b)+e+a.E.slice(d);a.A+=c|0;if(a._>=d){a._+=c|0}else if(a._>b){a._=b}return c|0};function e(a,f){var b;var c;var d;var e;b=false;if((c=a.B)<0||c>(d=a.C)||d>(e=a.A)||e>a.E.length?false:true){n(a,a.B,a.C,f);b=true}return b};m.prototype.H=function(){return false};m.prototype.B_=function(b){var a;var c;var d;var e;a=this.F['.'+b];if(a==null){c=this.E=b;d=this._=0;e=this.A=c.length;this.D=0;this.B=d;this.C=e;this.H();a=this.E;this.F['.'+b]=a}return a};m.prototype.stemWord=m.prototype.B_;m.prototype.C_=function(e){var d;var b;var c;var a;var f;var g;var h;d=[];for(b=0;b=this.A){break b}this._++}b--;continue a}this._=f;break a}if(b>0){return false}this._=e;return true};a.prototype.r_more_than_one_syllable_word=a.prototype.v;function N(b){var f;var g;var c;var d;var e;f=b._;c=2;a:while(true){g=b._;d=true;b:while(d===true){d=false;c:while(true){e=true;d:while(e===true){e=false;if(!v(b,a.g_vowel,97,305)){break d}break c}if(b._>=b.A){break b}b._++}c--;continue a}b._=g;break a}if(c>0){return false}b._=f;return true};a.prototype.P=function(){var f;var g;var h;var b;var a;var c;var d;var i;var j;var e;b=true;b:while(b===true){b=false;f=this._;a=true;a:while(a===true){a=false;g=this._;c:while(true){c=true;d:while(c===true){c=false;if(!s(this,2,'ad')){break d}break c}if(this._>=this.A){break a}this._++}i=this.I_strlen=2;if(!(i===this.A)){break a}this._=g;break b}j=this._=f;h=j;a:while(true){d=true;c:while(d===true){d=false;if(!s(this,5,'soyad')){break c}break a}if(this._>=this.A){return false}this._++}e=this.I_strlen=5;if(!(e===this.A)){return false}this._=h}return true};a.prototype.r_is_reserved_word=a.prototype.P;function x(a){var g;var h;var i;var c;var b;var d;var e;var j;var k;var f;c=true;b:while(c===true){c=false;g=a._;b=true;a:while(b===true){b=false;h=a._;c:while(true){d=true;d:while(d===true){d=false;if(!s(a,2,'ad')){break d}break c}if(a._>=a.A){break a}a._++}j=a.I_strlen=2;if(!(j===a.A)){break a}a._=h;break b}k=a._=g;i=k;a:while(true){e=true;c:while(e===true){e=false;if(!s(a,5,'soyad')){break c}break a}if(a._>=a.A){return false}a._++}f=a.I_strlen=5;if(!(f===a.A)){return false}a._=i}return true};a.prototype.x=function(){var d;var e;var a;var b;var c;var f;var g;var h;d=this._;a=true;a:while(a===true){a=false;if(!x(this)){break a}return false}f=this._=d;this.D=f;h=this._=g=this.A;e=g-h;b=true;a:while(b===true){b=false;if(!z(this)){break a}}this._=this.A-e;c=true;a:while(c===true){c=false;if(!w(this)){break a}}this._=this.D;return true};a.prototype.r_postlude=a.prototype.x;function O(a){var e;var f;var b;var c;var d;var g;var h;var i;e=a._;b=true;a:while(b===true){b=false;if(!x(a)){break a}return false}g=a._=e;a.D=g;i=a._=h=a.A;f=h-i;c=true;a:while(c===true){c=false;if(!z(a)){break a}}a._=a.A-f;d=true;a:while(d===true){d=false;if(!w(a)){break a}}a._=a.D;return true};a.prototype.H=function(){var c;var a;var b;var d;var e;if(!N(this)){return false}this.D=this._;e=this._=d=this.A;c=d-e;a=true;a:while(a===true){a=false;if(!J(this)){break a}}this._=this.A-c;if(!this.B_continue_stemming_noun_suffixes){return false}b=true;a:while(b===true){b=false;if(!L(this)){break a}}this._=this.D;return!O(this)?false:true};a.prototype.stem=a.prototype.H;a.prototype.L=function(b){return b instanceof a};a.prototype.equals=a.prototype.L;a.prototype.M=function(){var c;var a;var b;var d;c='TurkishStemmer';a=0;for(b=0;b None + def init(self, options: Dict) -> None: self.stemmer = snowballstemmer.stemmer('turkish') - def stem(self, word): - # type: (str) -> str + def stem(self, word: str) -> str: return self.stemmer.stemWord(word.lower()) diff --git a/sphinx/search/zh.py b/sphinx/search/zh.py index ed5f52265..62a6d1206 100644 --- a/sphinx/search/zh.py +++ b/sphinx/search/zh.py @@ -10,6 +10,7 @@ import os import re +from typing import Dict, List from sphinx.search import SearchLanguage from sphinx.util.stemmer import get_stemmer @@ -20,10 +21,6 @@ try: except ImportError: JIEBA = False -if False: - # For type annotation - from typing import Dict, List # NOQA - english_stopwords = set(""" a and are as at be but by @@ -235,8 +232,7 @@ class SearchChinese(SearchLanguage): latin1_letters = re.compile(r'[a-zA-Z0-9_]+') latin_terms = [] # type: List[str] - def init(self, options): - # type: (Dict) -> None + def init(self, options: Dict) -> None: if JIEBA: dict_path = options.get('dict') if dict_path and os.path.isfile(dict_path): @@ -244,8 +240,7 @@ class SearchChinese(SearchLanguage): self.stemmer = get_stemmer() - def split(self, input): - # type: (str) -> List[str] + def split(self, input: str) -> List[str]: chinese = [] # type: List[str] if JIEBA: chinese = list(jieba.cut_for_search(input)) @@ -255,13 +250,10 @@ class SearchChinese(SearchLanguage): self.latin_terms.extend(latin1) return chinese + latin1 - def word_filter(self, stemmed_word): - # type: (str) -> bool + def word_filter(self, stemmed_word: str) -> bool: return len(stemmed_word) > 1 - def stem(self, word): - # type: (str) -> str - + def stem(self, word: str) -> str: # Don't stem Latin words that are long enough to be relevant for search # if not stemmed, but would be too short after being stemmed # avoids some issues with acronyms