Mirror of https://github.com/sphinx-doc/sphinx.git (synced 2025-02-25 18:55:22 -06:00)

Enable automatic formatting for `sphinx/search/` (#12967)

parent e1c5f6d314
commit 81c6f1e8cc
.ruff.toml

@@ -7,7 +7,7 @@ extend-exclude = [
"tests/js/roots/*",
"build/*",
"doc/_build/*",
"sphinx/search/*",
# "sphinx/search/*",
"doc/usage/extensions/example*.py",
]

@@ -411,6 +411,8 @@ select = [
"sphinx/ext/autodoc/importer.py" = ["D402"]
"sphinx/util/requests.py" = ["D402"]

"sphinx/search/*" = ["E501"]

"tests/*" = [
"E501",
"ANN", # tests don't need annotations
@@ -475,7 +477,6 @@ exclude = [
"sphinx/ext/todo.py",
"sphinx/ext/viewcode.py",
"sphinx/registry.py",
"sphinx/search/*",
"sphinx/testing/*",
"sphinx/transforms/*",
"sphinx/writers/*",

sphinx/search/__init__.py

@@ -1,4 +1,5 @@
"""Create a full-text search index for offline search."""

from __future__ import annotations

import dataclasses
@@ -15,12 +16,13 @@ from docutils import nodes
from docutils.nodes import Element, Node

from sphinx import addnodes, package_dir
from sphinx.environment import BuildEnvironment
from sphinx.util.index_entries import split_index_msg

if TYPE_CHECKING:
from collections.abc import Iterable

from sphinx.environment import BuildEnvironment


class SearchLanguage:
"""
@@ -52,10 +54,11 @@ class SearchLanguage:
This class is used to preprocess search word which Sphinx HTML readers
type, before searching index. Default implementation does nothing.
"""

lang: str = ''
language_name: str = ''
stopwords: set[str] = set()
js_splitter_code: str = ""
js_splitter_code: str = ''
js_stemmer_rawcode: str = ''
js_stemmer_code = """
/**
@@ -105,16 +108,14 @@ var Stemmer = function() {
Return true if the target word should be registered in the search index.
This method is called after stemming.
"""
return (
len(word) == 0 or not (
((len(word) < 3) and (12353 < ord(word[0]) < 12436)) or
(ord(word[0]) < 256 and (
word in self.stopwords
))))
return len(word) == 0 or not (
((len(word) < 3) and (12353 < ord(word[0]) < 12436))
or (ord(word[0]) < 256 and (word in self.stopwords))
)
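A note on the reformatted return expression above: behaviour is unchanged. The predicate can be restated as a small standalone function; keep_word and its parameters are illustrative names, not part of Sphinx:

    def keep_word(word: str, stopwords: set[str]) -> bool:
        # Mirrors SearchLanguage.word_filter's default predicate.
        if len(word) == 0:
            return True
        is_short_hiragana = len(word) < 3 and 12353 < ord(word[0]) < 12436
        is_latin_stopword = ord(word[0]) < 256 and word in stopwords
        return not (is_short_hiragana or is_latin_stopword)

    assert keep_word('python', set())
    assert not keep_word('the', {'the'})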


# SearchEnglish imported after SearchLanguage is defined due to circular import
from sphinx.search.en import SearchEnglish
from sphinx.search.en import SearchEnglish  # NoQA: E402


def parse_stop_word(source: str) -> set[str]:
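Only the signature of parse_stop_word is visible in this hunk. As context for the Snowball stop-word lists reformatted further down (everything after '|' on a line is a comment), a sketch of what the function does; the body here is a paraphrase under that assumption, not necessarily the exact implementation:

    def parse_stop_word(source: str) -> set[str]:
        result: set[str] = set()
        for line in source.splitlines():
            result.update(line.split('|')[0].split())  # drop the trailing comment
        return result

    assert parse_stop_word('og | and\ni | in\n') == {'og', 'i'}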
@@ -165,10 +166,10 @@ class _JavaScriptIndex:
return self.PREFIX + json.dumps(data, sort_keys=True) + self.SUFFIX

def loads(self, s: str) -> Any:
data = s[len(self.PREFIX):-len(self.SUFFIX)]
if not data or not s.startswith(self.PREFIX) or not \
s.endswith(self.SUFFIX):
raise ValueError('invalid data')
data = s[len(self.PREFIX) : -len(self.SUFFIX)]
if not data or not s.startswith(self.PREFIX) or not s.endswith(self.SUFFIX):
msg = 'invalid data'
raise ValueError(msg)
return json.loads(data)
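The loads()/dumps() pair frames the JSON payload with the class-level PREFIX and SUFFIX constants, which are outside this hunk. A minimal round-trip sketch, assuming the 'Search.setIndex(' / ')' framing used for searchindex.js:

    import json

    PREFIX, SUFFIX = 'Search.setIndex(', ')'   # assumed values for this sketch

    def dumps(data):
        return PREFIX + json.dumps(data, sort_keys=True) + SUFFIX

    def loads(s):
        data = s[len(PREFIX) : -len(SUFFIX)]
        if not data or not s.startswith(PREFIX) or not s.endswith(SUFFIX):
            msg = 'invalid data'
            raise ValueError(msg)
        return json.loads(data)

    assert loads(dumps({'docnames': ['index']})) == {'docnames': ['index']}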

def dump(self, data: Any, f: IO[str]) -> None:
@@ -187,9 +188,8 @@ def _is_meta_keywords(
) -> bool:
if node.get('name') == 'keywords':
meta_lang = node.get('lang')
if meta_lang is None:  # lang not specified
return True
elif meta_lang == lang:  # matched to html_search_language
if meta_lang is None or meta_lang == lang:
# lang not specified or matched to html_search_language
return True

return False
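The if/elif pair collapses into a single boolean test; an illustrative check of the equivalence:

    def matches(meta_lang, lang):
        return meta_lang is None or meta_lang == lang

    assert matches(None, 'en')       # lang not specified
    assert matches('en', 'en')       # matched to html_search_language
    assert not matches('de', 'en')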
@@ -222,8 +222,18 @@ class WordCollector(nodes.NodeVisitor):
# Some people might put content in raw HTML that should be searched,
# so we just amateurishly strip HTML tags and index the remaining
# content
nodetext = re.sub(r'<style.*?</style>', '', node.astext(), flags=re.IGNORECASE|re.DOTALL)
nodetext = re.sub(r'<script.*?</script>', '', nodetext, flags=re.IGNORECASE|re.DOTALL)
nodetext = re.sub(
r'<style.*?</style>',
'',
node.astext(),
flags=re.IGNORECASE | re.DOTALL,
)
nodetext = re.sub(
r'<script.*?</script>',
'',
nodetext,
flags=re.IGNORECASE | re.DOTALL,
)
nodetext = re.sub(r'<[^<]+?>', '', nodetext)
self.found_words.extend(self.lang.split(nodetext))
raise nodes.SkipNode
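The three substitutions above, applied in order, behave like this on a raw-HTML snippet (a standalone sketch of the same regexes):

    import re

    raw = '<style>p {color: red}</style><script>alert(1)</script><p>indexed <b>text</b></p>'
    text = re.sub(r'<style.*?</style>', '', raw, flags=re.IGNORECASE | re.DOTALL)
    text = re.sub(r'<script.*?</script>', '', text, flags=re.IGNORECASE | re.DOTALL)
    text = re.sub(r'<[^<]+?>', '', text)
    assert text == 'indexed text'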
@@ -245,12 +255,15 @@ class IndexBuilder:
Helper class that creates a search index based on the doctrees
passed to the `feed` method.
"""

formats = {
'json': json,
'pickle': pickle
'json': json,
'pickle': pickle,
}
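Both entries of the formats table expose dump()/load() on a stream, which is how IndexBuilder.dump() and load() pick a serializer by key. A small sketch (json only; the pickle entry would need a binary stream):

    import io
    import json
    import pickle

    formats = {'json': json, 'pickle': pickle}

    buffer = io.StringIO()
    formats['json'].dump({'envversion': 1}, buffer)
    buffer.seek(0)
    assert formats['json'].load(buffer)['envversion'] == 1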

def __init__(self, env: BuildEnvironment, lang: str, options: dict[str, str], scoring: str) -> None:
def __init__(
self, env: BuildEnvironment, lang: str, options: dict[str, str], scoring: str
) -> None:
self.env = env
# docname -> title
self._titles: dict[str, str | None] = env._search_index_titles
@@ -261,9 +274,13 @@ class IndexBuilder:
# stemmed words in titles -> set(docname)
self._title_mapping: dict[str, set[str]] = env._search_index_title_mapping
# docname -> all titles in document
self._all_titles: dict[str, list[tuple[str, str | None]]] = env._search_index_all_titles
self._all_titles: dict[str, list[tuple[str, str | None]]] = (
env._search_index_all_titles
)
# docname -> list(index entry)
self._index_entries: dict[str, list[tuple[str, str, str]]] = env._search_index_index_entries
self._index_entries: dict[str, list[tuple[str, str, str]]] = (
env._search_index_index_entries
)
# objtype -> index
self._objtypes: dict[tuple[str, str], int] = env._search_index_objtypes
# objtype index -> (domain, type, objname (localized))
@@ -290,7 +307,7 @@ class IndexBuilder:
self.js_scorer_code = fp.read().decode()
else:
self.js_scorer_code = ''
self.js_splitter_code = ""
self.js_splitter_code = ''

def load(self, stream: IO, format: Any) -> None:
"""Reconstruct from frozen data."""
@@ -298,15 +315,15 @@ class IndexBuilder:
format = self.formats[format]
frozen = format.load(stream)
# if an old index is present, we treat it as not existing.
if not isinstance(frozen, dict) or \
frozen.get('envversion') != self.env.version:
raise ValueError('old format')
if not isinstance(frozen, dict) or frozen.get('envversion') != self.env.version:
msg = 'old format'
raise ValueError(msg)
index2fn = frozen['docnames']
self._filenames = dict(zip(index2fn, frozen['filenames']))
self._titles = dict(zip(index2fn, frozen['titles']))
self._filenames = dict(zip(index2fn, frozen['filenames'], strict=True))
self._titles = dict(zip(index2fn, frozen['titles'], strict=True))
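The newly added strict=True (Python 3.10+) makes zip() raise instead of silently truncating when the frozen lists have drifted out of sync, for example:

    docnames = ('index', 'usage')
    filenames = ('index.rst', 'usage.rst')

    dict(zip(docnames, filenames, strict=True))      # fine: lengths match
    try:
        dict(zip(docnames, filenames[:1], strict=True))
    except ValueError:
        pass                                         # lengths differ -> error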
self._all_titles = {}

for docname in self._titles.keys():
for docname in self._titles:
self._all_titles[docname] = []
for title, doc_tuples in frozen['alltitles'].items():
for doc, titleid in doc_tuples:
@@ -331,8 +348,9 @@ class IndexBuilder:
format = self.formats[format]
format.dump(self.freeze(), stream)

def get_objects(self, fn2index: dict[str, int]
) -> dict[str, list[tuple[int, int, int, str, str]]]:
def get_objects(
self, fn2index: dict[str, int]
) -> dict[str, list[tuple[int, int, int, str, str]]]:
rv: dict[str, list[tuple[int, int, int, str, str]]] = {}
otypes = self._objtypes
onames = self._objnames
@@ -355,8 +373,11 @@ class IndexBuilder:
otype = domain.object_types.get(type)
if otype:
# use str() to fire translation proxies
onames[typeindex] = (domain.name, type,
str(domain.get_type_name(otype)))
onames[typeindex] = (
domain.name,
type,
str(domain.get_type_name(otype)),
)
else:
onames[typeindex] = (domain.name, type, type)
if anchor == fullname:
@@ -368,7 +389,9 @@ class IndexBuilder:
plist.append((fn2index[docname], typeindex, prio, shortanchor, name))
return rv

def get_terms(self, fn2index: dict[str, int]) -> tuple[dict[str, list[int] | int], dict[str, list[int] | int]]:
def get_terms(
self, fn2index: dict[str, int]
) -> tuple[dict[str, list[int] | int], dict[str, list[int] | int]]:
"""
Return a mapping of document and title terms to their corresponding sorted document IDs.

@@ -377,10 +400,10 @@ class IndexBuilder:
of integers.
"""
rvs: tuple[dict[str, list[int] | int], dict[str, list[int] | int]] = ({}, {})
for rv, mapping in zip(rvs, (self._mapping, self._title_mapping)):
for rv, mapping in zip(rvs, (self._mapping, self._title_mapping), strict=True):
for k, v in mapping.items():
if len(v) == 1:
fn, = v
(fn,) = v
if fn in fn2index:
rv[k] = fn2index[fn]
else:
@@ -389,7 +412,7 @@ class IndexBuilder:

def freeze(self) -> dict[str, Any]:
"""Create a usable data structure for serializing."""
docnames, titles = zip(*sorted(self._titles.items()))
docnames, titles = zip(*sorted(self._titles.items()), strict=True)
filenames = [self._filenames.get(docname) for docname in docnames]
fn2index = {f: i for (i, f) in enumerate(docnames)}
terms, title_terms = self.get_terms(fn2index)
@@ -406,15 +429,28 @@ class IndexBuilder:
index_entries: dict[str, list[tuple[int, str, bool]]] = {}
for docname, entries in self._index_entries.items():
for entry, entry_id, main_entry in entries:
index_entries.setdefault(entry.lower(), []).append((fn2index[docname], entry_id, main_entry == "main"))
index_entries.setdefault(entry.lower(), []).append((
fn2index[docname],
entry_id,
main_entry == 'main',
))

return dict(docnames=docnames, filenames=filenames, titles=titles, terms=terms,
objects=objects, objtypes=objtypes, objnames=objnames,
titleterms=title_terms, envversion=self.env.version,
alltitles=alltitles, indexentries=index_entries)
return {
'docnames': docnames,
'filenames': filenames,
'titles': titles,
'terms': terms,
'objects': objects,
'objtypes': objtypes,
'objnames': objnames,
'titleterms': title_terms,
'envversion': self.env.version,
'alltitles': alltitles,
'indexentries': index_entries,
}
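For orientation, a toy example (all values invented) of the shape this dict literal returns; _JavaScriptIndex.dumps() then wraps it as Search.setIndex({...}) when writing searchindex.js:

    frozen = {
        'docnames': ('index',),
        'filenames': ['index.rst'],
        'titles': ('Welcome',),
        'terms': {'welcom': 0},
        'objects': {},
        'objtypes': {},
        'objnames': {},
        'titleterms': {'welcom': 0},
        'envversion': 1,            # placeholder; the real value is env.version
        'alltitles': {'Welcome': [(0, None)]},
        'indexentries': {},
    }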

def label(self) -> str:
return f"{self.lang.language_name} (code: {self.lang.lang})"
return f'{self.lang.language_name} (code: {self.lang.lang})'

def prune(self, docnames: Iterable[str]) -> None:
"""Remove data for all docnames not in the list."""
@@ -434,7 +470,9 @@ class IndexBuilder:
for wordnames in self._title_mapping.values():
wordnames.intersection_update(docnames)

def feed(self, docname: str, filename: str, title: str, doctree: nodes.document) -> None:
def feed(
self, docname: str, filename: str, title: str, doctree: nodes.document
) -> None:
"""Feed a doctree to the index."""
self._titles[docname] = title
self._filenames[docname] = filename
@@ -495,15 +533,22 @@ class IndexBuilder:
# Some people might put content in raw HTML that should be searched,
# so we just amateurishly strip HTML tags and index the remaining
# content
nodetext = re.sub(r'<style.*?</style>', '', node.astext(),
flags=re.IGNORECASE | re.DOTALL)
nodetext = re.sub(r'<script.*?</script>', '', nodetext,
flags=re.IGNORECASE | re.DOTALL)
nodetext = re.sub(
r'<style.*?</style>',
'',
node.astext(),
flags=re.IGNORECASE | re.DOTALL,
)
nodetext = re.sub(
r'<script.*?</script>',
'',
nodetext,
flags=re.IGNORECASE | re.DOTALL,
)
nodetext = re.sub(r'<[^<]+?>', '', nodetext)
word_store.words.extend(split(nodetext))
return
elif (isinstance(node, nodes.meta)
and _is_meta_keywords(node, language)):
elif isinstance(node, nodes.meta) and _is_meta_keywords(node, language):
keywords = [keyword.strip() for keyword in node['content'].split(',')]
word_store.words.extend(keywords)
elif isinstance(node, nodes.Text):
@@ -553,11 +598,16 @@ class IndexBuilder:
"""Returns JS code that will be inserted into language_data.js."""
if self.lang.js_stemmer_rawcode:
js_dir = path.join(package_dir, 'search', 'minified-js')
with open(path.join(js_dir, 'base-stemmer.js'), encoding='utf-8') as js_file:
with open(
path.join(js_dir, 'base-stemmer.js'), encoding='utf-8'
) as js_file:
base_js = js_file.read()
with open(path.join(js_dir, self.lang.js_stemmer_rawcode), encoding='utf-8') as js_file:
with open(
path.join(js_dir, self.lang.js_stemmer_rawcode), encoding='utf-8'
) as js_file:
language_js = js_file.read()
return ('%s\n%s\nStemmer = %sStemmer;' %
(base_js, language_js, self.lang.language_name))
return (
f'{base_js}\n{language_js}\nStemmer = {self.lang.language_name}Stemmer;'
)
else:
return self.lang.js_stemmer_code

sphinx/search/da.py

@@ -2,13 +2,11 @@

from __future__ import annotations

from typing import TYPE_CHECKING, Dict

import snowballstemmer

from sphinx.search import SearchLanguage, parse_stop_word

danish_stopwords = parse_stop_word('''
danish_stopwords = parse_stop_word("""
| source: https://snowball.tartarus.org/algorithms/danish/stop.txt
og | and
i | in
@@ -104,7 +102,7 @@ været | be
thi | for (conj)
jer | you
sådan | such, like this/like that
''')
""")


class SearchDanish(SearchLanguage):

sphinx/search/de.py

@@ -2,13 +2,11 @@

from __future__ import annotations

from typing import TYPE_CHECKING, Dict

import snowballstemmer

from sphinx.search import SearchLanguage, parse_stop_word

german_stopwords = parse_stop_word('''
german_stopwords = parse_stop_word("""
|source: https://snowball.tartarus.org/algorithms/german/stop.txt
aber | but

@@ -287,7 +285,7 @@ zum | zu + dem
zur | zu + der
zwar | indeed
zwischen | between
''')
""")


class SearchGerman(SearchLanguage):

sphinx/search/en.py

@@ -2,13 +2,12 @@

from __future__ import annotations

from typing import TYPE_CHECKING, Dict

import snowballstemmer

from sphinx.search import SearchLanguage

english_stopwords = set("""
english_stopwords = set(
"""
a and are as at
be but by
for
@@ -18,7 +17,8 @@ of on or
such
that the their then there these they this to
was will with
""".split())
""".split()
)

js_porter_stemmer = """
/**

sphinx/search/es.py

@@ -2,13 +2,11 @@

from __future__ import annotations

from typing import TYPE_CHECKING, Dict

import snowballstemmer

from sphinx.search import SearchLanguage, parse_stop_word

spanish_stopwords = parse_stop_word('''
spanish_stopwords = parse_stop_word("""
|source: https://snowball.tartarus.org/algorithms/spanish/stop.txt
de | from, of
la | the, her
@@ -347,7 +345,7 @@ tenida
tenidos
tenidas
tened
''')
""")


class SearchSpanish(SearchLanguage):

sphinx/search/fi.py

@@ -2,13 +2,11 @@

from __future__ import annotations

from typing import TYPE_CHECKING, Dict

import snowballstemmer

from sphinx.search import SearchLanguage, parse_stop_word

finnish_stopwords = parse_stop_word('''
finnish_stopwords = parse_stop_word("""
| source: https://snowball.tartarus.org/algorithms/finnish/stop.txt
| forms of BE

@@ -97,7 +95,7 @@ kun | when
niin | so
nyt | now
itse | self
''')
""")


class SearchFinnish(SearchLanguage):

sphinx/search/fr.py

@@ -2,13 +2,11 @@

from __future__ import annotations

from typing import TYPE_CHECKING, Dict

import snowballstemmer

from sphinx.search import SearchLanguage, parse_stop_word

french_stopwords = parse_stop_word('''
french_stopwords = parse_stop_word("""
| source: https://snowball.tartarus.org/algorithms/french/stop.txt
au | a + le
aux | a + les
@@ -183,7 +181,7 @@ quelle | which
quelles | which
sans | without
soi | oneself
''')
""")


class SearchFrench(SearchLanguage):

sphinx/search/hu.py

@@ -2,13 +2,11 @@

from __future__ import annotations

from typing import TYPE_CHECKING, Dict

import snowballstemmer

from sphinx.search import SearchLanguage, parse_stop_word

hungarian_stopwords = parse_stop_word('''
hungarian_stopwords = parse_stop_word("""
| source: https://snowball.tartarus.org/algorithms/hungarian/stop.txt
| prepared by Anna Tordai
a
@@ -210,7 +208,7 @@ vissza
vele
viszont
volna
''')
""")


class SearchHungarian(SearchLanguage):

sphinx/search/it.py

@@ -2,13 +2,11 @@

from __future__ import annotations

from typing import TYPE_CHECKING, Dict

import snowballstemmer

from sphinx.search import SearchLanguage, parse_stop_word

italian_stopwords = parse_stop_word('''
italian_stopwords = parse_stop_word("""
| source: https://snowball.tartarus.org/algorithms/italian/stop.txt
ad | a (to) before vowel
al | a + il
@@ -300,7 +298,7 @@ stessi
stesse
stessimo
stessero
''')
""")


class SearchItalian(SearchLanguage):

sphinx/search/ja.py

@@ -17,12 +17,14 @@ from typing import Any

try:
import MeCab  # type: ignore[import-not-found]

native_module = True
except ImportError:
native_module = False

try:
import janome.tokenizer  # type: ignore[import-not-found]

janome_module = True
except ImportError:
janome_module = False
@@ -61,7 +63,8 @@ class MecabSplitter(BaseSplitter):
result = self.native.parse(input)
else:
result = self.ctypes_libmecab.mecab_sparse_tostr(
self.ctypes_mecab, input.encode(self.dict_encode))
self.ctypes_mecab, input.encode(self.dict_encode)
)
return result.split(' ')

def init_native(self, options: dict[str, str]) -> None:
@@ -89,7 +92,8 @@ class MecabSplitter(BaseSplitter):
if os.path.exists(lib):
libpath = lib
if libpath is None:
raise RuntimeError('MeCab dynamic library is not available')
msg = 'MeCab dynamic library is not available'
raise RuntimeError(msg)

param = 'mecab -Owakati'
dict = options.get('dict')
@@ -101,11 +105,15 @@ class MecabSplitter(BaseSplitter):
self.ctypes_libmecab = ctypes.CDLL(libpath)
self.ctypes_libmecab.mecab_new2.argtypes = (ctypes.c_char_p,)
self.ctypes_libmecab.mecab_new2.restype = ctypes.c_void_p
self.ctypes_libmecab.mecab_sparse_tostr.argtypes = (ctypes.c_void_p, ctypes.c_char_p)
self.ctypes_libmecab.mecab_sparse_tostr.argtypes = (
ctypes.c_void_p,
ctypes.c_char_p,
)
self.ctypes_libmecab.mecab_sparse_tostr.restype = ctypes.c_char_p
self.ctypes_mecab = self.ctypes_libmecab.mecab_new2(param.encode(fs_enc))
if self.ctypes_mecab is None:
raise SphinxError('mecab initialization failed')
msg = 'mecab initialization failed'
raise SphinxError(msg)

def __del__(self) -> None:
if self.ctypes_libmecab:
@@ -121,8 +129,11 @@ class JanomeSplitter(BaseSplitter):

def init_tokenizer(self) -> None:
if not janome_module:
raise RuntimeError('Janome is not available')
self.tokenizer = janome.tokenizer.Tokenizer(udic=self.user_dict, udic_enc=self.user_dict_enc)
msg = 'Janome is not available'
raise RuntimeError(msg)
self.tokenizer = janome.tokenizer.Tokenizer(
udic=self.user_dict, udic_enc=self.user_dict_enc
)

def split(self, input: str) -> list[str]:
result = ' '.join(token.surface for token in self.tokenizer.tokenize(input))
@@ -130,14 +141,18 @@ class JanomeSplitter(BaseSplitter):


class DefaultSplitter(BaseSplitter):
patterns_ = {re.compile(pattern): value for pattern, value in {
'[一二三四五六七八九十百千万億兆]': 'M',
'[一-龠々〆ヵヶ]': 'H',
'[ぁ-ん]': 'I',
'[ァ-ヴーｱ-ﾝﾞｰ]': 'K',
'[a-zA-Zａ-ｚＡ-Ｚ]': 'A',
'[0-9０-９]': 'N',
}.items()}
patterns_ = {
re.compile(pattern): value
for pattern, value in {
'[一二三四五六七八九十百千万億兆]': 'M',
'[一-龠々〆ヵヶ]': 'H',
'[ぁ-ん]': 'I',
'[ァ-ヴーｱ-ﾝﾞｰ]': 'K',
'[a-zA-Zａ-ｚＡ-Ｚ]': 'A',
'[0-9０-９]': 'N',
}.items()
}
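The patterns_ table maps character-class regexes to the single-letter type codes used by the TinySegmenter-style scoring tables below. A cut-down sketch of how a character gets classified; the 'O' fallback is an assumption about ctype_()'s default:

    import re

    patterns = {
        re.compile('[一二三四五六七八九十百千万億兆]'): 'M',   # kanji numerals
        re.compile('[ぁ-ん]'): 'I',                               # hiragana
    }

    def ctype(char: str) -> str:
        for pattern, value in patterns.items():
            if pattern.match(char):
                return value
        return 'O'   # assumed "other" fallback

    assert ctype('三') == 'M'
    assert ctype('の') == 'I'
    assert ctype('x') == 'O'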
# fmt: off
BIAS__ = -332
BC1__ = {'HH': 6, 'II': 2461, 'KH': 406, 'OH': -1378}
BC2__ = {'AA': -3267, 'AI': 2744, 'AN': -878, 'HH': -4070, 'HM': -1711,
@@ -398,6 +413,7 @@ class DefaultSplitter(BaseSplitter):
'委': 798, '学': -960, '市': 887, '広': -695, '後': 535, '業': -697,
'相': 753, '社': -507, '福': 974, '空': -822, '者': 1811, '連': 463,
'郎': 1082, '1': -270, 'E1': 306, 'ル': -673, 'ン': -496}
# fmt: on

# ctype_
def ctype_(self, char: str) -> str:
@@ -427,18 +443,18 @@ class DefaultSplitter(BaseSplitter):

for i in range(4, len(seg) - 3):
score = self.BIAS__
w1 = seg[i-3]
w2 = seg[i-2]
w3 = seg[i-1]
w1 = seg[i - 3]
w2 = seg[i - 2]
w3 = seg[i - 1]
w4 = seg[i]
w5 = seg[i+1]
w6 = seg[i+2]
c1 = ctype[i-3]
c2 = ctype[i-2]
c3 = ctype[i-1]
w5 = seg[i + 1]
w6 = seg[i + 2]
c1 = ctype[i - 3]
c2 = ctype[i - 2]
c3 = ctype[i - 1]
c4 = ctype[i]
c5 = ctype[i+1]
c6 = ctype[i+2]
c5 = ctype[i + 1]
c6 = ctype[i + 2]
score += self.ts_(self.UP1__, p1)
score += self.ts_(self.UP2__, p2)
score += self.ts_(self.UP3__, p3)
@@ -470,7 +486,7 @@ class DefaultSplitter(BaseSplitter):
score += self.ts_(self.TC2__, c2 + c3 + c4)
score += self.ts_(self.TC3__, c3 + c4 + c5)
score += self.ts_(self.TC4__, c4 + c5 + c6)
# score += self.ts_(self.TC5__, c4 + c5 + c6)
# score += self.ts_(self.TC5__, c4 + c5 + c6)
score += self.ts_(self.UQ1__, p1 + c1)
score += self.ts_(self.UQ2__, p2 + c2)
score += self.ts_(self.UQ1__, p3 + c3)
@@ -501,6 +517,7 @@ class SearchJapanese(SearchLanguage):
Japanese search implementation: uses no stemmer, but word splitting is quite
complicated.
"""

lang = 'ja'
language_name = 'Japanese'

sphinx/search/nl.py

@@ -2,13 +2,11 @@

from __future__ import annotations

from typing import TYPE_CHECKING, Dict

import snowballstemmer

from sphinx.search import SearchLanguage, parse_stop_word

dutch_stopwords = parse_stop_word('''
dutch_stopwords = parse_stop_word("""
| source: https://snowball.tartarus.org/algorithms/dutch/stop.txt
de | the
en | and
@@ -111,7 +109,7 @@ uw | your
iemand | somebody
geweest | been; past participle of 'be'
andere | other
''')
""")


class SearchDutch(SearchLanguage):

sphinx/search/no.py

@@ -2,13 +2,11 @@

from __future__ import annotations

from typing import TYPE_CHECKING, Dict

import snowballstemmer

from sphinx.search import SearchLanguage, parse_stop_word

norwegian_stopwords = parse_stop_word('''
norwegian_stopwords = parse_stop_word("""
| source: https://snowball.tartarus.org/algorithms/norwegian/stop.txt
og | and
i | in
@@ -186,7 +184,7 @@ verte | become *
vort | become *
varte | became *
vart | became *
''')
""")


class SearchNorwegian(SearchLanguage):

sphinx/search/pt.py

@@ -2,13 +2,11 @@

from __future__ import annotations

from typing import TYPE_CHECKING, Dict

import snowballstemmer

from sphinx.search import SearchLanguage, parse_stop_word

portuguese_stopwords = parse_stop_word('''
portuguese_stopwords = parse_stop_word("""
| source: https://snowball.tartarus.org/algorithms/portuguese/stop.txt
de | of, from
a | the; to, at; her
@@ -245,7 +243,7 @@ terão
teria
teríamos
teriam
''')
""")


class SearchPortuguese(SearchLanguage):

sphinx/search/ro.py

@@ -2,8 +2,6 @@

from __future__ import annotations

from typing import TYPE_CHECKING, Dict, Set

import snowballstemmer

from sphinx.search import SearchLanguage

sphinx/search/ru.py

@@ -2,13 +2,11 @@

from __future__ import annotations

from typing import TYPE_CHECKING, Dict

import snowballstemmer

from sphinx.search import SearchLanguage, parse_stop_word

russian_stopwords = parse_stop_word('''
russian_stopwords = parse_stop_word("""
| source: https://snowball.tartarus.org/algorithms/russian/stop.txt
и | and
в | in/into
@@ -235,7 +233,7 @@ russian_stopwords = parse_stop_word('''
| можн
| нужн
| нельзя
''')
""")


class SearchRussian(SearchLanguage):

sphinx/search/sv.py

@@ -2,13 +2,11 @@

from __future__ import annotations

from typing import TYPE_CHECKING, Dict

import snowballstemmer

from sphinx.search import SearchLanguage, parse_stop_word

swedish_stopwords = parse_stop_word('''
swedish_stopwords = parse_stop_word("""
| source: https://snowball.tartarus.org/algorithms/swedish/stop.txt
och | and
det | it, this/that
@@ -124,7 +122,7 @@ våra | our
ert | your
era | your
vilkas | whose
''')
""")


class SearchSwedish(SearchLanguage):

sphinx/search/tr.py

@@ -2,8 +2,6 @@

from __future__ import annotations

from typing import TYPE_CHECKING, Dict, Set

import snowballstemmer

from sphinx.search import SearchLanguage

sphinx/search/zh.py

@@ -11,11 +11,13 @@ from sphinx.search import SearchLanguage

try:
import jieba  # type: ignore[import-not-found]

JIEBA = True
except ImportError:
JIEBA = False

english_stopwords = set("""
english_stopwords = set(
"""
a and are as at
be but by
for
@@ -25,7 +27,8 @@ of on or
such
that the their then there these they this to
was will with
""".split())
""".split()
)

js_porter_stemmer = """
/**
@@ -239,8 +242,7 @@ class SearchChinese(SearchLanguage):
if JIEBA:
chinese = list(jieba.cut_for_search(input))

latin1 = \
[term.strip() for term in self.latin1_letters.findall(input)]
latin1 = [term.strip() for term in self.latin1_letters.findall(input)]
self.latin_terms.extend(latin1)
return chinese + latin1

@@ -252,9 +254,9 @@ class SearchChinese(SearchLanguage):
# if not stemmed, but would be too short after being stemmed
# avoids some issues with acronyms
should_not_be_stemmed = (
word in self.latin_terms and
len(word) >= 3 and
len(self.stemmer.stemWord(word.lower())) < 3
word in self.latin_terms
and len(word) >= 3
and len(self.stemmer.stemWord(word.lower())) < 3
)
if should_not_be_stemmed:
return word.lower()
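The reassociated boolean above guards Latin acronyms against over-stemming. A standalone sketch of the same check, using snowballstemmer directly and assuming latin_terms already contains the word:

    import snowballstemmer

    stemmer = snowballstemmer.stemmer('english')
    latin_terms = ['AWS']          # populated by split() in the real class

    def keep_verbatim(word: str) -> bool:
        # Mirrors the guard above: a known Latin term whose stem would drop
        # below three characters is returned lower-cased instead of stemmed.
        return (
            word in latin_terms
            and len(word) >= 3
            and len(stemmer.stemWord(word.lower())) < 3
        )

    print(keep_verbatim('AWS'))    # True if 'aws' stems below three characters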