Refactor sphinx.search

Takeshi KOMIYA
2018-11-30 18:03:20 +09:00
parent 3528a68d2a
commit f7317651a1
3 changed files with 33 additions and 22 deletions


@@ -39,6 +39,8 @@ Deprecated
  ``autodoc.DocstringSignatureMixin.get_doc()``,
  ``autodoc.DocstringSignatureMixin._find_signature()``, and
  ``autodoc.ClassDocumenter.get_doc()`` are deprecated.
* The ``nodetype`` argument of
  ``sphinx.search.WordCollector.is_meta_keywords()`` is deprecated.
* The ``suffix`` argument of ``env.doc2path()`` is deprecated.
* The string style ``base`` argument of ``env.doc2path()`` is deprecated.
* ``sphinx.application.Sphinx._setting_up_extension``
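
A minimal usage sketch of the ``nodetype`` deprecation listed above; it is not
part of this commit. The docutils document setup, the English search language,
and the meta node contents are assumptions for illustration only; the new
single-argument call and the warning on the old two-argument call follow the
refactored code further down.

import warnings

from docutils.frontend import OptionParser
from docutils.parsers.rst import Parser
from docutils.utils import new_document

from sphinx import addnodes
from sphinx.search import WordCollector
from sphinx.search.en import SearchEnglish

# Build a bare docutils document and a collector (assumed setup).
settings = OptionParser(components=(Parser,)).get_default_values()
document = new_document('<sketch>', settings)
collector = WordCollector(document, SearchEnglish({}))

# A meta node carrying keywords, as the "keywords" check expects.
meta_node = addnodes.meta()
meta_node['name'] = 'keywords'
meta_node['content'] = 'search, index'

# New-style call: the node alone is enough.
assert collector.is_meta_keywords(meta_node)

# Old-style call: still accepted, but now emits RemovedInSphinx40Warning.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter('always')
    collector.is_meta_keywords(meta_node, addnodes.meta)
assert any('nodetype' in str(w.message) for w in caught)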


@@ -131,6 +131,12 @@ The following is a list of deprecated interfaces.
     - 4.0
     - N/A

   * - ``nodetype`` argument of
       ``sphinx.search.WordCollector.is_meta_keywords()``
     - 2.0
     - 4.0
     - N/A

   * - ``suffix`` argument of ``BuildEnvironment.doc2path()``
     - 2.0
     - 4.0


@@ -10,13 +10,16 @@
"""
import pickle
import re
import warnings
from os import path
from six import text_type
from docutils.nodes import raw, comment, title, Text, NodeVisitor, SkipNode
from docutils import nodes
import sphinx
from sphinx import addnodes
from sphinx import package_dir
from sphinx.deprecation import RemovedInSphinx40Warning
from sphinx.util import jsdump, rpartition
from sphinx.util.pycompat import htmlescape
from sphinx.search.jssplitter import splitter_code
@@ -127,7 +130,7 @@ def parse_stop_word(source):
    * http://snowball.tartarus.org/algorithms/finnish/stop.txt
    """
    result = set()
    result = set()  # type: Set[unicode]
    for line in source.splitlines():
        line = line.split('|')[0]  # remove comment
        result.update(line.split())
@@ -189,21 +192,25 @@ class _JavaScriptIndex:
js_index = _JavaScriptIndex()
class WordCollector(NodeVisitor):
class WordCollector(nodes.NodeVisitor):
"""
A special visitor that collects words for the `IndexBuilder`.
"""
def __init__(self, document, lang):
# type: (nodes.Node, SearchLanguage) -> None
# type: (nodes.document, SearchLanguage) -> None
super(WordCollector, self).__init__(document)
self.found_words = [] # type: List[unicode]
self.found_title_words = [] # type: List[unicode]
self.lang = lang
def is_meta_keywords(self, node, nodetype):
# type: (nodes.Node, Type) -> bool
if isinstance(node, sphinx.addnodes.meta) and node.get('name') == 'keywords':
def is_meta_keywords(self, node, nodetype=None):
# type: (addnodes.meta, Any) -> bool
if nodetype is not None:
warnings.warn('"nodetype" argument for WordCollector.is_meta_keywords() '
'is deprecated.', RemovedInSphinx40Warning)
if isinstance(node, addnodes.meta) and node.get('name') == 'keywords':
meta_lang = node.get('lang')
if meta_lang is None: # lang not specified
return True
@@ -214,10 +221,9 @@ class WordCollector(NodeVisitor):
    def dispatch_visit(self, node):
        # type: (nodes.Node) -> None
        nodetype = type(node)
        if issubclass(nodetype, comment):
            raise SkipNode
        if issubclass(nodetype, raw):
        if isinstance(node, nodes.comment):
            raise nodes.SkipNode
        elif isinstance(node, nodes.raw):
            if 'html' in node.get('format', '').split():
                # Some people might put content in raw HTML that should be searched,
                # so we just amateurishly strip HTML tags and index the remaining
@@ -226,12 +232,12 @@ class WordCollector(NodeVisitor):
                nodetext = re.sub(r'(?is)<script.*?</script>', '', nodetext)
                nodetext = re.sub(r'<[^<]+?>', '', nodetext)
                self.found_words.extend(self.lang.split(nodetext))
            raise SkipNode
        if issubclass(nodetype, Text):
            raise nodes.SkipNode
        elif isinstance(node, nodes.Text):
            self.found_words.extend(self.lang.split(node.astext()))
        elif issubclass(nodetype, title):
        elif isinstance(node, nodes.title):
            self.found_title_words.extend(self.lang.split(node.astext()))
        elif self.is_meta_keywords(node, nodetype):
        elif isinstance(node, addnodes.meta) and self.is_meta_keywords(node):
            keywords = node['content']
            keywords = [keyword.strip() for keyword in keywords.split(',')]
            self.found_words.extend(keywords)
@@ -411,7 +417,7 @@ class IndexBuilder:
            wordnames.intersection_update(docnames)

    def feed(self, docname, filename, title, doctree):
        # type: (unicode, unicode, unicode, nodes.Node) -> None
        # type: (unicode, unicode, unicode, nodes.document) -> None
        """Feed a doctree to the index."""
        self._titles[docname] = title
        self._filenames[docname] = filename
@@ -457,10 +463,7 @@ class IndexBuilder:
    def get_js_stemmer_rawcode(self):
        # type: () -> unicode
        if self.lang.js_stemmer_rawcode:
            return path.join(
                sphinx.package_dir, 'search',
                'non-minified-js',
                self.lang.js_stemmer_rawcode
            )
            return path.join(package_dir, 'search', 'non-minified-js',
                             self.lang.js_stemmer_rawcode)
        else:
            return None
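
A second minimal sketch, also not part of the commit, that walks a small
hand-built doctree through the refactored ``WordCollector``: the
isinstance-based ``dispatch_visit()`` above skips comments and collects title
and body words separately. The document construction mirrors the assumed setup
of the earlier sketch and is illustrative only.

from docutils import nodes
from docutils.frontend import OptionParser
from docutils.parsers.rst import Parser
from docutils.utils import new_document

from sphinx.search import WordCollector
from sphinx.search.en import SearchEnglish

settings = OptionParser(components=(Parser,)).get_default_values()
document = new_document('<sketch>', settings)
document += nodes.title(text='Getting Started')
document += nodes.paragraph(text='Install the package first.')
document += nodes.comment(text='ignored by the search index')

collector = WordCollector(document, SearchEnglish({}))
document.walk(collector)  # comments raise nodes.SkipNode and are not indexed

print(collector.found_title_words)  # words split from the title node
print(collector.found_words)        # words from the title text and the paragraph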