Merge pull request #10337 from AA-Turner/reuse-publisher

Cache `Publisher` objects to speed up Sphinx
This commit is contained in:
Takeshi KOMIYA 2022-05-08 02:24:27 +09:00 committed by GitHub
commit 431caac943
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 90 additions and 11 deletions

View File

@ -54,6 +54,7 @@ Deprecated
* The ``language`` argument of ``sphinx.util.i18n:format_date()`` becomes
required
* ``sphinx.builders.html.html5_ready``
* ``sphinx.io.read_doc()``
* ``sphinx.util.docutils.__version_info__``
* ``sphinx.util.docutils.is_html5_writer_available()``
* ``sphinx.writers.latex.LaTeXWriter.docclasses``

View File

@ -47,6 +47,11 @@ The following is a list of deprecated interfaces.
- 7.0
- N/A
* - ``sphinx.io.read_doc()``
- 5.0
- 7.0
- ``sphinx.builders.Builder.read_doc()``
* - ``sphinx.util.docutils.__version_info__``
- 5.0
- 7.0

View File

@ -1,5 +1,6 @@
"""Builder superclass for all builders."""
import codecs
import pickle
import time
from os import path
@ -14,9 +15,9 @@ from sphinx.environment import CONFIG_CHANGED_REASON, CONFIG_OK, BuildEnvironmen
from sphinx.environment.adapters.asset import ImageAdapter
from sphinx.errors import SphinxError
from sphinx.events import EventManager
from sphinx.io import read_doc
from sphinx.locale import __
from sphinx.util import import_object, logging, progress_message, rst, status_iterator
from sphinx.util import (UnicodeDecodeErrorHandler, get_filetype, import_object, logging,
progress_message, rst, status_iterator)
from sphinx.util.build_phase import BuildPhase
from sphinx.util.console import bold # type: ignore
from sphinx.util.docutils import sphinx_domains
@ -464,8 +465,21 @@ class Builder:
if path.isfile(docutilsconf):
self.env.note_dependency(docutilsconf)
filename = self.env.doc2path(docname)
filetype = get_filetype(self.app.config.source_suffix, filename)
publisher = self.app.registry.get_publisher(self.app, filetype)
with sphinx_domains(self.env), rst.default_role(docname, self.config.default_role):
doctree = read_doc(self.app, self.env, self.env.doc2path(docname))
# set up error_handler for the target document
codecs.register_error('sphinx', UnicodeDecodeErrorHandler(docname)) # type: ignore
publisher.set_source(source_path=filename)
publisher.publish()
doctree = publisher.document
# The settings object is reused by the Publisher for each document.
# Because we modify the settings object in ``write_doctree``, we
# need to ensure that each doctree has an independent copy.
doctree.settings = doctree.settings.copy()
# store time of reading, for outdated files detection
# (Some filesystems have coarse timestamp resolution;

View File

@ -11,8 +11,9 @@ from os import path
from typing import IO, Any, Dict, Iterable, Iterator, List, Optional, Set, Tuple, Type
from urllib.parse import quote
import docutils.readers.doctree
from docutils import nodes
from docutils.core import publish_parts
from docutils.core import Publisher
from docutils.frontend import OptionParser
from docutils.io import DocTreeInput, StringOutput
from docutils.nodes import Node
@ -207,6 +208,19 @@ class StandaloneHTMLBuilder(Builder):
# JS files
self.script_files: List[JavaScript] = []
# Cached Publisher for writing doctrees to HTML
reader = docutils.readers.doctree.Reader(parser_name='restructuredtext')
pub = Publisher(
reader=reader,
parser=reader.parser,
writer=HTMLWriter(self),
source_class=DocTreeInput,
destination=StringOutput(encoding='unicode'),
)
op = pub.setup_option_parser(output_encoding='unicode', traceback=True)
pub.settings = op.get_default_values()
self._publisher = pub
def init(self) -> None:
self.build_info = self.create_build_info()
# basename of images directory
@ -417,15 +431,12 @@ class StandaloneHTMLBuilder(Builder):
"""Utility: Render a lone doctree node."""
if node is None:
return {'fragment': ''}
doc = new_document('<partial node>')
doc.append(node)
writer = HTMLWriter(self)
return publish_parts(reader_name='doctree',
writer=writer,
source_class=DocTreeInput,
settings_overrides={'output_encoding': 'unicode'},
source=doc)
self._publisher.set_source(doc)
self._publisher.publish()
return self._publisher.writer.parts
def prepare_writing(self, docnames: Set[str]) -> None:
# create the search indexer

View File

@ -1,5 +1,6 @@
"""Input/Output files"""
import codecs
import warnings
from typing import TYPE_CHECKING, Any, List, Type
from docutils import nodes
@ -14,6 +15,7 @@ from docutils.transforms.references import DanglingReferences
from docutils.writers import UnfilteredWriter
from sphinx import addnodes
from sphinx.deprecation import RemovedInSphinx70Warning
from sphinx.environment import BuildEnvironment
from sphinx.transforms import (AutoIndexUpgrader, DoctreeReadEvent, FigureAligner,
SphinxTransformer)
@ -155,6 +157,9 @@ class SphinxFileInput(FileInput):
def read_doc(app: "Sphinx", env: BuildEnvironment, filename: str) -> nodes.document:
"""Parse a document and convert to doctree."""
warnings.warn('sphinx.io.read_doc() is deprecated.',
RemovedInSphinx70Warning, stacklevel=2)
# set up error_handler for the target document
error_handler = UnicodeDecodeErrorHandler(env.docname)
codecs.register_error('sphinx', error_handler) # type: ignore
@ -180,3 +185,32 @@ def read_doc(app: "Sphinx", env: BuildEnvironment, filename: str) -> nodes.docum
pub.set_source(source_path=filename)
pub.publish()
return pub.document
def create_publisher(app: "Sphinx", filetype: str) -> Publisher:
    """Construct a docutils ``Publisher`` that reads sources of *filetype*.

    The publisher combines a ``SphinxStandaloneReader`` set up with *app*,
    the source parser that ``app.registry`` creates for *filetype*, a
    ``SphinxDummyWriter``, ``SphinxFileInput`` as the source class and a
    ``NullOutput`` destination.  Its ``settings`` attribute is initialised
    from the option parser's default values, seeded with
    ``app.env.settings``.

    :param app: the application the reader and parser are created for
    :param filetype: the file type passed to the registry's parser factory
    :return: the configured publisher
    """
    doc_reader = SphinxStandaloneReader()
    doc_reader.setup(app)

    source_parser = app.registry.create_source_parser(app, filetype)
    is_commonmark = source_parser.__class__.__name__ == 'CommonMarkParser'
    if is_commonmark and source_parser.settings_spec == ():
        # Workaround for recommonmark:
        # when recommonmark.AutoStrictify is enabled the parser invokes the
        # reST parser internally, but recommonmark-0.4.0 does not provide a
        # settings_spec for it.  Borrow the RSTParser settings_spec so the
        # CommonMarkParser carries the reST settings.
        from docutils.parsers.rst import Parser as RSTParser
        source_parser.settings_spec = RSTParser.settings_spec

    publisher = Publisher(
        reader=doc_reader,
        parser=source_parser,
        writer=SphinxDummyWriter(),
        source_class=SphinxFileInput,
        destination=NullOutput(),
    )

    # "traceback" defaults to True so that exceptions propagate when the
    # publisher is used programmatically; entries in app.env.settings take
    # precedence over that default.
    option_defaults = {"traceback": True}
    option_defaults.update(app.env.settings)

    # Install the default settings on the publisher.
    option_parser = publisher.setup_option_parser(**option_defaults)
    publisher.settings = option_parser.get_default_values()  # type: ignore
    return publisher

View File

@ -8,6 +8,7 @@ from typing import (TYPE_CHECKING, Any, Callable, Dict, Iterator, List, Optional
Union)
from docutils import nodes
from docutils.core import Publisher
from docutils.io import Input
from docutils.nodes import Element, Node, TextElement
from docutils.parsers import Parser
@ -27,6 +28,7 @@ from sphinx.domains.std import GenericObject, Target
from sphinx.environment import BuildEnvironment
from sphinx.errors import ExtensionError, SphinxError, VersionRequirementError
from sphinx.extension import Extension
from sphinx.io import create_publisher
from sphinx.locale import __
from sphinx.parsers import Parser as SphinxParser
from sphinx.roles import XRefRole
@ -125,6 +127,9 @@ class SphinxComponentRegistry:
#: additional transforms; list of transforms
self.transforms: List[Type[Transform]] = []
# private cache of Docutils Publishers (file type -> publisher object)
self.publishers: Dict[str, Publisher] = {}
def add_builder(self, builder: Type[Builder], override: bool = False) -> None:
logger.debug('[app] adding builder: %r', builder)
if not hasattr(builder, 'name'):
@ -461,6 +466,15 @@ class SphinxComponentRegistry:
envversion['sphinx'] = ENV_VERSION
return envversion
def get_publisher(self, app: "Sphinx", filetype: str) -> Publisher:
    """Return the ``Publisher`` for *filetype*, creating it on first use.

    Publishers are kept in ``self.publishers`` keyed by file type.  When no
    entry exists for *filetype*, one is built with ``create_publisher`` and
    stored before being returned.

    :param app: the application handed to ``create_publisher`` on a miss
    :param filetype: the cache key and the file type to build for
    :return: the cached or newly created publisher
    """
    cache = self.publishers
    if filetype not in cache:
        # First request for this file type: build and remember it.
        cache[filetype] = create_publisher(app, filetype)
    return cache[filetype]
def merge_source_suffix(app: "Sphinx", config: Config) -> None:
"""Merge any user-specified source_suffix with any added by extensions."""