From 1e1de932a6edf6a9b38e29f12f1fd4352f862481 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Sat, 9 Apr 2022 20:17:33 +0100 Subject: [PATCH 1/5] Cache publisher for rendering documents --- sphinx/builders/html/__init__.py | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/sphinx/builders/html/__init__.py b/sphinx/builders/html/__init__.py index f713fb2b5..ac78461a6 100644 --- a/sphinx/builders/html/__init__.py +++ b/sphinx/builders/html/__init__.py @@ -10,8 +10,9 @@ from os import path from typing import IO, Any, Dict, Iterable, Iterator, List, Optional, Set, Tuple, Type from urllib.parse import quote +import docutils.readers.doctree from docutils import nodes -from docutils.core import publish_parts +from docutils.core import Publisher from docutils.frontend import OptionParser from docutils.io import DocTreeInput, StringOutput from docutils.nodes import Node @@ -211,6 +212,19 @@ class StandaloneHTMLBuilder(Builder): # JS files self.script_files: List[JavaScript] = [] + # Cached Publisher for writing doctrees to HTML + reader = docutils.readers.doctree.Reader(parser_name='restructuredtext') + pub = Publisher( + reader=reader, + parser=reader.parser, + writer=HTMLWriter(self), + source_class=DocTreeInput, + destination=StringOutput(encoding='unicode'), + ) + op = pub.setup_option_parser(output_encoding='unicode', traceback=True) + pub.settings = op.get_default_values() + self._publisher = pub + def init(self) -> None: self.build_info = self.create_build_info() # basename of images directory @@ -421,15 +435,12 @@ class StandaloneHTMLBuilder(Builder): """Utility: Render a lone doctree node.""" if node is None: return {'fragment': ''} + doc = new_document('') doc.append(node) - - writer = HTMLWriter(self) - return publish_parts(reader_name='doctree', - writer=writer, - source_class=DocTreeInput, - settings_overrides={'output_encoding': 'unicode'}, - source=doc) + 
self._publisher.set_source(doc) + self._publisher.publish() + return self._publisher.writer.parts def prepare_writing(self, docnames: Set[str]) -> None: # create the search indexer From ab3b3e298000c08ab0d0e354a0415df0af6d4e17 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Sat, 9 Apr 2022 21:31:57 +0100 Subject: [PATCH 2/5] Cache publisher for reading documents --- sphinx/builders/__init__.py | 4 +++- sphinx/io.py | 35 +++++++++-------------------- sphinx/registry.py | 44 +++++++++++++++++++++++++++++++++++-- 3 files changed, 55 insertions(+), 28 deletions(-) diff --git a/sphinx/builders/__init__.py b/sphinx/builders/__init__.py index e1817c445..8e246f074 100644 --- a/sphinx/builders/__init__.py +++ b/sphinx/builders/__init__.py @@ -464,8 +464,10 @@ class Builder: if path.isfile(docutilsconf): self.env.note_dependency(docutilsconf) + filename = self.env.doc2path(docname) + publisher = self.app.registry.create_publisher(self.app, filename) with sphinx_domains(self.env), rst.default_role(docname, self.config.default_role): - doctree = read_doc(self.app, self.env, self.env.doc2path(docname)) + doctree = read_doc(publisher, docname, filename) # store time of reading, for outdated files detection # (Some filesystems have coarse timestamp resolution; diff --git a/sphinx/io.py b/sphinx/io.py index 936631d51..57e2e0899 100644 --- a/sphinx/io.py +++ b/sphinx/io.py @@ -5,9 +5,8 @@ from typing import TYPE_CHECKING, Any, List, Type from docutils import nodes from docutils.core import Publisher from docutils.frontend import Values -from docutils.io import FileInput, Input, NullOutput +from docutils.io import FileInput, Input from docutils.parsers import Parser -from docutils.parsers.rst import Parser as RSTParser from docutils.readers import standalone from docutils.transforms import Transform from docutils.transforms.references import DanglingReferences @@ -20,7 +19,7 @@ from sphinx.transforms import (AutoIndexUpgrader, 
DoctreeReadEvent, FigureAligne from sphinx.transforms.i18n import (Locale, PreserveTranslatableMessages, RemoveTranslatableInline) from sphinx.transforms.references import SphinxDomains -from sphinx.util import UnicodeDecodeErrorHandler, get_filetype, logging +from sphinx.util import UnicodeDecodeErrorHandler, logging from sphinx.util.docutils import LoggingReporter from sphinx.versioning import UIDTransform @@ -153,30 +152,16 @@ class SphinxFileInput(FileInput): super().__init__(*args, **kwargs) -def read_doc(app: "Sphinx", env: BuildEnvironment, filename: str) -> nodes.document: +def read_doc(publisher: Publisher, docname: str, filename: str) -> nodes.document: """Parse a document and convert to doctree.""" # set up error_handler for the target document - error_handler = UnicodeDecodeErrorHandler(env.docname) + error_handler = UnicodeDecodeErrorHandler(docname) codecs.register_error('sphinx', error_handler) # type: ignore - reader = SphinxStandaloneReader() - reader.setup(app) - filetype = get_filetype(app.config.source_suffix, filename) - parser = app.registry.create_source_parser(app, filetype) - if parser.__class__.__name__ == 'CommonMarkParser' and parser.settings_spec == (): - # a workaround for recommonmark - # If recommonmark.AutoStrictify is enabled, the parser invokes reST parser - # internally. But recommonmark-0.4.0 does not provide settings_spec for reST - # parser. As a workaround, this copies settings_spec for RSTParser to the - # CommonMarkParser. 
- parser.settings_spec = RSTParser.settings_spec + publisher.set_source(source_path=filename) + publisher.publish() - pub = Publisher(reader=reader, - parser=parser, - writer=SphinxDummyWriter(), - source_class=SphinxFileInput, - destination=NullOutput()) - pub.process_programmatic_settings(None, env.settings, None) - pub.set_source(source_path=filename) - pub.publish() - return pub.document + doctree = publisher.document + # settings get modified in ``write_doctree``; get a local copy + doctree.settings = doctree.settings.copy() + return doctree diff --git a/sphinx/registry.py b/sphinx/registry.py index 0f8010956..1edc90ebc 100644 --- a/sphinx/registry.py +++ b/sphinx/registry.py @@ -8,7 +8,8 @@ from typing import (TYPE_CHECKING, Any, Callable, Dict, Iterator, List, Optional Union) from docutils import nodes -from docutils.io import Input +from docutils.core import Publisher +from docutils.io import Input, NullOutput from docutils.nodes import Element, Node, TextElement from docutils.parsers import Parser from docutils.parsers.rst import Directive @@ -27,10 +28,11 @@ from sphinx.domains.std import GenericObject, Target from sphinx.environment import BuildEnvironment from sphinx.errors import ExtensionError, SphinxError, VersionRequirementError from sphinx.extension import Extension +from sphinx.io import SphinxDummyWriter, SphinxFileInput, SphinxStandaloneReader from sphinx.locale import __ from sphinx.parsers import Parser as SphinxParser from sphinx.roles import XRefRole -from sphinx.util import logging +from sphinx.util import get_filetype, logging from sphinx.util.logging import prefixed_warnings from sphinx.util.typing import RoleFunction, TitleGetter @@ -125,6 +127,9 @@ class SphinxComponentRegistry: #: additional transforms; list of transforms self.transforms: List[Type[Transform]] = [] + # private cache of Docutils Publishers (file type -> publisher object) + self._publishers: Dict[str, Publisher] = {} + def add_builder(self, builder: Type[Builder], 
override: bool = False) -> None: logger.debug('[app] adding builder: %r', builder) if not hasattr(builder, 'name'): @@ -461,6 +466,41 @@ class SphinxComponentRegistry: envversion['sphinx'] = ENV_VERSION return envversion + def create_publisher(self, app: "Sphinx", filename: str) -> Publisher: + filetype = get_filetype(app.config.source_suffix, filename) + try: + return self._publishers[filetype] + except KeyError: + pass + + reader = SphinxStandaloneReader() + reader.setup(app) + + parser = app.registry.create_source_parser(app, filetype) + if parser.__class__.__name__ == 'CommonMarkParser' and parser.settings_spec == (): + # a workaround for recommonmark + # If recommonmark.AutoStrictify is enabled, the parser invokes reST parser + # internally. But recommonmark-0.4.0 does not provide settings_spec for reST + # parser. As a workaround, this copies settings_spec for RSTParser to the + # CommonMarkParser. + from docutils.parsers.rst import Parser as RSTParser + + parser.settings_spec = RSTParser.settings_spec + + pub = Publisher( + reader=reader, + parser=parser, + writer=SphinxDummyWriter(), + source_class=SphinxFileInput, + destination=NullOutput() + ) + # Propagate exceptions by default when used programmatically: + defaults = {"traceback": True, **app.env.settings} + # Set default settings + pub.settings = pub.setup_option_parser(**defaults).get_default_values() # type: ignore + self._publishers[filetype] = pub + return pub + def merge_source_suffix(app: "Sphinx", config: Config) -> None: """Merge any user-specified source_suffix with any added by extensions.""" From f346e0a11f358f0496524b1e3f5d0984722f7d85 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Date: Mon, 2 May 2022 17:24:49 +0100 Subject: [PATCH 3/5] Move creation to `sphinx.io` --- sphinx/builders/__init__.py | 6 +++-- sphinx/io.py | 31 +++++++++++++++++++++++++- sphinx/registry.py | 44 ++++++++----------------------------- 3 files changed, 43 insertions(+), 38 
deletions(-) diff --git a/sphinx/builders/__init__.py b/sphinx/builders/__init__.py index 8e246f074..f73c1a5fc 100644 --- a/sphinx/builders/__init__.py +++ b/sphinx/builders/__init__.py @@ -16,7 +16,8 @@ from sphinx.errors import SphinxError from sphinx.events import EventManager from sphinx.io import read_doc from sphinx.locale import __ -from sphinx.util import import_object, logging, progress_message, rst, status_iterator +from sphinx.util import (get_filetype, import_object, logging, progress_message, rst, + status_iterator) from sphinx.util.build_phase import BuildPhase from sphinx.util.console import bold # type: ignore from sphinx.util.docutils import sphinx_domains @@ -465,7 +466,8 @@ class Builder: self.env.note_dependency(docutilsconf) filename = self.env.doc2path(docname) - publisher = self.app.registry.create_publisher(self.app, filename) + filetype = get_filetype(self.app.config.source_suffix, filename) + publisher = self.app.registry.get_publisher(self.app, filetype) with sphinx_domains(self.env), rst.default_role(docname, self.config.default_role): doctree = read_doc(publisher, docname, filename) diff --git a/sphinx/io.py b/sphinx/io.py index 57e2e0899..4ffa8e54d 100644 --- a/sphinx/io.py +++ b/sphinx/io.py @@ -5,7 +5,7 @@ from typing import TYPE_CHECKING, Any, List, Type from docutils import nodes from docutils.core import Publisher from docutils.frontend import Values -from docutils.io import FileInput, Input +from docutils.io import FileInput, Input, NullOutput from docutils.parsers import Parser from docutils.readers import standalone from docutils.transforms import Transform @@ -165,3 +165,32 @@ def read_doc(publisher: Publisher, docname: str, filename: str) -> nodes.documen # settings get modified in ``write_doctree``; get a local copy doctree.settings = doctree.settings.copy() return doctree + + +def create_publisher(app: "Sphinx", filetype: str) -> Publisher: + reader = SphinxStandaloneReader() + reader.setup(app) + + parser = 
app.registry.create_source_parser(app, filetype) + if parser.__class__.__name__ == 'CommonMarkParser' and parser.settings_spec == (): + # a workaround for recommonmark + # If recommonmark.AutoStrictify is enabled, the parser invokes reST parser + # internally. But recommonmark-0.4.0 does not provide settings_spec for reST + # parser. As a workaround, this copies settings_spec for RSTParser to the + # CommonMarkParser. + from docutils.parsers.rst import Parser as RSTParser + + parser.settings_spec = RSTParser.settings_spec + + pub = Publisher( + reader=reader, + parser=parser, + writer=SphinxDummyWriter(), + source_class=SphinxFileInput, + destination=NullOutput() + ) + # Propagate exceptions by default when used programmatically: + defaults = {"traceback": True, **app.env.settings} + # Set default settings + pub.settings = pub.setup_option_parser(**defaults).get_default_values() # type: ignore + return pub diff --git a/sphinx/registry.py b/sphinx/registry.py index 1edc90ebc..6770abb02 100644 --- a/sphinx/registry.py +++ b/sphinx/registry.py @@ -9,7 +9,7 @@ from typing import (TYPE_CHECKING, Any, Callable, Dict, Iterator, List, Optional from docutils import nodes from docutils.core import Publisher -from docutils.io import Input, NullOutput +from docutils.io import Input from docutils.nodes import Element, Node, TextElement from docutils.parsers import Parser from docutils.parsers.rst import Directive @@ -28,11 +28,11 @@ from sphinx.domains.std import GenericObject, Target from sphinx.environment import BuildEnvironment from sphinx.errors import ExtensionError, SphinxError, VersionRequirementError from sphinx.extension import Extension -from sphinx.io import SphinxDummyWriter, SphinxFileInput, SphinxStandaloneReader +from sphinx.io import create_publisher from sphinx.locale import __ from sphinx.parsers import Parser as SphinxParser from sphinx.roles import XRefRole -from sphinx.util import get_filetype, logging +from sphinx.util import logging from 
sphinx.util.logging import prefixed_warnings from sphinx.util.typing import RoleFunction, TitleGetter @@ -128,7 +128,7 @@ class SphinxComponentRegistry: self.transforms: List[Type[Transform]] = [] # private cache of Docutils Publishers (file type -> publisher object) - self._publishers: Dict[str, Publisher] = {} + self.publishers: Dict[str, Publisher] = {} def add_builder(self, builder: Type[Builder], override: bool = False) -> None: logger.debug('[app] adding builder: %r', builder) @@ -466,40 +466,14 @@ class SphinxComponentRegistry: envversion['sphinx'] = ENV_VERSION return envversion - def create_publisher(self, app: "Sphinx", filename: str) -> Publisher: - filetype = get_filetype(app.config.source_suffix, filename) + def get_publisher(self, app: "Sphinx", filetype: str) -> Publisher: try: - return self._publishers[filetype] + return self.publishers[filetype] except KeyError: pass - - reader = SphinxStandaloneReader() - reader.setup(app) - - parser = app.registry.create_source_parser(app, filetype) - if parser.__class__.__name__ == 'CommonMarkParser' and parser.settings_spec == (): - # a workaround for recommonmark - # If recommonmark.AutoStrictify is enabled, the parser invokes reST parser - # internally. But recommonmark-0.4.0 does not provide settings_spec for reST - # parser. As a workaround, this copies settings_spec for RSTParser to the - # CommonMarkParser. 
- from docutils.parsers.rst import Parser as RSTParser - - parser.settings_spec = RSTParser.settings_spec - - pub = Publisher( - reader=reader, - parser=parser, - writer=SphinxDummyWriter(), - source_class=SphinxFileInput, - destination=NullOutput() - ) - # Propagate exceptions by default when used programmatically: - defaults = {"traceback": True, **app.env.settings} - # Set default settings - pub.settings = pub.setup_option_parser(**defaults).get_default_values() # type: ignore - self._publishers[filetype] = pub - return pub + publisher = create_publisher(app, filetype) + self.publishers[filetype] = publisher + return publisher def merge_source_suffix(app: "Sphinx", config: Config) -> None: From 919eb1db912ae80c98b3edfbb303dcd04e2b3e9c Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Sat, 7 May 2022 13:03:43 +0100 Subject: [PATCH 4/5] Inline `io.read_doc` and explain why settings are copied --- sphinx/builders/__init__.py | 18 ++++++++++++++---- sphinx/io.py | 19 ++----------------- 2 files changed, 16 insertions(+), 21 deletions(-) diff --git a/sphinx/builders/__init__.py b/sphinx/builders/__init__.py index f73c1a5fc..a67948acc 100644 --- a/sphinx/builders/__init__.py +++ b/sphinx/builders/__init__.py @@ -1,5 +1,6 @@ """Builder superclass for all builders.""" +import codecs import pickle import time from os import path @@ -14,10 +15,9 @@ from sphinx.environment import CONFIG_CHANGED_REASON, CONFIG_OK, BuildEnvironmen from sphinx.environment.adapters.asset import ImageAdapter from sphinx.errors import SphinxError from sphinx.events import EventManager -from sphinx.io import read_doc from sphinx.locale import __ -from sphinx.util import (get_filetype, import_object, logging, progress_message, rst, - status_iterator) +from sphinx.util import (UnicodeDecodeErrorHandler, get_filetype, import_object, logging, + progress_message, rst, status_iterator) from sphinx.util.build_phase import BuildPhase from sphinx.util.console 
import bold # type: ignore from sphinx.util.docutils import sphinx_domains @@ -469,7 +469,17 @@ class Builder: filetype = get_filetype(self.app.config.source_suffix, filename) publisher = self.app.registry.get_publisher(self.app, filetype) with sphinx_domains(self.env), rst.default_role(docname, self.config.default_role): - doctree = read_doc(publisher, docname, filename) + # set up error_handler for the target document + codecs.register_error('sphinx', UnicodeDecodeErrorHandler(docname)) # type: ignore + + publisher.set_source(source_path=filename) + publisher.publish() + doctree = publisher.document + + # The settings object is reused by the Publisher for each document. + # Because we modify the settings object in ``write_doctree``, we + # need to ensure that each doctree has an independent copy. + doctree.settings = doctree.settings.copy() # store time of reading, for outdated files detection # (Some filesystems have coarse timestamp resolution; diff --git a/sphinx/io.py b/sphinx/io.py index 4ffa8e54d..b4cec7d3e 100644 --- a/sphinx/io.py +++ b/sphinx/io.py @@ -1,5 +1,5 @@ """Input/Output files""" -import codecs + from typing import TYPE_CHECKING, Any, List, Type from docutils import nodes @@ -19,7 +19,7 @@ from sphinx.transforms import (AutoIndexUpgrader, DoctreeReadEvent, FigureAligne from sphinx.transforms.i18n import (Locale, PreserveTranslatableMessages, RemoveTranslatableInline) from sphinx.transforms.references import SphinxDomains -from sphinx.util import UnicodeDecodeErrorHandler, logging +from sphinx.util import logging from sphinx.util.docutils import LoggingReporter from sphinx.versioning import UIDTransform @@ -152,21 +152,6 @@ class SphinxFileInput(FileInput): super().__init__(*args, **kwargs) -def read_doc(publisher: Publisher, docname: str, filename: str) -> nodes.document: - """Parse a document and convert to doctree.""" - # set up error_handler for the target document - error_handler = UnicodeDecodeErrorHandler(docname) - 
codecs.register_error('sphinx', error_handler) # type: ignore - - publisher.set_source(source_path=filename) - publisher.publish() - - doctree = publisher.document - # settings get modified in ``write_doctree``; get a local copy - doctree.settings = doctree.settings.copy() - return doctree - - def create_publisher(app: "Sphinx", filetype: str) -> Publisher: reader = SphinxStandaloneReader() reader.setup(app) From f9dce5764edd62b280904325cfecb4f962dbbd9b Mon Sep 17 00:00:00 2001 From: Takeshi KOMIYA Date: Sun, 8 May 2022 02:15:58 +0900 Subject: [PATCH 5/5] Mark sphinx.io:read_doc() deprecated --- CHANGES | 1 + doc/extdev/deprecated.rst | 5 +++++ sphinx/io.py | 39 +++++++++++++++++++++++++++++++++++++-- 3 files changed, 43 insertions(+), 2 deletions(-) diff --git a/CHANGES b/CHANGES index e95c6a132..8bac61c1a 100644 --- a/CHANGES +++ b/CHANGES @@ -54,6 +54,7 @@ Deprecated * The ``language`` argument of ``sphinx.util.i18n:format_date()`` becomes required * ``sphinx.builders.html.html5_ready`` +* ``sphinx.io.read_doc()`` * ``sphinx.util.docutils.__version_info__`` * ``sphinx.util.docutils.is_html5_writer_available()`` * ``sphinx.writers.latex.LaTeXWriter.docclasses`` diff --git a/doc/extdev/deprecated.rst b/doc/extdev/deprecated.rst index 5a03f821a..98bd463a9 100644 --- a/doc/extdev/deprecated.rst +++ b/doc/extdev/deprecated.rst @@ -47,6 +47,11 @@ The following is a list of deprecated interfaces. 
- 7.0 - N/A + * - ``sphinx.io.read_doc()`` + - 5.0 + - 7.0 + - ``sphinx.builders.Builder.read_doc()`` + * - ``sphinx.util.docutils.__version_info__`` - 5.0 - 7.0 diff --git a/sphinx/io.py b/sphinx/io.py index b4cec7d3e..5ab7b2b63 100644 --- a/sphinx/io.py +++ b/sphinx/io.py @@ -1,5 +1,6 @@ """Input/Output files""" - +import codecs +import warnings from typing import TYPE_CHECKING, Any, List, Type from docutils import nodes @@ -7,19 +8,21 @@ from docutils.core import Publisher from docutils.frontend import Values from docutils.io import FileInput, Input, NullOutput from docutils.parsers import Parser +from docutils.parsers.rst import Parser as RSTParser from docutils.readers import standalone from docutils.transforms import Transform from docutils.transforms.references import DanglingReferences from docutils.writers import UnfilteredWriter from sphinx import addnodes +from sphinx.deprecation import RemovedInSphinx70Warning from sphinx.environment import BuildEnvironment from sphinx.transforms import (AutoIndexUpgrader, DoctreeReadEvent, FigureAligner, SphinxTransformer) from sphinx.transforms.i18n import (Locale, PreserveTranslatableMessages, RemoveTranslatableInline) from sphinx.transforms.references import SphinxDomains -from sphinx.util import logging +from sphinx.util import UnicodeDecodeErrorHandler, get_filetype, logging from sphinx.util.docutils import LoggingReporter from sphinx.versioning import UIDTransform @@ -152,6 +155,38 @@ class SphinxFileInput(FileInput): super().__init__(*args, **kwargs) +def read_doc(app: "Sphinx", env: BuildEnvironment, filename: str) -> nodes.document: + """Parse a document and convert to doctree.""" + warnings.warn('sphinx.io.read_doc() is deprecated.', + RemovedInSphinx70Warning, stacklevel=2) + + # set up error_handler for the target document + error_handler = UnicodeDecodeErrorHandler(env.docname) + codecs.register_error('sphinx', error_handler) # type: ignore + + reader = SphinxStandaloneReader() + reader.setup(app) + 
filetype = get_filetype(app.config.source_suffix, filename) + parser = app.registry.create_source_parser(app, filetype) + if parser.__class__.__name__ == 'CommonMarkParser' and parser.settings_spec == (): + # a workaround for recommonmark + # If recommonmark.AutoStrictify is enabled, the parser invokes reST parser + # internally. But recommonmark-0.4.0 does not provide settings_spec for reST + # parser. As a workaround, this copies settings_spec for RSTParser to the + # CommonMarkParser. + parser.settings_spec = RSTParser.settings_spec + + pub = Publisher(reader=reader, + parser=parser, + writer=SphinxDummyWriter(), + source_class=SphinxFileInput, + destination=NullOutput()) + pub.process_programmatic_settings(None, env.settings, None) + pub.set_source(source_path=filename) + pub.publish() + return pub.document + + def create_publisher(app: "Sphinx", filetype: str) -> Publisher: reader = SphinxStandaloneReader() reader.setup(app)