# Patch summary (commentary only; everything from "diff --git" onwards is the
# original patch text, unchanged).
#
# NOTE: in this copy the patch's line breaks and indentation are collapsed, so
# hunk context/blank lines cannot be told apart reliably and the patch will not
# apply as-is.
#
# sphinx/io.py
#   * Removes `from sphinx.locale import __` and adds
#     `from sphinx.util import UnicodeDecodeErrorHandler`.
#   * SphinxBaseFileInput.__init__: removes the "# set up error handler"
#     comment and the `codecs.register_error('sphinx', self.warn_and_replace)`
#     call.
#     NOTE(review): the hunk header (-167,9 +167,6) removes three lines; with
#     whitespace collapsed it is ambiguous whether the third `-` marks a blank
#     line or the `kwds['error_handler'] = 'sphinx'` line -- confirm against
#     the real file.
#   * SphinxBaseFileInput.warn_and_replace(): the inline implementation is
#     replaced by a deprecation shim that calls warnings.warn(...) with
#     RemovedInSphinx30Warning and stacklevel=2, then returns
#     `UnicodeDecodeErrorHandler(self.env.docname)(error)`.
#     NOTE(review): no hunk here adds `import warnings` to sphinx/io.py --
#     confirm the module already imports it.
#   * read_doc(): before resolving the file type, builds
#     `UnicodeDecodeErrorHandler(env.docname)` and registers it with
#     `codecs.register_error('sphinx', ...)`.
#     NOTE(review): the registration is keyed only by the name 'sphinx', so
#     each read_doc() call replaces the previously registered handler --
#     confirm nothing relies on a handler registered outside read_doc().
#
# sphinx/util/__init__.py
#   * Adds `from sphinx.locale import __`.
#   * Adds class UnicodeDecodeErrorHandler, a callable that stores `docname`.
#     When called with a decode error it:
#       - finds the start/end of the line containing error.start in
#         error.object (end of input is used if no trailing b'\n' exists),
#       - computes a 1-based line number by counting b'\n' before error.start,
#       - logs a warning showing that line with the undecodable bytes wrapped
#         in b'>>>' ... b'<<<', located at (docname, lineno),
#       - returns (u'?', error.end), i.e. the replacement text and the offset
#         handed back to the codec machinery.
diff --git a/sphinx/io.py b/sphinx/io.py index ad6985778..89d2bc209 100644 --- a/sphinx/io.py +++ b/sphinx/io.py @@ -21,7 +21,6 @@ from six import text_type from typing import Any, Union # NOQA from sphinx.deprecation import RemovedInSphinx30Warning -from sphinx.locale import __ from sphinx.transforms import ( ApplySourceWorkaround, ExtraTranslatableNodes, CitationReferences, DefaultSubstitutions, MoveModuleTargets, HandleCodeBlocks, SortIds, @@ -35,6 +34,7 @@ from sphinx.transforms.i18n import ( ) from sphinx.transforms.references import SphinxDomains, SubstitutionDefinitionsRemover from sphinx.util import logging +from sphinx.util import UnicodeDecodeErrorHandler from sphinx.util.docutils import LoggingReporter from sphinx.util.rst import append_epilog, docinfo_re, prepend_prolog from sphinx.versioning import UIDTransform @@ -167,9 +167,6 @@ class SphinxBaseFileInput(FileInput): self.app = app self.env = env - # set up error handler - codecs.register_error('sphinx', self.warn_and_replace) # type: ignore - kwds['error_handler'] = 'sphinx' # py3: handle error on open. super(SphinxBaseFileInput, self).__init__(*args, **kwds) @@ -194,18 +191,11 @@ class SphinxBaseFileInput(FileInput): def warn_and_replace(self, error): # type: (Any) -> Tuple - """Custom decoding error handler that warns and replaces.""" - linestart = error.object.rfind(b'\n', 0, error.start) - lineend = error.object.find(b'\n', error.start) - if lineend == -1: - lineend = len(error.object) - lineno = error.object.count(b'\n', 0, error.start) + 1 - logger.warning(__('undecodable source characters, replacing with "?": %r'), - (error.object[linestart + 1:error.start] + b'>>>' + - error.object[error.start:error.end] + b'<<<' + - error.object[error.end:lineend]), - location=(self.env.docname, lineno)) - return (u'?', error.end) + warnings.warn('SphinxBaseFileInput.warn_and_replace() is deprecated. 
' + 'Use UnicodeDecodeErrorHandler instead.', + RemovedInSphinx30Warning, stacklevel=2) + + return UnicodeDecodeErrorHandler(self.env.docname)(error) class SphinxFileInput(SphinxBaseFileInput): @@ -294,6 +284,10 @@ def get_filetype(source_suffix, filename): def read_doc(app, env, filename): # type: (Sphinx, BuildEnvironment, unicode) -> nodes.document """Parse a document and convert to doctree.""" + # set up error_handler for the target document + error_handler = UnicodeDecodeErrorHandler(env.docname) + codecs.register_error('sphinx', error_handler) # type: ignore + filetype = get_filetype(app.config.source_suffix, filename) input_class = app.registry.get_source_input(filetype) reader = SphinxStandaloneReader(app) diff --git a/sphinx/util/__init__.py b/sphinx/util/__init__.py index d57f5627a..459434804 100644 --- a/sphinx/util/__init__.py +++ b/sphinx/util/__init__.py @@ -32,6 +32,7 @@ from six import text_type from sphinx.deprecation import RemovedInSphinx30Warning, RemovedInSphinx40Warning from sphinx.errors import PycodeError, SphinxParallelError, ExtensionError +from sphinx.locale import __ from sphinx.util import logging from sphinx.util.console import strip_colors, colorize, bold, term_width_line # type: ignore from sphinx.util.fileutil import copy_asset_file @@ -405,6 +406,28 @@ def detect_encoding(readline): return default +class UnicodeDecodeErrorHandler: + """Custom error handler for open() that warns and replaces.""" + + def __init__(self, docname): + # type: (unicode) -> None + self.docname = docname + + def __call__(self, error): + # type: (UnicodeDecodeError) -> Tuple[Union[unicode, str], int] + linestart = error.object.rfind(b'\n', 0, error.start) + lineend = error.object.find(b'\n', error.start) + if lineend == -1: + lineend = len(error.object) + lineno = error.object.count(b'\n', 0, error.start) + 1 + logger.warning(__('undecodable source characters, replacing with "?": %r'), + (error.object[linestart + 1:error.start] + b'>>>' + 
error.object[error.start:error.end] + b'<<<' + + error.object[error.end:lineend]), + location=(self.docname, lineno)) + return (u'?', error.end) + + # Low-level utility functions and classes. class Tee: