Merge pull request #5729 from tk0miya/refactor_io

Add UnicodeDecodeErrorHandler as an error_handler for open()
This commit is contained in:
Takeshi KOMIYA 2018-12-07 23:22:15 +09:00 committed by GitHub
commit 4493d7bf07
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 33 additions and 16 deletions

View File

@ -21,7 +21,6 @@ from six import text_type
from typing import Any, Union # NOQA
from sphinx.deprecation import RemovedInSphinx30Warning
from sphinx.locale import __
from sphinx.transforms import (
ApplySourceWorkaround, ExtraTranslatableNodes, CitationReferences,
DefaultSubstitutions, MoveModuleTargets, HandleCodeBlocks, SortIds,
@ -35,6 +34,7 @@ from sphinx.transforms.i18n import (
)
from sphinx.transforms.references import SphinxDomains, SubstitutionDefinitionsRemover
from sphinx.util import logging
from sphinx.util import UnicodeDecodeErrorHandler
from sphinx.util.docutils import LoggingReporter
from sphinx.util.rst import append_epilog, docinfo_re, prepend_prolog
from sphinx.versioning import UIDTransform
@ -167,9 +167,6 @@ class SphinxBaseFileInput(FileInput):
self.app = app
self.env = env
# set up error handler
codecs.register_error('sphinx', self.warn_and_replace) # type: ignore
kwds['error_handler'] = 'sphinx' # py3: handle error on open.
super(SphinxBaseFileInput, self).__init__(*args, **kwds)
@ -194,18 +191,11 @@ class SphinxBaseFileInput(FileInput):
def warn_and_replace(self, error):
    # type: (Any) -> Tuple
    """Deprecated decoding error handler that warns and replaces.

    Kept only for backward compatibility.  It emits a
    ``RemovedInSphinx30Warning`` and then delegates to a
    ``UnicodeDecodeErrorHandler`` built for ``self.env.docname``, so the
    warn-and-replace logic lives in a single place instead of being
    duplicated here.

    :param error: the decoding error object handed to the error handler
    :return: the value returned by ``UnicodeDecodeErrorHandler.__call__``
    """
    # Warn before delegating; previously an early ``return`` made this
    # deprecation notice unreachable.
    warnings.warn('SphinxBaseFileInput.warn_and_replace() is deprecated. '
                  'Use UnicodeDecodeErrorHandler instead.',
                  RemovedInSphinx30Warning, stacklevel=2)
    return UnicodeDecodeErrorHandler(self.env.docname)(error)
class SphinxFileInput(SphinxBaseFileInput):
@ -294,6 +284,10 @@ def get_filetype(source_suffix, filename):
def read_doc(app, env, filename):
# type: (Sphinx, BuildEnvironment, unicode) -> nodes.document
"""Parse a document and convert to doctree."""
# set up error_handler for the target document
error_handler = UnicodeDecodeErrorHandler(env.docname)
codecs.register_error('sphinx', error_handler) # type: ignore
filetype = get_filetype(app.config.source_suffix, filename)
input_class = app.registry.get_source_input(filetype)
reader = SphinxStandaloneReader(app)

View File

@ -32,6 +32,7 @@ from six import text_type
from sphinx.deprecation import RemovedInSphinx30Warning, RemovedInSphinx40Warning
from sphinx.errors import PycodeError, SphinxParallelError, ExtensionError
from sphinx.locale import __
from sphinx.util import logging
from sphinx.util.console import strip_colors, colorize, bold, term_width_line # type: ignore
from sphinx.util.fileutil import copy_asset_file
@ -405,6 +406,28 @@ def detect_encoding(readline):
return default
class UnicodeDecodeErrorHandler:
    """Error handler for open() that logs a warning and substitutes "?".

    An instance is a callable taking a decoding error.  It logs the line
    containing the undecodable bytes, with those bytes wrapped in
    ``>>>``/``<<<`` markers, and returns the replacement text together with
    the position at which decoding should resume.
    """

    def __init__(self, docname):
        # type: (unicode) -> None
        # Used as the first element of the warning's ``location`` tuple.
        self.docname = docname

    def __call__(self, error):
        # type: (UnicodeDecodeError) -> Tuple[Union[unicode, str], int]
        data = error.object
        bad_start = error.start
        bad_end = error.end

        # Locate the line that holds the first undecodable byte.  ``rfind``
        # yields -1 when there is no preceding newline, so adding 1 makes
        # the line begin at offset 0 in that case.
        line_begin = data.rfind(b'\n', 0, bad_start) + 1
        line_end = data.find(b'\n', bad_start)
        if line_end == -1:
            # No trailing newline: the line runs to the end of the input.
            line_end = len(data)

        # 1-based line number: newlines seen before the bad byte, plus one.
        lineno = data.count(b'\n', 0, bad_start) + 1

        before = data[line_begin:bad_start]
        culprit = data[bad_start:bad_end]
        after = data[bad_end:line_end]
        excerpt = before + b'>>>' + culprit + b'<<<' + after

        logger.warning(__('undecodable source characters, replacing with "?": %r'),
                       excerpt,
                       location=(self.docname, lineno))
        return (u'?', bad_end)
# Low-level utility functions and classes.
class Tee: