Merge pull request #5729 from tk0miya/refactor_io

Add UnicodeDecodeErrorHandler as an error_handler for open()
This commit is contained in:
Takeshi KOMIYA 2018-12-07 23:22:15 +09:00 committed by GitHub
commit 4493d7bf07
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 33 additions and 16 deletions

View File

@ -21,7 +21,6 @@ from six import text_type
from typing import Any, Union # NOQA from typing import Any, Union # NOQA
from sphinx.deprecation import RemovedInSphinx30Warning from sphinx.deprecation import RemovedInSphinx30Warning
from sphinx.locale import __
from sphinx.transforms import ( from sphinx.transforms import (
ApplySourceWorkaround, ExtraTranslatableNodes, CitationReferences, ApplySourceWorkaround, ExtraTranslatableNodes, CitationReferences,
DefaultSubstitutions, MoveModuleTargets, HandleCodeBlocks, SortIds, DefaultSubstitutions, MoveModuleTargets, HandleCodeBlocks, SortIds,
@ -35,6 +34,7 @@ from sphinx.transforms.i18n import (
) )
from sphinx.transforms.references import SphinxDomains, SubstitutionDefinitionsRemover from sphinx.transforms.references import SphinxDomains, SubstitutionDefinitionsRemover
from sphinx.util import logging from sphinx.util import logging
from sphinx.util import UnicodeDecodeErrorHandler
from sphinx.util.docutils import LoggingReporter from sphinx.util.docutils import LoggingReporter
from sphinx.util.rst import append_epilog, docinfo_re, prepend_prolog from sphinx.util.rst import append_epilog, docinfo_re, prepend_prolog
from sphinx.versioning import UIDTransform from sphinx.versioning import UIDTransform
@ -167,9 +167,6 @@ class SphinxBaseFileInput(FileInput):
self.app = app self.app = app
self.env = env self.env = env
# set up error handler
codecs.register_error('sphinx', self.warn_and_replace) # type: ignore
kwds['error_handler'] = 'sphinx' # py3: handle error on open. kwds['error_handler'] = 'sphinx' # py3: handle error on open.
super(SphinxBaseFileInput, self).__init__(*args, **kwds) super(SphinxBaseFileInput, self).__init__(*args, **kwds)
@ -194,18 +191,11 @@ class SphinxBaseFileInput(FileInput):
def warn_and_replace(self, error): def warn_and_replace(self, error):
# type: (Any) -> Tuple # type: (Any) -> Tuple
"""Custom decoding error handler that warns and replaces.""" warnings.warn('SphinxBaseFileInput.warn_and_replace() is deprecated. '
linestart = error.object.rfind(b'\n', 0, error.start) 'Use UnicodeDecodeErrorHandler instead.',
lineend = error.object.find(b'\n', error.start) RemovedInSphinx30Warning, stacklevel=2)
if lineend == -1:
lineend = len(error.object) return UnicodeDecodeErrorHandler(self.env.docname)(error)
lineno = error.object.count(b'\n', 0, error.start) + 1
logger.warning(__('undecodable source characters, replacing with "?": %r'),
(error.object[linestart + 1:error.start] + b'>>>' +
error.object[error.start:error.end] + b'<<<' +
error.object[error.end:lineend]),
location=(self.env.docname, lineno))
return (u'?', error.end)
class SphinxFileInput(SphinxBaseFileInput): class SphinxFileInput(SphinxBaseFileInput):
@ -294,6 +284,10 @@ def get_filetype(source_suffix, filename):
def read_doc(app, env, filename): def read_doc(app, env, filename):
# type: (Sphinx, BuildEnvironment, unicode) -> nodes.document # type: (Sphinx, BuildEnvironment, unicode) -> nodes.document
"""Parse a document and convert to doctree.""" """Parse a document and convert to doctree."""
# set up error_handler for the target document
error_handler = UnicodeDecodeErrorHandler(env.docname)
codecs.register_error('sphinx', error_handler) # type: ignore
filetype = get_filetype(app.config.source_suffix, filename) filetype = get_filetype(app.config.source_suffix, filename)
input_class = app.registry.get_source_input(filetype) input_class = app.registry.get_source_input(filetype)
reader = SphinxStandaloneReader(app) reader = SphinxStandaloneReader(app)

View File

@ -32,6 +32,7 @@ from six import text_type
from sphinx.deprecation import RemovedInSphinx30Warning, RemovedInSphinx40Warning from sphinx.deprecation import RemovedInSphinx30Warning, RemovedInSphinx40Warning
from sphinx.errors import PycodeError, SphinxParallelError, ExtensionError from sphinx.errors import PycodeError, SphinxParallelError, ExtensionError
from sphinx.locale import __
from sphinx.util import logging from sphinx.util import logging
from sphinx.util.console import strip_colors, colorize, bold, term_width_line # type: ignore from sphinx.util.console import strip_colors, colorize, bold, term_width_line # type: ignore
from sphinx.util.fileutil import copy_asset_file from sphinx.util.fileutil import copy_asset_file
@ -405,6 +406,28 @@ def detect_encoding(readline):
return default return default
class UnicodeDecodeErrorHandler:
    """Decoding error handler for open() that emits a warning and substitutes "?".

    An instance is callable with a decoding error; it logs the offending
    line (with the undecodable bytes wrapped in ``>>>`` / ``<<<``) against
    the document name given at construction time.
    """

    def __init__(self, docname):
        # type: (unicode) -> None
        # Document name used as the location of any warning that is logged.
        self.docname = docname

    def __call__(self, error):
        # type: (UnicodeDecodeError) -> Tuple[Union[unicode, str], int]
        """Log a warning for *error* and return ``(u'?', error.end)``."""
        data = error.object
        bad_from, bad_to = error.start, error.end

        # Bounds of the line holding the bad bytes: just after the previous
        # newline (rfind() yields -1 when there is none, so this becomes 0)
        # up to the next newline, or the end of the input.
        line_from = data.rfind(b'\n', 0, bad_from) + 1
        line_to = data.find(b'\n', bad_from)
        if line_to == -1:
            line_to = len(data)

        # 1-based line number: newlines preceding the bad bytes, plus one.
        lineno = data.count(b'\n', 0, bad_from) + 1

        # The line's bytes with the undecodable part highlighted.
        excerpt = b''.join((
            data[line_from:bad_from],
            b'>>>',
            data[bad_from:bad_to],
            b'<<<',
            data[bad_to:line_to],
        ))
        logger.warning(__('undecodable source characters, replacing with "?": %r'),
                       excerpt,
                       location=(self.docname, lineno))

        # Replacement text and the position reported as the end of the error.
        return (u'?', bad_to)
# Low-level utility functions and classes. # Low-level utility functions and classes.
class Tee: class Tee: