Move `UnicodeDecodeErrorHandler` to `sphinx.builders`

This commit is contained in:
Adam Turner 2024-10-05 17:05:45 +01:00
parent 71598455a2
commit d3df44cc8b
2 changed files with 28 additions and 29 deletions

View File

@ -18,7 +18,6 @@ from sphinx.environment.adapters.asset import ImageAdapter
from sphinx.errors import SphinxError from sphinx.errors import SphinxError
from sphinx.locale import __ from sphinx.locale import __
from sphinx.util import ( from sphinx.util import (
UnicodeDecodeErrorHandler,
get_filetype, get_filetype,
logging, logging,
rst, rst,
@ -618,7 +617,8 @@ class Builder:
rst.default_role(docname, self.config.default_role), rst.default_role(docname, self.config.default_role),
): ):
# set up error_handler for the target document # set up error_handler for the target document
codecs.register_error('sphinx', UnicodeDecodeErrorHandler(docname)) # type: ignore[arg-type] error_handler = _UnicodeDecodeErrorHandler(docname)
codecs.register_error('sphinx', error_handler) # type: ignore[arg-type]
publisher.set_source(source_path=filename) publisher.set_source(source_path=filename)
publisher.publish() publisher.publish()
@ -813,3 +813,29 @@ class Builder:
except AttributeError: except AttributeError:
optname = f'{default}_{option}' optname = f'{default}_{option}'
return getattr(self.config, optname) return getattr(self.config, optname)
class _UnicodeDecodeErrorHandler:
    """Decode-error callback: emit a warning, then substitute ``?``.

    Instances are callable with a ``UnicodeDecodeError`` and return the
    ``(replacement, resume_position)`` pair. The warning is attributed to
    the document name given at construction time.
    """

    def __init__(self, docname: str, /) -> None:
        # Kept only so the warning can be located in the right document.
        self.docname = docname

    def __call__(self, error: UnicodeDecodeError) -> tuple[str, int]:
        """Log the offending bytes in context and skip past them.

        :param error: the decode error carrying the raw input and the
            ``start``/``end`` offsets of the undecodable bytes.
        :returns: ``'?'`` as replacement text and ``error.end`` as the
            offset just past the undecodable bytes.
        """
        raw = error.object
        bad_start, bad_end = error.start, error.end

        # Find the newline on each side of the failure; ``rfind`` yields -1
        # when the failure is on the first line, so ``+ 1`` below gives 0.
        prev_newline = raw.rfind(b'\n', 0, bad_start)
        next_newline = raw.find(b'\n', bad_start)
        if next_newline == -1:
            # No trailing newline: the line runs to the end of the input.
            next_newline = len(raw)

        # 1-based line number: newlines before the failure, plus one.
        line_num = raw.count(b'\n', 0, bad_start) + 1

        # The affected line with the undecodable bytes fenced by >>> <<<.
        before = raw[prev_newline + 1 : bad_start]
        culprit = raw[bad_start:bad_end]
        after = raw[bad_end:next_newline]
        excerpt = before + b'>>>' + culprit + b'<<<' + after

        logger.warning(
            __('undecodable source characters, replacing with "?": %r'),
            excerpt,
            location=(self.docname, line_num),
        )
        return '?', bad_end

View File

@ -9,7 +9,6 @@ import re
from typing import Any from typing import Any
from sphinx.errors import FiletypeNotFoundError from sphinx.errors import FiletypeNotFoundError
from sphinx.locale import __
from sphinx.util import _files, _importer, logging from sphinx.util import _files, _importer, logging
from sphinx.util import index_entries as _index_entries from sphinx.util import index_entries as _index_entries
from sphinx.util._lines import parse_line_num_spec as parselinenos # NoQA: F401 from sphinx.util._lines import parse_line_num_spec as parselinenos # NoQA: F401
@ -73,32 +72,6 @@ def _sha1(data: bytes = b'', **_kw: Any) -> hashlib._Hash:
return hashlib.sha1(data, usedforsecurity=False) return hashlib.sha1(data, usedforsecurity=False)
class UnicodeDecodeErrorHandler:
    """Callable that warns about undecodable bytes and replaces them.

    Called with a ``UnicodeDecodeError``, it logs the affected line and
    answers with a ``?`` placeholder plus the offset to continue from.
    """

    def __init__(self, docname: str) -> None:
        # Document name reported as the location of each warning.
        self.docname = docname

    def __call__(self, error: UnicodeDecodeError) -> tuple[str, int]:
        """Report one decode failure and return its replacement.

        :param error: decode error exposing ``object`` (the raw bytes) and
            the ``start``/``end`` offsets of the bytes that failed.
        :returns: the tuple ``('?', error.end)``.
        """
        data, start, end = error.object, error.start, error.end

        # Offset of the first byte of the line containing the failure
        # (``rfind`` gives -1 on the first line, hence 0 after the + 1).
        first = data.rfind(b'\n', 0, start) + 1

        # Offset of the newline ending that line, or end of data if none.
        last = data.find(b'\n', start)
        if last == -1:
            last = len(data)

        # Lines are numbered from 1.
        lineno = 1 + data.count(b'\n', 0, start)

        # Show the line with the undecodable bytes marked as >>>...<<<.
        marked = data[first:start] + b'>>>' + data[start:end] + b'<<<' + data[end:last]

        logger.warning(
            __('undecodable source characters, replacing with "?": %r'),
            marked,
            location=(self.docname, lineno),
        )
        return ('?', end)
# deprecated name -> (object to return, canonical path or empty string) # deprecated name -> (object to return, canonical path or empty string)
_DEPRECATED_OBJECTS: dict[str, tuple[Any, str, tuple[int, int]]] = { _DEPRECATED_OBJECTS: dict[str, tuple[Any, str, tuple[int, int]]] = {
'split_index_msg': ( 'split_index_msg': (