From 2d0345a7d9ad1ddfd9203db7e1e0cb0ce558da8a Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Sat, 21 Feb 2009 19:58:52 +0100 Subject: [PATCH] #71: If a decoding error occurs in source files, print a warning and replace the characters by "?". --- CHANGES | 3 +++ sphinx/environment.py | 21 +++++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/CHANGES b/CHANGES index 17433eb84..407455d3f 100644 --- a/CHANGES +++ b/CHANGES @@ -1,6 +1,9 @@ Release 0.5.2 (in development) ============================== +* #71: If a decoding error occurs in source files, print a + warning and replace the characters by "?". + * Fix a problem in the HTML search if the index takes too long to load. diff --git a/sphinx/environment.py b/sphinx/environment.py index 43b44b76b..2a9cf3019 100644 --- a/sphinx/environment.py +++ b/sphinx/environment.py @@ -14,6 +14,7 @@ import os import time import heapq import types +import codecs import imghdr import difflib import cPickle as pickle @@ -496,6 +497,21 @@ class BuildEnvironment: # --------- SINGLE FILE READING -------------------------------------------- + def warn_and_replace(self, error): + """ + Custom decoding error handler that warns and replaces. + """ + linestart = error.object.rfind('\n', None, error.start) + lineend = error.object.find('\n', error.start) + if lineend == -1: lineend = len(error.object) + lineno = error.object.count('\n', 0, error.start) + 1 + self.warn(self.docname, 'undecodable source characters, ' + 'replacing with "?": %r' % + (error.object[linestart+1:error.start] + '>>>' + + error.object[error.start:error.end] + '<<<' + + error.object[error.end:lineend]), lineno) + return (u'?', error.end) + def read_doc(self, docname, src_path=None, save_parsed=True, app=None): """ Parse a file and add/update inventory entries for the doctree. @@ -521,7 +537,12 @@ class BuildEnvironment: self.docname = docname self.settings['input_encoding'] = self.config.source_encoding + codecs.register_error('sphinx', self.warn_and_replace) + class SphinxSourceClass(FileInput): + def decode(self_, data): + return data.decode(self_.encoding, 'sphinx') + def read(self): data = FileInput.read(self) if app: