Fix #705: read module source in ModuleAnalyzer in binary mode, decode afterwards.

This commit is contained in:
Georg Brandl 2011-09-19 09:03:07 +02:00
parent 28609cc9b9
commit 7fa67682ac
2 changed files with 21 additions and 8 deletions

View File

@ -10,13 +10,12 @@
"""
from os import path
from cStringIO import StringIO
from sphinx.errors import PycodeError
from sphinx.pycode import nodes
from sphinx.pycode.pgen2 import driver, token, tokenize, parse, literals
from sphinx.util import get_module_source, detect_encoding
from sphinx.util.pycompat import next
from sphinx.util.pycompat import next, StringIO, BytesIO, TextIOWrapper
from sphinx.util.docstrings import prepare_docstring, prepare_commentdoc
@ -170,14 +169,16 @@ class ModuleAnalyzer(object):
@classmethod
def for_string(cls, string, modname, srcname='<string>'):
return cls(StringIO(string), modname, srcname)
if isinstance(string, bytes):
return cls(BytesIO(string), modname, srcname)
return cls(StringIO(string), modname, srcname, decoded=True)
@classmethod
def for_file(cls, filename, modname):
if ('file', filename) in cls.cache:
return cls.cache['file', filename]
try:
fileobj = open(filename, 'r')
fileobj = open(filename, 'rb')
except Exception, err:
raise PycodeError('error opening %r' % filename, err)
obj = cls(fileobj, modname, filename)
@ -204,7 +205,7 @@ class ModuleAnalyzer(object):
cls.cache['module', modname] = obj
return obj
def __init__(self, source, modname, srcname):
def __init__(self, source, modname, srcname, decoded=False):
# name of the module
self.modname = modname
# name of the source file
@ -214,9 +215,15 @@ class ModuleAnalyzer(object):
# cache the source code as well
pos = self.source.tell()
self.encoding = detect_encoding(self.source.readline)
self.code = self.source.read()
self.source.seek(pos)
if not decoded:
self.encoding = detect_encoding(self.source.readline)
self.code = self.source.read().decode(self.encoding)
self.source.seek(pos)
self.source = TextIOWrapper(self.source, self.encoding)
else:
self.encoding = None
self.code = self.source.read()
self.source.seek(pos)
# will be filled by tokenize()
self.tokens = None

View File

@ -25,6 +25,8 @@ if sys.version_info >= (3, 0):
bytes = bytes
# prefix for Unicode strings
u = ''
# StringIO/BytesIO classes
from io import StringIO, BytesIO, TextIOWrapper
# support for running 2to3 over config files
def convert_with_2to3(filepath):
from lib2to3.refactor import RefactoringTool, get_fixers_from_package
@ -48,8 +50,12 @@ else:
b = str
bytes = str
u = 'u'
from StringIO import StringIO
BytesIO = StringIO
# no need to refactor on 2.x versions
convert_with_2to3 = None
def TextIOWrapper(stream, encoding):
return codecs.lookup(encoding or 'ascii')[2](stream)
# ------------------------------------------------------------------------------