Fix #705: read module source in ModuleAnalyzer in binary mode, decode afterwards.

This commit is contained in:
Georg Brandl 2011-09-19 09:03:07 +02:00
parent 28609cc9b9
commit 7fa67682ac
2 changed files with 21 additions and 8 deletions

View File

@@ -10,13 +10,12 @@
""" """
from os import path from os import path
from cStringIO import StringIO
from sphinx.errors import PycodeError from sphinx.errors import PycodeError
from sphinx.pycode import nodes from sphinx.pycode import nodes
from sphinx.pycode.pgen2 import driver, token, tokenize, parse, literals from sphinx.pycode.pgen2 import driver, token, tokenize, parse, literals
from sphinx.util import get_module_source, detect_encoding from sphinx.util import get_module_source, detect_encoding
from sphinx.util.pycompat import next from sphinx.util.pycompat import next, StringIO, BytesIO, TextIOWrapper
from sphinx.util.docstrings import prepare_docstring, prepare_commentdoc from sphinx.util.docstrings import prepare_docstring, prepare_commentdoc
@@ -170,14 +169,16 @@ class ModuleAnalyzer(object):
@classmethod @classmethod
def for_string(cls, string, modname, srcname='<string>'): def for_string(cls, string, modname, srcname='<string>'):
return cls(StringIO(string), modname, srcname) if isinstance(string, bytes):
return cls(BytesIO(string), modname, srcname)
return cls(StringIO(string), modname, srcname, decoded=True)
@classmethod @classmethod
def for_file(cls, filename, modname): def for_file(cls, filename, modname):
if ('file', filename) in cls.cache: if ('file', filename) in cls.cache:
return cls.cache['file', filename] return cls.cache['file', filename]
try: try:
fileobj = open(filename, 'r') fileobj = open(filename, 'rb')
except Exception, err: except Exception, err:
raise PycodeError('error opening %r' % filename, err) raise PycodeError('error opening %r' % filename, err)
obj = cls(fileobj, modname, filename) obj = cls(fileobj, modname, filename)
@@ -204,7 +205,7 @@ class ModuleAnalyzer(object):
cls.cache['module', modname] = obj cls.cache['module', modname] = obj
return obj return obj
def __init__(self, source, modname, srcname): def __init__(self, source, modname, srcname, decoded=False):
# name of the module # name of the module
self.modname = modname self.modname = modname
# name of the source file # name of the source file
@@ -214,9 +215,15 @@ class ModuleAnalyzer(object):
# cache the source code as well # cache the source code as well
pos = self.source.tell() pos = self.source.tell()
self.encoding = detect_encoding(self.source.readline) if not decoded:
self.code = self.source.read() self.encoding = detect_encoding(self.source.readline)
self.source.seek(pos) self.code = self.source.read().decode(self.encoding)
self.source.seek(pos)
self.source = TextIOWrapper(self.source, self.encoding)
else:
self.encoding = None
self.code = self.source.read()
self.source.seek(pos)
# will be filled by tokenize() # will be filled by tokenize()
self.tokens = None self.tokens = None

View File

@@ -25,6 +25,8 @@ if sys.version_info >= (3, 0):
bytes = bytes bytes = bytes
# prefix for Unicode strings # prefix for Unicode strings
u = '' u = ''
# StringIO/BytesIO classes
from io import StringIO, BytesIO, TextIOWrapper
# support for running 2to3 over config files # support for running 2to3 over config files
def convert_with_2to3(filepath): def convert_with_2to3(filepath):
from lib2to3.refactor import RefactoringTool, get_fixers_from_package from lib2to3.refactor import RefactoringTool, get_fixers_from_package
@@ -48,8 +50,12 @@ else:
b = str b = str
bytes = str bytes = str
u = 'u' u = 'u'
from StringIO import StringIO
BytesIO = StringIO
# no need to refactor on 2.x versions # no need to refactor on 2.x versions
convert_with_2to3 = None convert_with_2to3 = None
def TextIOWrapper(stream, encoding):
return codecs.lookup(encoding or 'ascii')[2](stream)
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------