mirror of
https://github.com/sphinx-doc/sphinx.git
synced 2025-02-25 18:55:22 -06:00
Closes #657: viewcode now works correctly with source files that have non-ASCII encoding.
This commit is contained in:
parent
8965cf1095
commit
40c294f0c8
3
CHANGES
3
CHANGES
@ -1,6 +1,9 @@
|
||||
Release 1.0.8 (in development)
|
||||
==============================
|
||||
|
||||
* #657: viewcode now works correctly with source files that have
|
||||
non-ASCII encoding.
|
||||
|
||||
* #669: Respect the ``noindex`` flag option in py:module directives.
|
||||
|
||||
* #675: Fix IndexErrors when including nonexisting lines with
|
||||
|
@ -17,7 +17,7 @@ from cStringIO import StringIO
|
||||
from sphinx.errors import PycodeError
|
||||
from sphinx.pycode import nodes
|
||||
from sphinx.pycode.pgen2 import driver, token, tokenize, parse, literals
|
||||
from sphinx.util import get_module_source
|
||||
from sphinx.util import get_module_source, detect_encoding
|
||||
from sphinx.util.docstrings import prepare_docstring, prepare_commentdoc
|
||||
|
||||
|
||||
@ -37,10 +37,6 @@ for k, v in token.tok_name.iteritems():
|
||||
number2name = pygrammar.number2symbol.copy()
|
||||
number2name.update(token.tok_name)
|
||||
|
||||
|
||||
# a regex to recognize coding cookies
|
||||
_coding_re = re.compile(r'coding[:=]\s*([-\w.]+)')
|
||||
|
||||
_eq = nodes.Leaf(token.EQUAL, '=')
|
||||
|
||||
|
||||
@ -195,11 +191,10 @@ class ModuleAnalyzer(object):
|
||||
self.srcname = srcname
|
||||
# file-like object yielding source lines
|
||||
self.source = source
|
||||
# will be changed when found by parse()
|
||||
self.encoding = sys.getdefaultencoding()
|
||||
|
||||
# cache the source code as well
|
||||
pos = self.source.tell()
|
||||
self.encoding = detect_encoding(self.source.readline)
|
||||
self.code = self.source.read()
|
||||
self.source.seek(pos)
|
||||
|
||||
@ -229,13 +224,6 @@ class ModuleAnalyzer(object):
|
||||
self.parsetree = pydriver.parse_tokens(self.tokens)
|
||||
except parse.ParseError, err:
|
||||
raise PycodeError('parsing failed', err)
|
||||
# find the source code encoding, if present
|
||||
comments = self.parsetree.get_prefix()
|
||||
for line in comments.splitlines()[:2]:
|
||||
match = _coding_re.search(line)
|
||||
if match is not None:
|
||||
self.encoding = match.group(1)
|
||||
break
|
||||
|
||||
def find_attr_docs(self, scope=''):
|
||||
"""Find class and module-level attributes and their documentation."""
|
||||
|
@ -18,6 +18,7 @@ import tempfile
|
||||
import posixpath
|
||||
import traceback
|
||||
from os import path
|
||||
from codecs import BOM_UTF8
|
||||
|
||||
import docutils
|
||||
from docutils.utils import relative_path
|
||||
@ -211,6 +212,59 @@ def get_module_source(modname):
|
||||
return 'file', filename
|
||||
|
||||
|
||||
# a regex to recognize coding cookies
_coding_re = re.compile(r'coding[:=]\s*([-\w.]+)')


def detect_encoding(readline):
    """Like tokenize.detect_encoding() from Py3k, but a bit simplified.

    *readline* is a callable that returns the next line of the source as a
    byte string on every call; at most two lines are requested.  End of
    input may be signalled either by an empty return value or by raising
    StopIteration.

    Returns the name of the source encoding as a string:

    * the (normalized) name from a coding cookie found on the first or
      second line, or
    * ``'utf-8-sig'`` if the source starts with a UTF-8 BOM and either has
      no cookie or has a cookie that also declares UTF-8, or
    * ``sys.getdefaultencoding()`` if neither a BOM nor a cookie is found.
    """

    def read_or_stop():
        # Iterator-style readline callables raise StopIteration when they
        # are exhausted; treat that the same as "no more lines".
        try:
            return readline()
        except StopIteration:
            return None

    def get_normal_name(orig_enc):
        """Imitates get_normal_name in tokenizer.c."""
        # Only care about the first 12 characters.
        enc = orig_enc[:12].lower().replace('_', '-')
        if enc == 'utf-8' or enc.startswith('utf-8-'):
            return 'utf-8'
        if enc in ('latin-1', 'iso-8859-1', 'iso-latin-1') or \
           enc.startswith(('latin-1-', 'iso-8859-1-', 'iso-latin-1-')):
            return 'iso-8859-1'
        return orig_enc

    def find_cookie(line):
        # A line that is not pure ASCII cannot be a coding cookie line.
        try:
            line_string = line.decode('ascii')
        except UnicodeDecodeError:
            return None

        matches = _coding_re.findall(line_string)
        if not matches:
            return None
        encoding = get_normal_name(matches[0])
        if bom_found and encoding == 'utf-8':
            # Keep the "-sig" variant so that decoding with the returned
            # name strips the BOM instead of leaving U+FEFF in the text.
            return 'utf-8-sig'
        # NOTE: a BOM combined with a non-UTF-8 cookie is contradictory;
        # the cookie is still returned as-is to stay backward compatible.
        return encoding

    default = sys.getdefaultencoding()
    bom_found = False
    first = read_or_stop()
    if first and first.startswith(BOM_UTF8):
        bom_found = True
        first = first[len(BOM_UTF8):]
        default = 'utf-8-sig'
    if not first:
        return default

    encoding = find_cookie(first)
    if encoding:
        return encoding

    # The cookie is only honoured on the first two lines.
    second = read_or_stop()
    if not second:
        return default

    encoding = find_cookie(second)
    if encoding:
        return encoding

    return default
|
||||
|
||||
|
||||
# Low-level utility functions and classes.
|
||||
|
||||
class Tee(object):
|
||||
|
Loading…
Reference in New Issue
Block a user