Merge pull request #6907 from kpnr/patch-2

Non-ASCII & non utf-8 charset corruption fix
This commit is contained in:
Takeshi KOMIYA 2019-12-27 01:25:50 +09:00 committed by GitHub
commit b6244736c4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 83 additions and 8 deletions

View File

@ -11,8 +11,9 @@
import re
from io import StringIO
from os import path
from typing import Any, Dict, IO, List, Tuple
from typing import Any, Dict, IO, List, Tuple, Optional
from zipfile import ZipFile
from importlib import import_module
from sphinx.errors import PycodeError
from sphinx.pycode.parser import Parser
@ -23,6 +24,55 @@ class ModuleAnalyzer:
# cache for analyzer objects -- caches both by module and file name
cache = {} # type: Dict[Tuple[str, str], Any]
@staticmethod
def get_module_source(modname: str) -> Tuple[Optional[str], Optional[str]]:
    """Try to find the source code for a module.

    Returns a ``(filename, source)`` tuple.  Either item can be None if no
    filename or no source text was found: when the module's loader provides
    the source text it is returned together with ``__file__`` (which may be
    None); otherwise only a validated filename is returned and *source* is
    None.

    :raises PycodeError: if the module cannot be imported, no filename can
        be determined, the file is not a ``.py``/``.pyw`` file, or the
        source file is not present on disk (and not inside an egg file).
    """
    try:
        mod = import_module(modname)
    except Exception as err:
        # importing arbitrary modules may fail in arbitrary ways
        raise PycodeError('error importing %r' % modname, err) from err

    loader = getattr(mod, '__loader__', None)
    filename = getattr(mod, '__file__', None)

    if loader and getattr(loader, 'get_source', None):
        # prefer the native loader, as it respects the "# coding" directive
        try:
            source = loader.get_source(modname)
        except ImportError:
            source = None  # try the other "source-mining" methods below
        if source:
            # no exception and not empty - it must be the module source
            return filename, source

    if filename is None and loader and getattr(loader, 'get_filename', None):
        # have a loader, but no filename
        try:
            filename = loader.get_filename(modname)
        except ImportError as err:
            raise PycodeError('error getting filename for %r' % modname,
                              err) from err
    if filename is None:
        # all methods for getting the filename failed, so raise...
        raise PycodeError('no source found for module %r' % modname)

    filename = path.normpath(path.abspath(filename))
    lfilename = filename.lower()
    if lfilename.endswith(('.pyo', '.pyc')):
        # map the compiled file back to its source: "x.pyc" -> "x.py",
        # falling back to "x.pyw" when only that one exists
        filename = filename[:-1]
        if not path.isfile(filename) and path.isfile(filename + 'w'):
            filename += 'w'
    elif not lfilename.endswith(('.py', '.pyw')):
        raise PycodeError('source is not a .py file: %r' % filename)
    elif ('.egg' + path.sep) in filename:
        # NOTE: use ``path.sep``; only ``os.path`` is imported (as ``path``),
        # so referring to ``os.path.sep`` here would raise NameError.
        pat = '(?<=\\.egg)' + re.escape(path.sep)
        eggpath, _ = re.split(pat, filename, maxsplit=1)
        if path.isfile(eggpath):
            # the module lives inside an egg file; it cannot be checked
            # with isfile(), so return the filename as is
            return filename, None

    if not path.isfile(filename):
        raise PycodeError('source file is not present: %r' % filename)
    return filename, None
@classmethod
def for_string(cls, string: str, modname: str, srcname: str = '<string>'
) -> "ModuleAnalyzer":
@ -63,11 +113,11 @@ class ModuleAnalyzer:
return entry
try:
type, source = get_module_source(modname)
if type == 'string':
obj = cls.for_string(source, modname)
else:
obj = cls.for_file(source, modname)
filename, source = cls.get_module_source(modname)
if source is not None:
obj = cls.for_string(source, modname, filename if filename is not None else '<string>')
elif filename is not None:
obj = cls.for_file(filename, modname)
except PycodeError as err:
cls.cache['module', modname] = err
raise

View File

@ -0,0 +1,4 @@
#!python
# -*- coding: windows-1251 -*-
X="Õ" #:It MUST look like X="Õ"

View File

@ -10,12 +10,23 @@
import os
import sys
import pytest
import sphinx
from sphinx.pycode import ModuleAnalyzer
from sphinx.errors import PycodeError
SPHINX_MODULE_PATH = os.path.splitext(sphinx.__file__)[0] + '.py'
def test_ModuleAnalyzer_get_module_source():
    """Check get_module_source() for the sphinx package and builtin modules."""
    actual = ModuleAnalyzer.get_module_source('sphinx')
    expected = (sphinx.__file__, sphinx.__loader__.get_source('sphinx'))
    assert actual == expected

    # failed to obtain source information from builtin modules
    for builtin_name in ('builtins', 'itertools'):
        with pytest.raises(PycodeError):
            ModuleAnalyzer.get_module_source(builtin_name)
def test_ModuleAnalyzer_for_string():
analyzer = ModuleAnalyzer.for_string('print("Hello world")', 'module_name')
@ -31,12 +42,22 @@ def test_ModuleAnalyzer_for_file():
assert analyzer.encoding is None
def test_ModuleAnalyzer_for_module():
def test_ModuleAnalyzer_for_module(rootdir):
analyzer = ModuleAnalyzer.for_module('sphinx')
assert analyzer.modname == 'sphinx'
assert analyzer.srcname in (SPHINX_MODULE_PATH,
os.path.abspath(SPHINX_MODULE_PATH))
assert analyzer.encoding == 'utf-8'
# source is loaded via the native loader, so the file encoding is unknown
assert analyzer.encoding == None
path = rootdir / 'test-pycode'
sys.path.insert(0, path)
try:
analyzer = ModuleAnalyzer.for_module('cp_1251_coded')
docs = analyzer.find_attr_docs()
assert docs == {('', 'X'): ['It MUST look like X="\u0425"', '']}
finally:
sys.path.pop(0)
def test_ModuleAnalyzer_for_file_in_egg(rootdir):