sphinx/sphinx/pycode/__init__.py

140 lines
4.5 KiB
Python

# -*- coding: utf-8 -*-
"""
sphinx.pycode
~~~~~~~~~~~~~
Utilities for parsing and analyzing Python code.
:copyright: Copyright 2007-2017 by the Sphinx team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
from __future__ import print_function
from six import iteritems, BytesIO, StringIO
from sphinx.errors import PycodeError
from sphinx.pycode.parser import Parser
from sphinx.util import get_module_source, detect_encoding
if False:
# For type annotation
from typing import Any, Dict, IO, List, Tuple # NOQA
class ModuleAnalyzer(object):
    """Hold the source code of one module and the data parsed out of it.

    Instances are usually created through the alternate constructors
    :meth:`for_string`, :meth:`for_file` and :meth:`for_module`.  The source
    text is read eagerly in ``__init__``; parsing is deferred until
    :meth:`parse`, :meth:`find_attr_docs` or :meth:`find_tags` is called.
    """

    # cache for analyzer objects -- caches both by module and file name.
    # Keys are ('file', filename) or ('module', modname); a 'module' entry
    # may also hold the PycodeError raised while analyzing that module.
    cache = {}  # type: Dict[Tuple[unicode, unicode], Any]

    @classmethod
    def for_string(cls, string, modname, srcname='<string>'):
        """Create an analyzer for source code given as a string.

        :param string: the source code; ``bytes`` input is wrapped in a
            ``BytesIO`` and decoded by ``__init__``, anything else is wrapped
            in a ``StringIO`` and taken as already decoded.
        :param modname: name of the module the code belongs to.
        :param srcname: name used for the source in error messages.
        :returns: a new, uncached analyzer instance.
        """
        if isinstance(string, bytes):
            return cls(BytesIO(string), modname, srcname)
        return cls(StringIO(string), modname, srcname, decoded=True)

    @classmethod
    def for_file(cls, filename, modname):
        """Create (or fetch from the cache) an analyzer for a source file.

        :param filename: path of the file to read; it is opened in binary
            mode.
        :param modname: name of the module the file belongs to.
        :returns: the analyzer cached under ``('file', filename)``.
        :raises PycodeError: if the file cannot be opened.
        """
        cache_key = ('file', filename)
        if cache_key in cls.cache:
            return cls.cache[cache_key]
        try:
            fileobj = open(filename, 'rb')
        except Exception as err:
            raise PycodeError('error opening %r' % filename, err)
        # __init__ reads the whole source up front, so the handle is no
        # longer needed once the analyzer exists; close it instead of
        # leaking it.
        with fileobj:
            obj = cls(fileobj, modname, filename)
        cls.cache[cache_key] = obj
        return obj

    @classmethod
    def for_module(cls, modname):
        """Create (or fetch from the cache) an analyzer for a module name.

        The source is located with ``get_module_source`` and handed to
        :meth:`for_string` or :meth:`for_file`.  Failures are cached too, so
        a module that could not be analyzed raises the same error again on
        the next call without repeating the lookup.

        :param modname: name of the module to analyze.
        :returns: the analyzer cached under ``('module', modname)``.
        :raises PycodeError: if locating or loading the source fails.
        """
        cache_key = ('module', modname)
        if cache_key in cls.cache:
            entry = cls.cache[cache_key]
            if isinstance(entry, PycodeError):
                raise entry
            return entry

        try:
            # ``kind`` rather than ``type`` to avoid shadowing the builtin
            kind, source = get_module_source(modname)
            if kind == 'string':
                obj = cls.for_string(source, modname)
            else:
                obj = cls.for_file(source, modname)
        except PycodeError as err:
            cls.cache[cache_key] = err
            raise
        cls.cache[cache_key] = obj
        return obj

    def __init__(self, source, modname, srcname, decoded=False):
        # type: (IO, unicode, unicode, bool) -> None
        """Read the complete source code from *source*.

        :param source: file-like object supporting ``tell()`` and ``read()``
            (plus ``readline()`` and ``seek()`` when *decoded* is false).
        :param modname: name of the module.
        :param srcname: name of the source file.
        :param decoded: if true, *source* already yields decoded text and no
            encoding detection is performed.
        """
        self.modname = modname  # name of the module
        self.srcname = srcname  # name of the source file

        # cache the source code as well
        pos = source.tell()
        if not decoded:
            self.encoding = detect_encoding(source.readline)
            # detect_encoding consumed lines through readline; rewind to
            # where we started before reading the full text
            source.seek(pos)
            self.code = source.read().decode(self.encoding)
        else:
            self.encoding = None
            self.code = source.read()

        # will be filled by parse()
        self.attr_docs = None   # type: Dict[Tuple[unicode, unicode], List[unicode]]
        self.tagorder = None    # type: Dict[unicode, int]
        self.tags = None        # type: Dict[unicode, Tuple[unicode, int, int]]

    def parse(self):
        # type: () -> None
        """Parse the source code.

        Fills ``attr_docs`` from the parser's comments (each entry is the
        comment split into lines followed by an empty string), ``tags`` from
        the parser's definitions and ``tagorder`` from its definition orders.

        :raises PycodeError: if anything goes wrong while parsing.
        """
        try:
            parser = Parser(self.code, self.encoding)
            parser.parse()

            self.attr_docs = {}
            for (scope, comment) in iteritems(parser.comments):
                if comment:
                    self.attr_docs[scope] = comment.splitlines() + ['']
                else:
                    self.attr_docs[scope] = ['']

            self.tags = parser.definitions
            self.tagorder = parser.deforders
        except Exception as exc:
            raise PycodeError('parsing %r failed: %r' % (self.srcname, exc))

    def find_attr_docs(self):
        # type: () -> Dict[Tuple[unicode, unicode], List[unicode]]
        """Find class and module-level attributes and their documentation.

        Parses the source on first use.
        """
        if self.attr_docs is None:
            self.parse()

        return self.attr_docs

    def find_tags(self):
        # type: () -> Dict[unicode, Tuple[unicode, int, int]]
        """Find class, function and method definitions and their location.

        Parses the source on first use.
        """
        if self.tags is None:
            self.parse()

        return self.tags
if __name__ == '__main__':
    # Ad-hoc timing harness: analyze one source file, print the definitions
    # that were found and report how long each stage took.
    import time
    import pprint

    x0 = time.time()
    ma = ModuleAnalyzer.for_file('sphinx/environment.py',
                                 'sphinx.environment')
    x1 = time.time()
    ma.parse()
    x2 = time.time()
    pprint.pprint(ma.find_tags())
    x3 = time.time()
    # ModuleAnalyzer has no separate tokenize step (calling ma.tokenize()
    # raised AttributeError), so the first interval only covers reading and
    # decoding the file.
    print("loading %.4f, parsing %.4f, finding %.4f" % (x1 - x0, x2 - x1, x3 - x2))