sphinx/sphinx/pycode/__init__.py

# -*- coding: utf-8 -*-
"""
    sphinx.pycode
    ~~~~~~~~~~~~~

    Utilities parsing and analyzing Python code.

    :copyright: Copyright 2007-2017 by the Sphinx team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""
from __future__ import print_function

from six import iteritems, BytesIO, StringIO

from sphinx.errors import PycodeError
from sphinx.pycode.parser import Parser
from sphinx.util import get_module_source, detect_encoding

if False:
    # For type annotation
    from typing import Any, Dict, IO, List, Tuple  # NOQA


class ModuleAnalyzer(object):
    # cache for analyzer objects -- caches both by module and file name
    cache = {}  # type: Dict[Tuple[unicode, unicode], Any]

    @classmethod
    def for_string(cls, string, modname, srcname='<string>'):
        if isinstance(string, bytes):
            return cls(BytesIO(string), modname, srcname)
        return cls(StringIO(string), modname, srcname, decoded=True)

    @classmethod
    def for_file(cls, filename, modname):
        if ('file', filename) in cls.cache:
            return cls.cache['file', filename]
        try:
            fileobj = open(filename, 'rb')
        except Exception as err:
            raise PycodeError('error opening %r' % filename, err)
        obj = cls(fileobj, modname, filename)
        cls.cache['file', filename] = obj
        return obj

    @classmethod
    def for_module(cls, modname):
        if ('module', modname) in cls.cache:
            entry = cls.cache['module', modname]
            if isinstance(entry, PycodeError):
                raise entry
            return entry

        try:
            type, source = get_module_source(modname)
            if type == 'string':
                obj = cls.for_string(source, modname)
            else:
                obj = cls.for_file(source, modname)
        except PycodeError as err:
            cls.cache['module', modname] = err
            raise
        cls.cache['module', modname] = obj
        return obj

    def __init__(self, source, modname, srcname, decoded=False):
        # type: (IO, unicode, unicode, bool) -> None
        self.modname = modname  # name of the module
        self.srcname = srcname  # name of the source file

        # cache the source code as well
        pos = source.tell()
        if not decoded:
            self.encoding = detect_encoding(source.readline)
            source.seek(pos)
            self.code = source.read().decode(self.encoding)
        else:
            self.encoding = None
            self.code = source.read()

        # will be filled by parse()
        self.attr_docs = None   # type: Dict[Tuple[unicode, unicode], List[unicode]]
        self.tagorder = None    # type: Dict[unicode, int]
        self.tags = None        # type: Dict[unicode, Tuple[unicode, int, int]]

    def parse(self):
        # type: () -> None
        """Parse the source code."""
        try:
            parser = Parser(self.code, self.encoding)
            parser.parse()

            self.attr_docs = {}
            for (scope, comment) in iteritems(parser.comments):
                if comment:
                    self.attr_docs[scope] = comment.splitlines() + ['']
                else:
                    self.attr_docs[scope] = ['']

            self.tags = parser.definitions
            self.tagorder = parser.deforders
        except Exception as exc:
            raise PycodeError('parsing %r failed: %r' % (self.srcname, exc))

    def find_attr_docs(self):
        # type: () -> Dict[Tuple[unicode, unicode], List[unicode]]
        """Find class and module-level attributes and their documentation."""
        if self.attr_docs is None:
            self.parse()

        return self.attr_docs

    def find_tags(self):
        # type: () -> Dict[unicode, Tuple[unicode, int, int]]
        """Find class, function and method definitions and their location."""
        if self.tags is None:
            self.parse()

        return self.tags


if __name__ == '__main__':
    import time
    import pprint
    x0 = time.time()
    # ma = ModuleAnalyzer.for_file(__file__.rstrip('c'), 'sphinx.builders.html')
    ma = ModuleAnalyzer.for_file('sphinx/environment.py',
                                 'sphinx.environment')
    ma.tokenize()
    x1 = time.time()
    ma.parse()
    x2 = time.time()
    # for (ns, name), doc in iteritems(ma.find_attr_docs()):
    #     print '>>', ns, name
    #     print '\n'.join(doc)
    pprint.pprint(ma.find_tags())
    x3 = time.time()
    # print nodes.nice_repr(ma.parsetree, number2name)
    print("tokenizing %.4f, parsing %.4f, finding %.4f" % (x1 - x0, x2 - x1, x3 - x2))