sphinx/sphinx/pycode/__init__.py

# -*- coding: utf-8 -*-
"""
    sphinx.pycode
    ~~~~~~~~~~~~~

    Utilities parsing and analyzing Python code.

    :copyright: Copyright 2007-2009 by the Sphinx team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re
import sys
from os import path
from cStringIO import StringIO

from sphinx.pycode import nodes
from sphinx.pycode.pgen2 import driver, token, tokenize, parse, literals
from sphinx.util.docstrings import prepare_docstring, prepare_commentdoc


# load the Python grammar
# (Grammar.txt is looked up in the directory that contains this module)
_grammarfile = path.join(path.dirname(__file__), 'Grammar.txt')
pygrammar = driver.load_grammar(_grammarfile)
# parser driver for that grammar, using nodes.convert as its convert callback
pydriver = driver.Driver(pygrammar, convert=nodes.convert)

# an object with attributes corresponding to token and symbol names
# (used as a plain namespace below, e.g. ``sym.simple_stmt``)
class sym: pass
# grammar symbols: symbol2number maps name -> number
for k, v in pygrammar.symbol2number.iteritems():
    setattr(sym, k, v)
# tokens: tok_name maps number -> name, hence the swapped arguments
for k, v in token.tok_name.iteritems():
    setattr(sym, v, k)

# a dict mapping terminal and nonterminal numbers to their names
number2name = pygrammar.number2symbol.copy()
number2name.update(token.tok_name)


# a regex to recognize coding cookies
# (group 1 is the encoding name following "coding:" or "coding=")
_coding_re = re.compile(r'coding[:=]\s*([-\w.]+)')

# an "=" leaf, used for ``_eq in node.children`` tests to detect assignments
_eq = nodes.Leaf(token.EQUAL, '=')


class AttrDocVisitor(nodes.NodeVisitor):
    """
    Tree visitor gathering the documentation of attributes that are assigned
    at module level or directly inside class bodies.

    Two documentation styles are recognized: a block of '#:' comments placed
    in front of the assignment, and a string literal statement that follows
    the assignment.

    The result is available as ``self.collected``, a dict that maps
    ``(namespace, attribute name)`` to the prepared docstring; the namespace
    is the dotted path of the enclosing classes.
    """
    def init(self, scope, encoding):
        # only namespaces starting with *scope* are recorded
        self.scope = scope
        # encoding used to decode comments and string literals
        self.encoding = encoding
        # names of the classes the visitor is currently inside of
        self.namespace = []
        # (namespace, name) -> docstring
        self.collected = {}

    def visit_classdef(self, node):
        """Visit a class body with the class name pushed on the namespace."""
        classname = node[1].value
        self.namespace.append(classname)
        self.generic_visit(node)
        self.namespace.pop()

    def visit_expr_stmt(self, node):
        """Record the comment documentation preceding an assignment, if any."""
        # only plain assignments are of interest; augmented assignments and
        # other expression statements have no "=" child
        if _eq in node.children:
            leaf = node[0]
            comment = leaf.get_prefix()
            # for the first statement on a new indentation level, the
            # whitespace and comments in front of it belong to the preceding
            # INDENT or DEDENT token instead of the statement itself, so
            # walk back over those tokens until a prefix is found
            while not comment:
                leaf = leaf.get_prev_leaf()
                if leaf and leaf.type in (token.INDENT, token.DEDENT):
                    comment = leaf.get_prefix()
                else:
                    break
            lines = prepare_commentdoc(comment.decode(self.encoding))
            if lines:
                self.add_docstring(node, lines)

    def visit_simple_stmt(self, node):
        """Record a string statement that directly follows an assignment."""
        first = node[0]
        if first.type != token.STRING:
            # no docstring candidate, but assignments may be found below
            return self.generic_visit(node)
        previous = node.get_prev_sibling()
        if not previous or previous.type != sym.simple_stmt:
            return
        assignment = previous[0]
        if assignment.type != sym.expr_stmt or \
               _eq not in assignment.children:
            return
        # the token value is the literal as written in the source, so it
        # has to be evaluated to get at the actual string
        text = literals.evalString(first.value, self.encoding)
        self.add_docstring(assignment, prepare_docstring(text))

    def visit_funcdef(self, node):
        """Skip function bodies; assignments in there are not attributes."""
        pass

    def add_docstring(self, node, docstring):
        """Store *docstring* for every simple name assigned to in *node*."""
        qualname = '.'.join(self.namespace)
        if not qualname.startswith(self.scope):
            return
        # targets sit at the even child positions, separated by "=" leaves;
        # the last child is the assigned value and therefore left out
        for index in range(0, len(node) - 1, 2):
            target = node[index]
            # anything but a plain name is a complex target and is ignored
            if target.type == token.NAME:
                self.collected[qualname, target.value] = docstring


class PycodeError(Exception):
    """
    Error raised when a module's source cannot be located, read or parsed.

    The first argument is the error message.  An optional second argument
    is the underlying cause (usually the original exception); its repr is
    appended to the message by ``str()``.
    """
    def __str__(self):
        if not self.args:
            # raised without a message: use the default rendering instead
            # of failing with an IndexError
            return Exception.__str__(self)
        res = self.args[0]
        if len(self.args) > 1:
            # wrap the cause in a 1-tuple, otherwise a cause that is itself
            # a tuple would be unpacked as multiple format arguments
            res += ' (exception was: %r)' % (self.args[1],)
        return res


class ModuleAnalyzer(object):
    # cache for analyzer objects -- caches both by module and file name
    cache = {}

    @classmethod
    def for_string(cls, string, modname, srcname='<string>'):
        return cls(StringIO(string), modname, srcname)

    @classmethod
    def for_file(cls, filename, modname):
        if ('file', filename) in cls.cache:
            return cls.cache['file', filename]
        try:
            fileobj = open(filename, 'r')
        except Exception, err:
            raise PycodeError('error opening %r' % filename, err)
        obj = cls(fileobj, modname, filename)
        cls.cache['file', filename] = obj
        return obj

    @classmethod
    def for_module(cls, modname):
        if ('module', modname) in cls.cache:
            entry = cls.cache['module', modname]
            if isinstance(entry, PycodeError):
                raise entry
            return entry

        try:
            if modname not in sys.modules:
                try:
                    __import__(modname)
                except ImportError, err:
                    raise PycodeError('error importing %r' % modname, err)
            mod = sys.modules[modname]
            if hasattr(mod, '__loader__'):
                try:
                    source = mod.__loader__.get_source(modname)
                except Exception, err:
                    raise PycodeError('error getting source for %r' % modname,
                                      err)
                obj = cls.for_string(source, modname)
                cls.cache['module', modname] = obj
                return obj
            filename = getattr(mod, '__file__', None)
            if filename is None:
                raise PycodeError('no source found for module %r' % modname)
            filename = path.normpath(path.abspath(filename))
            lfilename = filename.lower()
            if lfilename.endswith('.pyo') or lfilename.endswith('.pyc'):
                filename = filename[:-1]
            elif not lfilename.endswith('.py'):
                raise PycodeError('source is not a .py file: %r' % filename)
            if not path.isfile(filename):
                raise PycodeError('source file is not present: %r' % filename)
            obj = cls.for_file(filename, modname)
        except PycodeError, err:
            cls.cache['module', modname] = err
            raise
        cls.cache['module', modname] = obj
        return obj

    def __init__(self, source, modname, srcname):
        # name of the module
        self.modname = modname
        # name of the source file
        self.srcname = srcname
        # file-like object yielding source lines
        self.source = source

        # will be filled by tokenize()
        self.tokens = None
        # will be filled by parse()
        self.parsetree = None
        # will be filled by find_attr_docs()
        self.attr_docs = None
        # will be filled by find_tags()
        self.tags = None

    def tokenize(self):
        """Generate tokens from the source."""
        if self.tokens is not None:
            return
        self.tokens = list(tokenize.generate_tokens(self.source.readline))
        self.source.close()

    def parse(self):
        """Parse the generated source tokens."""
        if self.parsetree is not None:
            return
        self.tokenize()
        try:
            self.parsetree = pydriver.parse_tokens(self.tokens)
        except parse.ParseError, err:
            raise PycodeError('parsing failed', err)
        # find the source code encoding
        encoding = sys.getdefaultencoding()
        comments = self.parsetree.get_prefix()
        for line in comments.splitlines()[:2]:
            match = _coding_re.search(line)
            if match is not None:
                encoding = match.group(1)
                break
        self.encoding = encoding

    def find_attr_docs(self, scope=''):
        """Find class and module-level attributes and their documentation."""
        if self.attr_docs is not None:
            return self.attr_docs
        self.parse()
        attr_visitor = AttrDocVisitor(number2name, scope, self.encoding)
        attr_visitor.visit(self.parsetree)
        self.attr_docs = attr_visitor.collected
        # now that we found everything we could in the tree, throw it away
        # (it takes quite a bit of memory for large modules)
        self.parsetree = None
        return attr_visitor.collected

    def find_tags(self):
        """Find class, function and method definitions and their location."""
        if self.tags is not None:
            return self.tags
        self.tokenize()
        result = {}
        namespace = []
        stack = []
        indent = 0
        defline = False
        expect_indent = False
        def tokeniter(ignore = (token.COMMENT, token.NL)):
            for tokentup in self.tokens:
                if tokentup[0] not in ignore:
                    yield tokentup
        tokeniter = tokeniter()
        for type, tok, spos, epos, line in tokeniter:
            if expect_indent:
                if type != token.INDENT:
                    # no suite -- one-line definition
                    assert stack
                    dtype, fullname, startline, _ = stack.pop()
                    endline = epos[0]
                    namespace.pop()
                    result[fullname] = (dtype, startline, endline)
                expect_indent = False
            if tok in ('def', 'class'):
                name = tokeniter.next()[1]
                namespace.append(name)
                fullname = '.'.join(namespace)
                stack.append((tok, fullname, spos[0], indent))
                defline = True
            elif type == token.INDENT:
                expect_indent = False
                indent += 1
            elif type == token.DEDENT:
                indent -= 1
                # if the stacklevel is the same as it was before the last
                # def/class block, this dedent closes that block
                if stack and indent == stack[-1][3]:
                    dtype, fullname, startline, _ = stack.pop()
                    endline = spos[0]
                    namespace.pop()
                    result[fullname] = (dtype, startline, endline)
            elif type == token.NEWLINE:
                # if this line contained a definition, expect an INDENT
                # to start the suite; if there is no such INDENT
                # it's a one-line definition
                if defline:
                    defline = False
                    expect_indent = True
        self.tags = result
        return result


if __name__ == '__main__':
    # ad-hoc benchmark: time the tokenizing, parsing and tag finding steps
    # for one hard-coded source file (path relative to the working directory)
    import pprint
    import time
    started = time.time()
    #analyzer = ModuleAnalyzer.for_file(__file__.rstrip('c'),
    #                                   'sphinx.builders.html')
    analyzer = ModuleAnalyzer.for_file('sphinx/builders/html.py',
                                       'sphinx.builders.html')
    analyzer.tokenize()
    tokenized = time.time()
    analyzer.parse()
    parsed = time.time()
    #for (ns, name), doc in analyzer.find_attr_docs().iteritems():
    #    print '>>', ns, name
    #    print '\n'.join(doc)
    pprint.pprint(analyzer.find_tags())
    finished = time.time()
    #print nodes.nice_repr(analyzer.parsetree, number2name)
    timings = (tokenized - started, parsed - tokenized, finished - parsed)
    print("tokenizing %.4f, parsing %.4f, finding %.4f" % timings)
Add pgen2 and custom utilities. 2008-12-29 20:22:18 +01:00			`# -- coding: utf-8 --`
			`"""`
			`sphinx.pycode`
			`~~~~~~~~~~~~~`

			`Utilities parsing and analyzing Python code.`

Fix a few remaining copyrights and add 2009 to license. 2009-01-04 14:39:36 +01:00			`:copyright: Copyright 2007-2009 by the Sphinx team, see AUTHORS.`
Add pgen2 and custom utilities. 2008-12-29 20:22:18 +01:00			`:license: BSD, see LICENSE for details.`
			`"""`

Add support for decoding strings and comments to the analyzer. 2009-01-04 20:02:24 +01:00			`import re`
Add pgen2 and custom utilities. 2008-12-29 20:22:18 +01:00			`import sys`
			`from os import path`
* Add a tag-finding method based on tokens. * Don't parse immediately if tokenizing suffices. * Also cache by file name. 2008-12-30 12:42:26 +01:00			`from cStringIO import StringIO`
Add pgen2 and custom utilities. 2008-12-29 20:22:18 +01:00
Add Python license info, add parse.c source generated by Cython. 2009-01-01 23:48:10 +01:00			`from sphinx.pycode import nodes`
* Add a tag-finding method based on tokens. * Don't parse immediately if tokenizing suffices. * Also cache by file name. 2008-12-30 12:42:26 +01:00			`from sphinx.pycode.pgen2 import driver, token, tokenize, parse, literals`
Move docstring processing to an util module. 2008-12-30 02:37:20 +01:00			`from sphinx.util.docstrings import prepare_docstring, prepare_commentdoc`
Add pgen2 and custom utilities. 2008-12-29 20:22:18 +01:00

			`# load the Python grammar`
			`_grammarfile = path.join(path.dirname(__file__), 'Grammar.txt')`
			`pygrammar = driver.load_grammar(_grammarfile)`
Add Python license info, add parse.c source generated by Cython. 2009-01-01 23:48:10 +01:00			`pydriver = driver.Driver(pygrammar, convert=nodes.convert)`
Add pgen2 and custom utilities. 2008-12-29 20:22:18 +01:00
* Add a tag-finding method based on tokens. * Don't parse immediately if tokenizing suffices. * Also cache by file name. 2008-12-30 12:42:26 +01:00			`# an object with attributes corresponding to token and symbol names`
Add pgen2 and custom utilities. 2008-12-29 20:22:18 +01:00			`class sym: pass`
			`for k, v in pygrammar.symbol2number.iteritems():`
			`setattr(sym, k, v)`
* Add a tag-finding method based on tokens. * Don't parse immediately if tokenizing suffices. * Also cache by file name. 2008-12-30 12:42:26 +01:00			`for k, v in token.tok_name.iteritems():`
			`setattr(sym, v, k)`
Add pgen2 and custom utilities. 2008-12-29 20:22:18 +01:00
			`# a dict mapping terminal and nonterminal numbers to their names`
			`number2name = pygrammar.number2symbol.copy()`
			`number2name.update(token.tok_name)`


Add support for decoding strings and comments to the analyzer. 2009-01-04 20:02:24 +01:00			`# a regex to recognize coding cookies`
			`_coding_re = re.compile(r'coding[:=]\s*([-\w.]+)')`

Add Python license info, add parse.c source generated by Cython. 2009-01-01 23:48:10 +01:00			`_eq = nodes.Leaf(token.EQUAL, '=')`
Add pgen2 and custom utilities. 2008-12-29 20:22:18 +01:00

Add Python license info, add parse.c source generated by Cython. 2009-01-01 23:48:10 +01:00			`class AttrDocVisitor(nodes.NodeVisitor):`
Fix handling of INDENT/DEDENT tokens. 2008-12-29 22:25:15 +01:00			`"""`
Also find attribute docs in the "other" style: docstrings after the assignment. 2008-12-30 02:26:47 +01:00			`Visitor that collects docstrings for attribute assignments on toplevel and`
			`in classes.`

			`The docstrings can either be in special '#:' comments before the assignment`
			`or in a docstring after it.`
Fix handling of INDENT/DEDENT tokens. 2008-12-29 22:25:15 +01:00			`"""`
Add support for decoding strings and comments to the analyzer. 2009-01-04 20:02:24 +01:00			`def init(self, scope, encoding):`
Cleanup; add scoping to ClassAttrVisitor. 2008-12-29 20:37:30 +01:00			`self.scope = scope`
Add support for decoding strings and comments to the analyzer. 2009-01-04 20:02:24 +01:00			`self.encoding = encoding`
Add pgen2 and custom utilities. 2008-12-29 20:22:18 +01:00			`self.namespace = []`
First iteration of an autodoc that handles attribute documentation. 2008-12-30 02:09:29 +01:00			`self.collected = {}`
Add pgen2 and custom utilities. 2008-12-29 20:22:18 +01:00
			`def visit_classdef(self, node):`
			`self.namespace.append(node[1].value)`
			`self.generic_visit(node)`
			`self.namespace.pop()`

			`def visit_expr_stmt(self, node):`
Also find attribute docs in the "other" style: docstrings after the assignment. 2008-12-30 02:26:47 +01:00			`"""Visit an assignment which may have a special comment before it."""`
Cleanup; add scoping to ClassAttrVisitor. 2008-12-29 20:37:30 +01:00			`if _eq not in node.children:`
			`# not an assignment (we don't care for augmented assignments)`
			`return`
Fix handling of INDENT/DEDENT tokens. 2008-12-29 22:25:15 +01:00			`pnode = node[0]`
			`prefix = pnode.get_prefix()`
			`# if the assignment is the first statement on a new indentation`
			`# level, its preceding whitespace and comments are not assigned`
			`# to that token, but the first INDENT or DEDENT token`
			`while not prefix:`
			`pnode = pnode.get_prev_leaf()`
			`if not pnode or pnode.type not in (token.INDENT, token.DEDENT):`
			`break`
Another fix for DEDENT/INDENT handling. 2008-12-29 22:30:00 +01:00			`prefix = pnode.get_prefix()`
Add support for decoding strings and comments to the analyzer. 2009-01-04 20:02:24 +01:00			`prefix = prefix.decode(self.encoding)`
Fix handling of INDENT/DEDENT tokens. 2008-12-29 22:25:15 +01:00			`docstring = prepare_commentdoc(prefix)`
Also find attribute docs in the "other" style: docstrings after the assignment. 2008-12-30 02:26:47 +01:00			`if docstring:`
			`self.add_docstring(node, docstring)`

			`def visit_simple_stmt(self, node):`
			`"""Visit a docstring statement which may have an assignment before."""`
			`if node[0].type != token.STRING:`
			`# not a docstring; but still need to visit children`
			`return self.generic_visit(node)`
			`prev = node.get_prev_sibling()`
			`if not prev:`
Fix handling of INDENT/DEDENT tokens. 2008-12-29 22:25:15 +01:00			`return`
Also find attribute docs in the "other" style: docstrings after the assignment. 2008-12-30 02:26:47 +01:00			`if prev.type == sym.simple_stmt and \`
			`prev[0].type == sym.expr_stmt and _eq in prev[0].children:`
Reformat to EOL80. 2009-01-10 21:23:39 +01:00			`# need to "eval" the string because it's returned in its`
			`# original form`
Add support for decoding strings and comments to the analyzer. 2009-01-04 20:02:24 +01:00			`docstring = literals.evalString(node[0].value, self.encoding)`
			`docstring = prepare_docstring(docstring)`
Also find attribute docs in the "other" style: docstrings after the assignment. 2008-12-30 02:26:47 +01:00			`self.add_docstring(prev[0], docstring)`

			`def visit_funcdef(self, node):`
			`# don't descend into functions -- nothing interesting there`
			`return`

			`def add_docstring(self, node, docstring):`
Fix handling of INDENT/DEDENT tokens. 2008-12-29 22:25:15 +01:00			`# add an item for each assignment target`
			`for i in range(0, len(node) - 1, 2):`
			`target = node[i]`
			`if target.type != token.NAME:`
			`# don't care about complex targets`
			`continue`
First iteration of an autodoc that handles attribute documentation. 2008-12-30 02:09:29 +01:00			`namespace = '.'.join(self.namespace)`
			`if namespace.startswith(self.scope):`
			`self.collected[namespace, target.value] = docstring`
Add pgen2 and custom utilities. 2008-12-29 20:22:18 +01:00

Improve error handling. 2008-12-29 20:47:32 +01:00			`class PycodeError(Exception):`
			`def __str__(self):`
			`res = self.args[0]`
			`if len(self.args) > 1:`
			`res += ' (exception was: %r)' % self.args[1]`
			`return res`


Add pgen2 and custom utilities. 2008-12-29 20:22:18 +01:00			`class ModuleAnalyzer(object):`
* Add a tag-finding method based on tokens. * Don't parse immediately if tokenizing suffices. * Also cache by file name. 2008-12-30 12:42:26 +01:00			`# cache for analyzer objects -- caches both by module and file name`
First iteration of an autodoc that handles attribute documentation. 2008-12-30 02:09:29 +01:00			`cache = {}`
Add pgen2 and custom utilities. 2008-12-29 20:22:18 +01:00
			`@classmethod`
			`def for_string(cls, string, modname, srcname='<string>'):`
* Add a tag-finding method based on tokens. * Don't parse immediately if tokenizing suffices. * Also cache by file name. 2008-12-30 12:42:26 +01:00			`return cls(StringIO(string), modname, srcname)`
Add pgen2 and custom utilities. 2008-12-29 20:22:18 +01:00
			`@classmethod`
			`def for_file(cls, filename, modname):`
* Add a tag-finding method based on tokens. * Don't parse immediately if tokenizing suffices. * Also cache by file name. 2008-12-30 12:42:26 +01:00			`if ('file', filename) in cls.cache:`
			`return cls.cache['file', filename]`
Add pgen2 and custom utilities. 2008-12-29 20:22:18 +01:00			`try:`
Improve error handling. 2008-12-29 20:47:32 +01:00			`fileobj = open(filename, 'r')`
			`except Exception, err:`
			`raise PycodeError('error opening %r' % filename, err)`
* Add a tag-finding method based on tokens. * Don't parse immediately if tokenizing suffices. * Also cache by file name. 2008-12-30 12:42:26 +01:00			`obj = cls(fileobj, modname, filename)`
			`cls.cache['file', filename] = obj`
			`return obj`
Add pgen2 and custom utilities. 2008-12-29 20:22:18 +01:00
			`@classmethod`
			`def for_module(cls, modname):`
* Add a tag-finding method based on tokens. * Don't parse immediately if tokenizing suffices. * Also cache by file name. 2008-12-30 12:42:26 +01:00			`if ('module', modname) in cls.cache:`
Add support for decoding strings and comments to the analyzer. 2009-01-04 20:02:24 +01:00			`entry = cls.cache['module', modname]`
			`if isinstance(entry, PycodeError):`
			`raise entry`
			`return entry`

			`try:`
			`if modname not in sys.modules:`
			`try:`
			`__import__(modname)`
			`except ImportError, err:`
			`raise PycodeError('error importing %r' % modname, err)`
			`mod = sys.modules[modname]`
			`if hasattr(mod, '__loader__'):`
			`try:`
			`source = mod.__loader__.get_source(modname)`
			`except Exception, err:`
Reformat to EOL80. 2009-01-10 21:23:39 +01:00			`raise PycodeError('error getting source for %r' % modname,`
			`err)`
Add support for decoding strings and comments to the analyzer. 2009-01-04 20:02:24 +01:00			`obj = cls.for_string(source, modname)`
			`cls.cache['module', modname] = obj`
			`return obj`
			`filename = getattr(mod, '__file__', None)`
			`if filename is None:`
			`raise PycodeError('no source found for module %r' % modname)`
Fix test_env. 2009-03-16 22:47:52 +01:00			`filename = path.normpath(path.abspath(filename))`
Add support for decoding strings and comments to the analyzer. 2009-01-04 20:02:24 +01:00			`lfilename = filename.lower()`
			`if lfilename.endswith('.pyo') or lfilename.endswith('.pyc'):`
			`filename = filename[:-1]`
			`elif not lfilename.endswith('.py'):`
			`raise PycodeError('source is not a .py file: %r' % filename)`
			`if not path.isfile(filename):`
			`raise PycodeError('source file is not present: %r' % filename)`
			`obj = cls.for_file(filename, modname)`
			`except PycodeError, err:`
			`cls.cache['module', modname] = err`
			`raise`
* Add a tag-finding method based on tokens. * Don't parse immediately if tokenizing suffices. * Also cache by file name. 2008-12-30 12:42:26 +01:00			`cls.cache['module', modname] = obj`
First iteration of an autodoc that handles attribute documentation. 2008-12-30 02:09:29 +01:00			`return obj`
Add pgen2 and custom utilities. 2008-12-29 20:22:18 +01:00
* Add a tag-finding method based on tokens. * Don't parse immediately if tokenizing suffices. * Also cache by file name. 2008-12-30 12:42:26 +01:00			`def __init__(self, source, modname, srcname):`
Add support for decoding strings and comments to the analyzer. 2009-01-04 20:02:24 +01:00			`# name of the module`
* Add a tag-finding method based on tokens. * Don't parse immediately if tokenizing suffices. * Also cache by file name. 2008-12-30 12:42:26 +01:00			`self.modname = modname`
Add support for decoding strings and comments to the analyzer. 2009-01-04 20:02:24 +01:00			`# name of the source file`
* Add a tag-finding method based on tokens. * Don't parse immediately if tokenizing suffices. * Also cache by file name. 2008-12-30 12:42:26 +01:00			`self.srcname = srcname`
			`# file-like object yielding source lines`
			`self.source = source`

			`# will be filled by tokenize()`
			`self.tokens = None`
			`# will be filled by parse()`
			`self.parsetree = None`
Cache tags and attribute docs in the analyzer. 2009-01-04 19:16:52 +01:00			`# will be filled by find_attr_docs()`
			`self.attr_docs = None`
			`# will be filled by find_tags()`
			`self.tags = None`
* Add a tag-finding method based on tokens. * Don't parse immediately if tokenizing suffices. * Also cache by file name. 2008-12-30 12:42:26 +01:00
			`def tokenize(self):`
			`"""Generate tokens from the source."""`
			`if self.tokens is not None:`
			`return`
			`self.tokens = list(tokenize.generate_tokens(self.source.readline))`
			`self.source.close()`

			`def parse(self):`
			`"""Parse the generated source tokens."""`
			`if self.parsetree is not None:`
			`return`
			`self.tokenize()`
Raise PycodeError directly on parsing. 2009-01-10 20:34:26 +01:00			`try:`
			`self.parsetree = pydriver.parse_tokens(self.tokens)`
			`except parse.ParseError, err:`
			`raise PycodeError('parsing failed', err)`
Add support for decoding strings and comments to the analyzer. 2009-01-04 20:02:24 +01:00			`# find the source code encoding`
			`encoding = sys.getdefaultencoding()`
			`comments = self.parsetree.get_prefix()`
			`for line in comments.splitlines()[:2]:`
			`match = _coding_re.search(line)`
			`if match is not None:`
			`encoding = match.group(1)`
			`break`
			`self.encoding = encoding`
* Add a tag-finding method based on tokens. * Don't parse immediately if tokenizing suffices. * Also cache by file name. 2008-12-30 12:42:26 +01:00
First iteration of an autodoc that handles attribute documentation. 2008-12-30 02:09:29 +01:00			`def find_attr_docs(self, scope=''):`
* Add a tag-finding method based on tokens. * Don't parse immediately if tokenizing suffices. * Also cache by file name. 2008-12-30 12:42:26 +01:00			`"""Find class and module-level attributes and their documentation."""`
Cache tags and attribute docs in the analyzer. 2009-01-04 19:16:52 +01:00			`if self.attr_docs is not None:`
			`return self.attr_docs`
* Add a tag-finding method based on tokens. * Don't parse immediately if tokenizing suffices. * Also cache by file name. 2008-12-30 12:42:26 +01:00			`self.parse()`
Add support for decoding strings and comments to the analyzer. 2009-01-04 20:02:24 +01:00			`attr_visitor = AttrDocVisitor(number2name, scope, self.encoding)`
* Add a tag-finding method based on tokens. * Don't parse immediately if tokenizing suffices. * Also cache by file name. 2008-12-30 12:42:26 +01:00			`attr_visitor.visit(self.parsetree)`
Cache tags and attribute docs in the analyzer. 2009-01-04 19:16:52 +01:00			`self.attr_docs = attr_visitor.collected`
Really fix the problem of keeping all pgen2 parser nodes in memory. 2009-09-03 18:00:11 +02:00			`# now that we found everything we could in the tree, throw it away`
			`# (it takes quite a bit of memory for large modules)`
Remove parsetree after collecting attributes. 2009-07-02 19:41:25 +02:00			`self.parsetree = None`
Improve error handling. 2008-12-29 20:47:32 +01:00			`return attr_visitor.collected`
Add pgen2 and custom utilities. 2008-12-29 20:22:18 +01:00
* Add a tag-finding method based on tokens. * Don't parse immediately if tokenizing suffices. * Also cache by file name. 2008-12-30 12:42:26 +01:00			`def find_tags(self):`
			`"""Find class, function and method definitions and their location."""`
Cache tags and attribute docs in the analyzer. 2009-01-04 19:16:52 +01:00			`if self.tags is not None:`
			`return self.tags`
* Add a tag-finding method based on tokens. * Don't parse immediately if tokenizing suffices. * Also cache by file name. 2008-12-30 12:42:26 +01:00			`self.tokenize()`
			`result = {}`
			`namespace = []`
			`stack = []`
			`indent = 0`
			`defline = False`
			`expect_indent = False`
			`def tokeniter(ignore = (token.COMMENT, token.NL)):`
			`for tokentup in self.tokens:`
			`if tokentup[0] not in ignore:`
			`yield tokentup`
			`tokeniter = tokeniter()`
			`for type, tok, spos, epos, line in tokeniter:`
			`if expect_indent:`
			`if type != token.INDENT:`
			`# no suite -- one-line definition`
			`assert stack`
			`dtype, fullname, startline, _ = stack.pop()`
			`endline = epos[0]`
			`namespace.pop()`
Add "object" option to literalinclude directive. 2008-12-30 15:41:21 +01:00			`result[fullname] = (dtype, startline, endline)`
* Add a tag-finding method based on tokens. * Don't parse immediately if tokenizing suffices. * Also cache by file name. 2008-12-30 12:42:26 +01:00			`expect_indent = False`
			`if tok in ('def', 'class'):`
			`name = tokeniter.next()[1]`
			`namespace.append(name)`
			`fullname = '.'.join(namespace)`
			`stack.append((tok, fullname, spos[0], indent))`
			`defline = True`
			`elif type == token.INDENT:`
			`expect_indent = False`
			`indent += 1`
			`elif type == token.DEDENT:`
			`indent -= 1`
Fix long line. 2009-01-01 23:49:32 +01:00			`# if the stacklevel is the same as it was before the last`
			`# def/class block, this dedent closes that block`
* Add a tag-finding method based on tokens. * Don't parse immediately if tokenizing suffices. * Also cache by file name. 2008-12-30 12:42:26 +01:00			`if stack and indent == stack[-1][3]:`
			`dtype, fullname, startline, _ = stack.pop()`
			`endline = spos[0]`
			`namespace.pop()`
Add "object" option to literalinclude directive. 2008-12-30 15:41:21 +01:00			`result[fullname] = (dtype, startline, endline)`
* Add a tag-finding method based on tokens. * Don't parse immediately if tokenizing suffices. * Also cache by file name. 2008-12-30 12:42:26 +01:00			`elif type == token.NEWLINE:`
Reformat to EOL80. 2009-01-10 21:23:39 +01:00			`# if this line contained a definition, expect an INDENT`
			`# to start the suite; if there is no such INDENT`
			`# it's a one-line definition`
* Add a tag-finding method based on tokens. * Don't parse immediately if tokenizing suffices. * Also cache by file name. 2008-12-30 12:42:26 +01:00			`if defline:`
			`defline = False`
			`expect_indent = True`
Cache tags and attribute docs in the analyzer. 2009-01-04 19:16:52 +01:00			`self.tags = result`
* Add a tag-finding method based on tokens. * Don't parse immediately if tokenizing suffices. * Also cache by file name. 2008-12-30 12:42:26 +01:00			`return result`

First iteration of an autodoc that handles attribute documentation. 2008-12-30 02:09:29 +01:00
Move benchmark into __main__ block. 2008-12-30 01:32:01 +01:00			`if __name__ == '__main__':`
* Add a tag-finding method based on tokens. * Don't parse immediately if tokenizing suffices. * Also cache by file name. 2008-12-30 12:42:26 +01:00			`import time, pprint`
Move benchmark into __main__ block. 2008-12-30 01:32:01 +01:00			`x0 = time.time()`
Move docstring processing to an util module. 2008-12-30 02:37:20 +01:00			`#ma = ModuleAnalyzer.for_file(__file__.rstrip('c'), 'sphinx.builders.html')`
Reformat to EOL80. 2009-01-10 21:23:39 +01:00			`ma = ModuleAnalyzer.for_file('sphinx/builders/html.py',`
			`'sphinx.builders.html')`
* Add a tag-finding method based on tokens. * Don't parse immediately if tokenizing suffices. * Also cache by file name. 2008-12-30 12:42:26 +01:00			`ma.tokenize()`
Move benchmark into __main__ block. 2008-12-30 01:32:01 +01:00			`x1 = time.time()`
* Add a tag-finding method based on tokens. * Don't parse immediately if tokenizing suffices. * Also cache by file name. 2008-12-30 12:42:26 +01:00			`ma.parse()`
Move benchmark into __main__ block. 2008-12-30 01:32:01 +01:00			`x2 = time.time()`
* Add a tag-finding method based on tokens. * Don't parse immediately if tokenizing suffices. * Also cache by file name. 2008-12-30 12:42:26 +01:00			`#for (ns, name), doc in ma.find_attr_docs().iteritems():`
			`# print '>>', ns, name`
			`# print '\n'.join(doc)`
			`pprint.pprint(ma.find_tags())`
			`x3 = time.time()`
Add Python license info, add parse.c source generated by Cython. 2009-01-01 23:48:10 +01:00			`#print nodes.nice_repr(ma.parsetree, number2name)`
* Add a tag-finding method based on tokens. * Don't parse immediately if tokenizing suffices. * Also cache by file name. 2008-12-30 12:42:26 +01:00			`print "tokenizing %.4f, parsing %.4f, finding %.4f" % (x1-x0, x2-x1, x3-x2)`