Mirror of https://github.com/sphinx-doc/sphinx.git (synced 2025-02-25 18:55:22 -06:00)
@@ -19,13 +19,11 @@ include sphinx/locale/.tx/config
recursive-include sphinx/templates *
recursive-include sphinx/texinputs *
recursive-include sphinx/themes *
recursive-include sphinx/pycode/pgen2 *.c *.pyx
recursive-include sphinx/locale *.js *.pot *.po *.mo
recursive-include sphinx/search/non-minified-js *.js
recursive-include sphinx/ext/autosummary/templates *
recursive-include tests *
recursive-include utils *
include sphinx/pycode/Grammar-py*

recursive-include doc *
prune doc/_build
@@ -256,7 +256,7 @@ class LiteralIncludeReader(object):
            else:
                start = tags[pyobject][1]
                end = tags[pyobject][2]
                lines = lines[start - 1:end - 1]
                lines = lines[start - 1:end]
                if 'lineno-match' in self.options:
                    self.lineno_start = start
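The one-line change above fixes an off-by-one in the pyobject filter: find_tags() reports a 1-based, inclusive (kind, start, end) line range, so the Python slice has to stop at end, not end - 1 (the old slice dropped the last line of the selected object). A minimal sketch of the behaviour, using a made-up tag table rather than a real ModuleAnalyzer:

    # Hypothetical tag table in the same shape find_tags() returns:
    # name -> (kind, start_line, end_line), 1-based and inclusive.
    lines = [
        "class Foo(object):\n",   # line 1
        "    def bar(self):\n",   # line 2
        "        return 1\n",     # line 3
        "\n",                     # line 4
    ]
    tags = {'Foo': ('class', 1, 3)}

    start, end = tags['Foo'][1], tags['Foo'][2]
    old = lines[start - 1:end - 1]   # before the fix: loses "return 1"
    new = lines[start - 1:end]       # after the fix: keeps lines 1..3 inclusive

    assert old == lines[0:2]
    assert new == lines[0:3]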
@@ -1,135 +0,0 @@
# Grammar for Python 2.x

# IMPORTANT: when copying over a new Grammar file, make sure file_input
# is the first nonterminal in the file!

# Start symbols for the grammar:
# single_input is a single interactive statement;
# file_input is a module or sequence of commands read from an input file;
# eval_input is the input for the eval() and input() functions.
# NB: compound_stmt in single_input is followed by extra NEWLINE!
file_input: (NEWLINE | stmt)* ENDMARKER
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
eval_input: testlist NEWLINE* ENDMARKER

decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
decorators: decorator+
decorated: decorators (classdef | funcdef)
funcdef: 'def' NAME parameters ':' suite
parameters: '(' [varargslist] ')'
varargslist: ((fpdef ['=' test] ',')*
              ('*' NAME [',' '**' NAME] | '**' NAME) |
              fpdef ['=' test] (',' fpdef ['=' test])* [','])
fpdef: NAME | '(' fplist ')'
fplist: fpdef (',' fpdef)* [',']

stmt: simple_stmt | compound_stmt
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
small_stmt: (expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt |
             import_stmt | global_stmt | exec_stmt | assert_stmt)
expr_stmt: testlist (augassign (yield_expr|testlist) |
                     ('=' (yield_expr|testlist))*)
augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |
            '<<=' | '>>=' | '**=' | '//=')
# For normal assignments, additional restrictions enforced by the interpreter
print_stmt: 'print' ( [ test (',' test)* [','] ] |
                      '>>' test [ (',' test)+ [','] ] )
del_stmt: 'del' exprlist
pass_stmt: 'pass'
flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
break_stmt: 'break'
continue_stmt: 'continue'
return_stmt: 'return' [testlist]
yield_stmt: yield_expr
raise_stmt: 'raise' [test [',' test [',' test]]]
import_stmt: import_name | import_from
import_name: 'import' dotted_as_names
import_from: ('from' ('.'* dotted_name | '.'+)
              'import' ('*' | '(' import_as_names ')' | import_as_names))
import_as_name: NAME ['as' NAME]
dotted_as_name: dotted_name ['as' NAME]
import_as_names: import_as_name (',' import_as_name)* [',']
dotted_as_names: dotted_as_name (',' dotted_as_name)*
dotted_name: NAME ('.' NAME)*
global_stmt: 'global' NAME (',' NAME)*
exec_stmt: 'exec' expr ['in' test [',' test]]
assert_stmt: 'assert' test [',' test]

compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated
if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
while_stmt: 'while' test ':' suite ['else' ':' suite]
for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
try_stmt: ('try' ':' suite
           ((except_clause ':' suite)+
            ['else' ':' suite]
            ['finally' ':' suite] |
            'finally' ':' suite))
with_stmt: 'with' with_item (',' with_item)* ':' suite
with_item: test ['as' expr]
# NB compile.c makes sure that the default except clause is last
except_clause: 'except' [test [('as' | ',') test]]
suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT

# Backward compatibility cruft to support:
# [ x for x in lambda: True, lambda: False if x() ]
# even while also allowing:
# lambda x: 5 if x else 2
# (But not a mix of the two)
testlist_safe: old_test [(',' old_test)+ [',']]
old_test: or_test | old_lambdef
old_lambdef: 'lambda' [varargslist] ':' old_test

test: or_test ['if' or_test 'else' test] | lambdef
or_test: and_test ('or' and_test)*
and_test: not_test ('and' not_test)*
not_test: 'not' not_test | comparison
comparison: expr (comp_op expr)*
comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
expr: xor_expr ('|' xor_expr)*
xor_expr: and_expr ('^' and_expr)*
and_expr: shift_expr ('&' shift_expr)*
shift_expr: arith_expr (('<<'|'>>') arith_expr)*
arith_expr: term (('+'|'-') term)*
term: factor (('*'|'/'|'%'|'//') factor)*
factor: ('+'|'-'|'~') factor | power
power: atom trailer* ['**' factor]
atom: ('(' [yield_expr|testlist_comp] ')' |
       '[' [listmaker] ']' |
       '{' [dictorsetmaker] '}' |
       '`' testlist1 '`' |
       NAME | NUMBER | STRING+)
listmaker: test ( list_for | (',' test)* [','] )
testlist_comp: test ( comp_for | (',' test)* [','] )
lambdef: 'lambda' [varargslist] ':' test
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
subscriptlist: subscript (',' subscript)* [',']
subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop]
sliceop: ':' [test]
exprlist: expr (',' expr)* [',']
testlist: test (',' test)* [',']
dictorsetmaker: ( (test ':' test (comp_for | (',' test ':' test)* [','])) |
                  (test (comp_for | (',' test)* [','])) )

classdef: 'class' NAME ['(' [testlist] ')'] ':' suite

arglist: (argument ',')* (argument [',']
                          |'*' test (',' argument)* [',' '**' test]
                          |'**' test)
# The reason that keywords are test nodes instead of NAME is that using NAME
# results in an ambiguity. ast.c makes sure it's a NAME.
argument: test [comp_for] | test '=' test

list_iter: list_for | list_if
list_for: 'for' exprlist 'in' testlist_safe [list_iter]
list_if: 'if' old_test [list_iter]

comp_iter: comp_for | comp_if
comp_for: 'for' exprlist 'in' or_test [comp_iter]
comp_if: 'if' old_test [comp_iter]

testlist1: test (',' test)*

# not used in grammar, but may appear in "node" passed from Parser to Compiler
encoding_decl: NAME

yield_expr: 'yield' [testlist]
@@ -1,143 +0,0 @@
# Grammar for Python 3.x (with at least x <= 5)

# IMPORTANT: when copying over a new Grammar file, make sure file_input
# is the first nonterminal in the file!

# Start symbols for the grammar:
# single_input is a single interactive statement;
# file_input is a module or sequence of commands read from an input file;
# eval_input is the input for the eval() functions.
# NB: compound_stmt in single_input is followed by extra NEWLINE!
file_input: (NEWLINE | stmt)* ENDMARKER
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
eval_input: testlist NEWLINE* ENDMARKER

decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
decorators: decorator+
decorated: decorators (classdef | funcdef | async_funcdef)

async_funcdef: ASYNC funcdef
funcdef: 'def' NAME parameters ['->' test] ':' suite

parameters: '(' [typedargslist] ')'
typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [','
                ['*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef]]
               | '*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef)
tfpdef: NAME [':' test]
varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [','
              ['*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef]]
             | '*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef)
vfpdef: NAME

stmt: simple_stmt | compound_stmt
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt |
             import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) |
                               ('=' (yield_expr|testlist_star_expr))*)
testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' |
            '<<=' | '>>=' | '**=' | '//=')
# For normal assignments, additional restrictions enforced by the interpreter
del_stmt: 'del' exprlist
pass_stmt: 'pass'
flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
break_stmt: 'break'
continue_stmt: 'continue'
return_stmt: 'return' [testlist]
yield_stmt: yield_expr
raise_stmt: 'raise' [test ['from' test]]
import_stmt: import_name | import_from
import_name: 'import' dotted_as_names
# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+)
              'import' ('*' | '(' import_as_names ')' | import_as_names))
import_as_name: NAME ['as' NAME]
dotted_as_name: dotted_name ['as' NAME]
import_as_names: import_as_name (',' import_as_name)* [',']
dotted_as_names: dotted_as_name (',' dotted_as_name)*
dotted_name: NAME ('.' NAME)*
global_stmt: 'global' NAME (',' NAME)*
nonlocal_stmt: 'nonlocal' NAME (',' NAME)*
assert_stmt: 'assert' test [',' test]

compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt
async_stmt: ASYNC (funcdef | with_stmt | for_stmt)
if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
while_stmt: 'while' test ':' suite ['else' ':' suite]
for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
try_stmt: ('try' ':' suite
           ((except_clause ':' suite)+
            ['else' ':' suite]
            ['finally' ':' suite] |
            'finally' ':' suite))
with_stmt: 'with' with_item (',' with_item)* ':' suite
with_item: test ['as' expr]
# NB compile.c makes sure that the default except clause is last
except_clause: 'except' [test ['as' NAME]]
suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT

test: or_test ['if' or_test 'else' test] | lambdef
test_nocond: or_test | lambdef_nocond
lambdef: 'lambda' [varargslist] ':' test
lambdef_nocond: 'lambda' [varargslist] ':' test_nocond
or_test: and_test ('or' and_test)*
and_test: not_test ('and' not_test)*
not_test: 'not' not_test | comparison
comparison: expr (comp_op expr)*
# <> isn't actually a valid comparison operator in Python. It's here for the
# sake of a __future__ import described in PEP 401 (which really works :-)
comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
star_expr: '*' expr
expr: xor_expr ('|' xor_expr)*
xor_expr: and_expr ('^' and_expr)*
and_expr: shift_expr ('&' shift_expr)*
shift_expr: arith_expr (('<<'|'>>') arith_expr)*
arith_expr: term (('+'|'-') term)*
term: factor (('*'|'@'|'/'|'%'|'//') factor)*
factor: ('+'|'-'|'~') factor | power
power: [AWAIT] atom trailer* ['**' factor]
atom: ('(' [yield_expr|testlist_comp] ')' |
       '[' [testlist_comp] ']' |
       '{' [dictorsetmaker] '}' |
       NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
subscriptlist: subscript (',' subscript)* [',']
subscript: test | [test] ':' [test] [sliceop]
sliceop: ':' [test]
exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
testlist: test (',' test)* [',']
dictorsetmaker: ( ((test ':' test | '**' expr)
                   (comp_for | (',' (test ':' test | '**' expr))* [','])) |
                  ((test | star_expr)
                   (comp_for | (',' (test | star_expr))* [','])) )

classdef: 'class' NAME ['(' [arglist] ')'] ':' suite

arglist: argument (',' argument)* [',']

# The reason that keywords are test nodes instead of NAME is that using NAME
# results in an ambiguity. ast.c makes sure it's a NAME.
# "test '=' test" is really "keyword '=' test", but we have no such token.
# These need to be in a single rule to avoid grammar that is ambiguous
# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr,
# we explicitly match '*' here, too, to give it proper precedence.
# Illegal combinations and orderings are blocked in ast.c:
# multiple (test comp_for) arguements are blocked; keyword unpackings
# that precede iterable unpackings are blocked; etc.
argument: ( test [comp_for] |
            test '=' test |
            '**' test |
            '*' test )

comp_iter: comp_for | comp_if
comp_for: 'for' exprlist 'in' or_test [comp_iter]
comp_if: 'if' test_nocond [comp_iter]

# not used in grammar, but may appear in "node" passed from Parser to Compiler
encoding_decl: NAME

yield_expr: 'yield' [yield_arg]
yield_arg: 'from' test | testlist
@@ -10,174 +10,15 @@
"""
from __future__ import print_function

import re
import sys
from os import path
from six import iteritems, BytesIO, StringIO

from six import iteritems, text_type, BytesIO, StringIO

from sphinx import package_dir
from sphinx.errors import PycodeError
from sphinx.pycode import nodes
from sphinx.pycode.pgen2 import driver, token, tokenize, parse, literals
from sphinx.pycode.parser import Parser
from sphinx.util import get_module_source, detect_encoding
from sphinx.util.pycompat import TextIOWrapper
from sphinx.util.docstrings import prepare_docstring, prepare_commentdoc

if False:
    # For type annotation
    from typing import Any, Dict, List, Tuple  # NOQA


# load the Python grammar
_grammarfile = path.join(package_dir, 'pycode',
                         'Grammar-py%d.txt' % sys.version_info[0])
pygrammar = driver.load_grammar(_grammarfile)
pydriver = driver.Driver(pygrammar, convert=nodes.convert)


# an object with attributes corresponding to token and symbol names
class sym(object):
    pass


for k, v in iteritems(pygrammar.symbol2number):
    setattr(sym, k, v)
for k, v in iteritems(token.tok_name):
    setattr(sym, v, k)

# a dict mapping terminal and nonterminal numbers to their names
number2name = pygrammar.number2symbol.copy()
number2name.update(token.tok_name)

_eq = nodes.Leaf(token.EQUAL, '=')

emptyline_re = re.compile(r'^\s*(#.*)?$')


class AttrDocVisitor(nodes.NodeVisitor):
    """
    Visitor that collects docstrings for attribute assignments on toplevel and
    in classes (class attributes and attributes set in __init__).

    The docstrings can either be in special '#:' comments before the assignment
    or in a docstring after it.
    """
    def init(self, scope, encoding):
        self.scope = scope
        self.in_init = 0
        self.encoding = encoding
        self.namespace = []  # type: List[unicode]
        self.collected = {}  # type: Dict[Tuple[unicode, unicode], unicode]
        self.tagnumber = 0
        self.tagorder = {}  # type: Dict[unicode, int]

    def add_tag(self, name):
        name = '.'.join(self.namespace + [name])
        self.tagorder[name] = self.tagnumber
        self.tagnumber += 1

    def visit_classdef(self, node):
        """Visit a class."""
        self.add_tag(node[1].value)
        self.namespace.append(node[1].value)
        self.generic_visit(node)
        self.namespace.pop()

    def visit_funcdef(self, node):
        """Visit a function (or method)."""
        # usually, don't descend into functions -- nothing interesting there
        self.add_tag(node[1].value)
        if node[1].value == '__init__':
            # however, collect attributes set in __init__ methods
            self.in_init += 1
            self.generic_visit(node)
            self.in_init -= 1

    def visit_expr_stmt(self, node):
        """Visit an assignment which may have a special comment before (or
        after) it.
        """
        if _eq not in node.children:
            # not an assignment (we don't care for augmented assignments)
            return
        # look *after* the node; there may be a comment prefixing the NEWLINE
        # of the simple_stmt
        parent = node.parent
        idx = parent.children.index(node) + 1
        while idx < len(parent):
            if parent[idx].type == sym.SEMI:  # type: ignore
                idx += 1
                continue  # skip over semicolon
            if parent[idx].type == sym.NEWLINE:  # type: ignore
                prefix = parent[idx].get_prefix()
                if not isinstance(prefix, text_type):
                    prefix = prefix.decode(self.encoding)
                docstring = prepare_commentdoc(prefix)
                if docstring:
                    self.add_docstring(node, docstring)
                    return  # don't allow docstrings both before and after
            break
        # now look *before* the node
        pnode = node[0]
        prefix = pnode.get_prefix()
        # if the assignment is the first statement on a new indentation
        # level, its preceding whitespace and comments are not assigned
        # to that token, but the first INDENT or DEDENT token
        while not prefix:
            pnode = pnode.get_prev_leaf()
            if not pnode or pnode.type not in (token.INDENT, token.DEDENT):
                break
            prefix = pnode.get_prefix()
        if not isinstance(prefix, text_type):
            prefix = prefix.decode(self.encoding)
        docstring = prepare_commentdoc(prefix)
        self.add_docstring(node, docstring)

    def visit_simple_stmt(self, node):
        """Visit a docstring statement which may have an assignment before."""
        if node[0].type != token.STRING:
            # not a docstring; but still need to visit children
            return self.generic_visit(node)
        prev = node.get_prev_sibling()
        if not prev:
            return
        if (prev.type == sym.simple_stmt and  # type: ignore
                prev[0].type == sym.expr_stmt and _eq in prev[0].children):  # type: ignore
            # need to "eval" the string because it's returned in its
            # original form
            docstring = literals.evalString(node[0].value, self.encoding)
            docstring = prepare_docstring(docstring)
            self.add_docstring(prev[0], docstring)

    def add_docstring(self, node, docstring):
        # add an item for each assignment target
        for i in range(0, len(node) - 1, 2):
            target = node[i]
            if self.in_init and self.number2name[target.type] == 'power':
                # maybe an attribute assignment -- check necessary conditions
                if (  # node must have two children
                        len(target) != 2 or
                        # first child must be "self"
                        target[0].type != token.NAME or target[0].value != 'self' or
                        # second child must be a "trailer" with two children
                        self.number2name[target[1].type] != 'trailer' or
                        len(target[1]) != 2 or
                        # first child must be a dot, second child a name
                        target[1][0].type != token.DOT or
                        target[1][1].type != token.NAME):
                    continue
                name = target[1][1].value
            elif target.type != token.NAME:
                # don't care about other complex targets
                continue
            else:
                name = target.value
            self.add_tag(name)
            if docstring:
                namespace = '.'.join(self.namespace)
                if namespace.startswith(self.scope):
                    self.collected[namespace, name] = docstring
from typing import Any, Dict, IO, List, Tuple  # NOQA


class ModuleAnalyzer(object):
@@ -223,137 +64,59 @@ class ModuleAnalyzer(object):
        return obj

    def __init__(self, source, modname, srcname, decoded=False):
        # name of the module
        self.modname = modname
        # name of the source file
        self.srcname = srcname
        # file-like object yielding source lines
        self.source = source
        # type: (IO, unicode, unicode, bool) -> None
        self.modname = modname  # name of the module
        self.srcname = srcname  # name of the source file

        # cache the source code as well
        pos = self.source.tell()
        pos = source.tell()
        if not decoded:
            self.encoding = detect_encoding(self.source.readline)
            self.source.seek(pos)
            self.code = self.source.read().decode(self.encoding)
            self.source.seek(pos)
            self.source = TextIOWrapper(self.source, self.encoding)
            self.encoding = detect_encoding(source.readline)
            source.seek(pos)
            self.code = source.read().decode(self.encoding)
        else:
            self.encoding = None
            self.code = self.source.read()
            self.source.seek(pos)
            self.code = source.read()

        # will be filled by tokenize()
        self.tokens = None  # type: List[unicode]
        # will be filled by parse()
        self.parsetree = None  # type: Any
        # will be filled by find_attr_docs()
        self.attr_docs = None  # type: List[unicode]
        self.attr_docs = None  # type: Dict[Tuple[unicode, unicode], List[unicode]]
        self.tagorder = None  # type: Dict[unicode, int]
        # will be filled by find_tags()
        self.tags = None  # type: List[unicode]

    def tokenize(self):
        """Generate tokens from the source."""
        if self.tokens is not None:
            return
        try:
            self.tokens = list(tokenize.generate_tokens(self.source.readline))
        except tokenize.TokenError as err:
            raise PycodeError('tokenizing failed', err)
        self.source.close()
        self.tags = None  # type: Dict[unicode, Tuple[unicode, int, int]]

    def parse(self):
        """Parse the generated source tokens."""
        if self.parsetree is not None:
            return
        self.tokenize()
        # type: () -> None
        """Parse the source code."""
        try:
            self.parsetree = pydriver.parse_tokens(self.tokens)
        except parse.ParseError as err:
            raise PycodeError('parsing failed', err)
            parser = Parser(self.code, self.encoding)
            parser.parse()

    def find_attr_docs(self, scope=''):
            self.attr_docs = {}
            for (scope, comment) in iteritems(parser.comments):
                if comment:
                    self.attr_docs[scope] = comment.splitlines() + ['']
                else:
                    self.attr_docs[scope] = ['']

            self.tags = parser.definitions
            self.tagorder = parser.deforders
        except Exception as exc:
            raise PycodeError('parsing failed: %r' % exc)

    def find_attr_docs(self):
        # type: () -> Dict[Tuple[unicode, unicode], List[unicode]]
        """Find class and module-level attributes and their documentation."""
        if self.attr_docs is not None:
            return self.attr_docs
        self.parse()
        attr_visitor = AttrDocVisitor(number2name, scope, self.encoding)
        attr_visitor.visit(self.parsetree)
        self.attr_docs = attr_visitor.collected
        self.tagorder = attr_visitor.tagorder
        # now that we found everything we could in the tree, throw it away
        # (it takes quite a bit of memory for large modules)
        self.parsetree = None
        return attr_visitor.collected
        if self.attr_docs is None:
            self.parse()

        return self.attr_docs

    def find_tags(self):
        # type: () -> Dict[unicode, Tuple[unicode, int, int]]
        """Find class, function and method definitions and their location."""
        if self.tags is not None:
            return self.tags
        self.tokenize()
        result = {}
        namespace = []  # type: List[unicode]
        stack = []  # type: List[Tuple[unicode, unicode, unicode, int]]
        indent = 0
        decopos = None
        defline = False
        expect_indent = False
        emptylines = 0
        if self.tags is None:
            self.parse()

        def tokeniter(ignore = (token.COMMENT,)):
            for tokentup in self.tokens:
                if tokentup[0] not in ignore:
                    yield tokentup
        tokeniter = tokeniter()
        for type, tok, spos, epos, line in tokeniter:  # type: ignore
            if expect_indent and type != token.NL:
                if type != token.INDENT:
                    # no suite -- one-line definition
                    assert stack
                    dtype, fullname, startline, _ = stack.pop()
                    endline = epos[0]
                    namespace.pop()
                    result[fullname] = (dtype, startline, endline - emptylines)
                expect_indent = False
            if tok in ('def', 'class'):
                name = next(tokeniter)[1]  # type: ignore
                namespace.append(name)
                fullname = '.'.join(namespace)
                stack.append((tok, fullname, decopos or spos[0], indent))
                defline = True
                decopos = None
            elif type == token.OP and tok == '@':
                if decopos is None:
                    decopos = spos[0]
            elif type == token.INDENT:
                expect_indent = False
                indent += 1
            elif type == token.DEDENT:
                indent -= 1
                # if the stacklevel is the same as it was before the last
                # def/class block, this dedent closes that block
                if stack and indent == stack[-1][3]:
                    dtype, fullname, startline, _ = stack.pop()
                    endline = spos[0]
                    namespace.pop()
                    result[fullname] = (dtype, startline, endline - emptylines)
            elif type == token.NEWLINE:
                # if this line contained a definition, expect an INDENT
                # to start the suite; if there is no such INDENT
                # it's a one-line definition
                if defline:
                    defline = False
                    expect_indent = True
                emptylines = 0
            elif type == token.NL:
                # count up if line is empty or comment only
                if emptyline_re.match(line):
                    emptylines += 1
                else:
                    emptylines = 0
        self.tags = result
        return result
        return self.tags


if __name__ == '__main__':
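Taken together, the rewritten ModuleAnalyzer no longer tokenizes or builds a pgen2 parse tree itself: parse() now delegates to the new sphinx.pycode.parser.Parser and fills attr_docs, tags and tagorder in one pass, while find_attr_docs() and find_tags() become thin caching wrappers. A minimal usage sketch, based only on the constructor and methods shown above (the module name, file name and source text are made up, and the values in the comments are what the code above should produce, not verified output):

    from io import BytesIO
    from sphinx.pycode import ModuleAnalyzer

    source = b'#: number of retries before giving up\nRETRIES = 3\n'

    # signature shown above: (source, modname, srcname, decoded=False)
    analyzer = ModuleAnalyzer(BytesIO(source), 'example_mod', 'example_mod.py')

    attr_docs = analyzer.find_attr_docs()
    # e.g. {('', 'RETRIES'): ['number of retries before giving up', '']}
    tags = analyzer.find_tags()
    # e.g. {} -- the sample module defines no classes or functions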
@@ -1,212 +0,0 @@
# -*- coding: utf-8 -*-
"""
    sphinx.pycode.nodes
    ~~~~~~~~~~~~~~~~~~~

    Parse tree node implementations.

    :copyright: Copyright 2007-2017 by the Sphinx team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

if False:
    # For type annotation
    from typing import Callable  # NOQA


class BaseNode(object):
    """
    Node superclass for both terminal and nonterminal nodes.
    """
    parent = None  # type: BaseNode

    def _eq(self, other):
        raise NotImplementedError

    def __eq__(self, other):
        if self.__class__ is not other.__class__:
            return NotImplemented
        return self._eq(other)

    def __ne__(self, other):
        if self.__class__ is not other.__class__:
            return NotImplemented
        return not self._eq(other)

    __hash__ = None  # type: Callable[[object], int]

    def get_prev_sibling(self):
        """Return previous child in parent's children, or None."""
        if self.parent is None:
            return None
        for i, child in enumerate(self.parent.children):
            if child is self:
                if i == 0:
                    return None
                return self.parent.children[i - 1]

    def get_next_sibling(self):
        """Return next child in parent's children, or None."""
        if self.parent is None:
            return None
        for i, child in enumerate(self.parent.children):
            if child is self:
                try:
                    return self.parent.children[i + 1]
                except IndexError:
                    return None

    def get_prev_leaf(self):
        """Return the leaf node that precedes this node in the parse tree."""
        def last_child(node):
            if isinstance(node, Leaf):
                return node
            elif not node.children:
                return None
            else:
                return last_child(node.children[-1])
        if self.parent is None:
            return None
        prev = self.get_prev_sibling()
        if isinstance(prev, Leaf):
            return prev
        elif prev is not None:
            return last_child(prev)
        return self.parent.get_prev_leaf()

    def get_next_leaf(self):
        """Return self if leaf, otherwise the leaf node that succeeds this
        node in the parse tree.
        """
        node = self
        while not isinstance(node, Leaf):
            assert node.children
            node = node.children[0]
        return node

    def get_lineno(self):
        """Return the line number which generated the invocant node."""
        return self.get_next_leaf().lineno

    def get_prefix(self):
        """Return the prefix of the next leaf node."""
        # only leaves carry a prefix
        return self.get_next_leaf().prefix


class Node(BaseNode):
    """
    Node implementation for nonterminals.
    """

    def __init__(self, type, children, context=None):
        # type of nonterminals is >= 256
        # assert type >= 256, type
        self.type = type
        self.children = list(children)
        for ch in self.children:
            # assert ch.parent is None, repr(ch)
            ch.parent = self

    def __repr__(self):
        return '%s(%s, %r)' % (self.__class__.__name__,
                               self.type, self.children)

    def __str__(self):
        """This reproduces the input source exactly."""
        return ''.join(map(str, self.children))

    def _eq(self, other):
        return (self.type, self.children) == (other.type, other.children)

    # support indexing the node directly instead of .children

    def __getitem__(self, index):
        return self.children[index]

    def __iter__(self):
        return iter(self.children)

    def __len__(self):
        return len(self.children)


class Leaf(BaseNode):
    """
    Node implementation for leaf nodes (terminals).
    """
    prefix = ''  # Whitespace and comments preceding this token in the input
    lineno = 0   # Line where this token starts in the input
    column = 0   # Column where this token starts in the input

    def __init__(self, type, value, context=None):
        # type of terminals is below 256
        # assert 0 <= type < 256, type
        self.type = type
        self.value = value
        if context is not None:
            self.prefix, (self.lineno, self.column) = context

    def __repr__(self):
        return '%s(%r, %r, %r)' % (self.__class__.__name__,
                                   self.type, self.value, self.prefix)

    def __str__(self):
        """This reproduces the input source exactly."""
        return self.prefix + str(self.value)

    def _eq(self, other):
        """Compares two nodes for equality."""
        return (self.type, self.value) == (other.type, other.value)


def convert(grammar, raw_node):
    """Convert raw node to a Node or Leaf instance."""
    type, value, context, children = raw_node
    if children or type in grammar.number2symbol:
        # If there's exactly one child, return that child instead of
        # creating a new node.
        if len(children) == 1:
            return children[0]
        return Node(type, children, context=context)
    else:
        return Leaf(type, value, context=context)


def nice_repr(node, number2name, prefix=False):
    def _repr(node):
        if isinstance(node, Leaf):
            return "%s(%r)" % (number2name[node.type], node.value)
        else:
            return "%s(%s)" % (number2name[node.type],
                               ', '.join(map(_repr, node.children)))

    def _prepr(node):
        if isinstance(node, Leaf):
            return "%s(%r, %r)" % (number2name[node.type],
                                   node.prefix, node.value)
        else:
            return "%s(%s)" % (number2name[node.type],
                               ', '.join(map(_prepr, node.children)))
    return (prefix and _prepr or _repr)(node)


class NodeVisitor(object):
    def __init__(self, number2name, *args):
        self.number2name = number2name
        self.init(*args)

    def init(self, *args):
        pass

    def visit(self, node):
        """Visit a node."""
        method = 'visit_' + self.number2name[node.type]
        visitor = getattr(self, method, self.generic_visit)
        return visitor(node)

    def generic_visit(self, node):
        """Called if no explicit visitor function exists for a node."""
        if isinstance(node, Node):
            for child in node:  # type: ignore
                self.visit(child)
sphinx/pycode/parser.py (new file, 463 lines)
@@ -0,0 +1,463 @@
# -*- coding: utf-8 -*-
"""
    sphinx.pycode.parser
    ~~~~~~~~~~~~~~~~~~~~

    Utilities parsing and analyzing Python code.

    :copyright: Copyright 2007-2017 by the Sphinx team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""
import re
import ast
import inspect
import tokenize
import itertools
from token import NAME, NEWLINE, INDENT, DEDENT, NUMBER, OP, STRING
from tokenize import COMMENT, NL

from six import PY2, text_type

if False:
    # For type annotation
    from typing import Any, Dict, IO, List, Tuple  # NOQA

comment_re = re.compile(u'^\\s*#: ?(.*)\r?\n?$')
indent_re = re.compile(u'^\\s*$')
emptyline_re = re.compile(u'^\\s*(#.*)?$')


def get_lvar_names(node, self=None):
    # type: (ast.AST, ast.expr) -> List[unicode]
    """Convert assignment-AST to variable names.

    This raises `TypeError` if the assignment does not create new variable::

        ary[0] = 'foo'
        dic["bar"] = 'baz'
        # => TypeError
    """
    if self:
        if PY2:
            self_id = self.id  # type: ignore
        else:
            self_id = self.arg

    node_name = node.__class__.__name__
    if node_name in ('Index', 'Num', 'Slice', 'Str', 'Subscript'):
        raise TypeError('%r does not create new variable' % node)
    elif node_name == 'Name':
        if self is None or node.id == self_id:  # type: ignore
            return [node.id]  # type: ignore
        else:
            raise TypeError('The assignment %r is not instance variable' % node)
    elif node_name == 'Tuple':
        members = [get_lvar_names(elt) for elt in node.elts]  # type: ignore
        return sum(members, [])
    elif node_name == 'Attribute':
        if node.value.__class__.__name__ == 'Name' and self and node.value.id == self_id:  # type: ignore  # NOQA
            # instance variable
            return ["%s" % get_lvar_names(node.attr, self)[0]]  # type: ignore
        else:
            raise TypeError('The assignment %r is not instance variable' % node)
    elif node_name == 'str':
        return [node]  # type: ignore
    else:
        raise NotImplementedError


def dedent_docstring(s):
    # type: (unicode) -> unicode
    """Remove common leading indentation from docstring."""
    def dummy():
        # dummy function to mock `inspect.getdoc`.
        pass

    dummy.__doc__ = s  # type: ignore
    docstring = inspect.getdoc(dummy)
    return docstring.lstrip("\r\n").rstrip("\r\n")


class Token(object):
    """Better token wrapper for tokenize module."""

    def __init__(self, kind, value, start, end, source):
        # type: (int, Any, Tuple[int, int], Tuple[int, int], unicode) -> None  # NOQA
        self.kind = kind
        self.value = value
        self.start = start
        self.end = end
        self.source = source

    def __eq__(self, other):
        # type: (Any) -> bool
        if isinstance(other, int):
            return self.kind == other
        elif isinstance(other, str):
            return self.value == other
        elif isinstance(other, (list, tuple)):
            return [self.kind, self.value] == list(other)
        elif other is None:
            return False
        else:
            raise ValueError('Unknown value: %r' % other)

    def __ne__(self, other):
        # type: (Any) -> bool
        return not (self == other)

    def match(self, *conditions):
        # type: (Any) -> bool
        return any(self == candidate for candidate in conditions)

    def __repr__(self):
        # type: () -> str
        return '<Token kind=%r value=%r>' % (tokenize.tok_name[self.kind],
                                             self.value.strip())


class TokenProcessor(object):
    def __init__(self, buffers):
        # type: (List[unicode]) -> None
        lines = iter(buffers)
        self.buffers = buffers
        self.tokens = tokenize.generate_tokens(lambda: next(lines))  # type: ignore  # NOQA
        self.current = None  # type: Token
        self.previous = None  # type: Token

    def get_line(self, lineno):
        # type: (int) -> unicode
        """Returns specified line."""
        return self.buffers[lineno - 1]

    def fetch_token(self):
        # type: () -> Token
        """Fetch a next token from source code.

        Returns ``False`` if sequence finished.
        """
        try:
            self.previous = self.current
            self.current = Token(*next(self.tokens))
        except StopIteration:
            self.current = None

        return self.current

    def fetch_until(self, condition):
        # type: (Any) -> List[Token]
        """Fetch tokens until specified token appeared.

        .. note:: This also handles parenthesis well.
        """
        tokens = []
        while self.fetch_token():
            tokens.append(self.current)
            if self.current == condition:
                break
            elif self.current == [OP, '(']:
                tokens += self.fetch_until([OP, ')'])
            elif self.current == [OP, '{']:
                tokens += self.fetch_until([OP, '}'])
            elif self.current == [OP, '[']:
                tokens += self.fetch_until([OP, ']'])

        return tokens


class AfterCommentParser(TokenProcessor):
    """Python source code parser to pick up comment after assignment.

    This parser takes a python code starts with assignment statement,
    and returns the comments for variable if exists.
    """

    def __init__(self, lines):
        # type: (List[unicode]) -> None
        super(AfterCommentParser, self).__init__(lines)
        self.comment = None  # type: unicode

    def fetch_rvalue(self):
        # type: () -> List[Token]
        """Fetch right-hand value of assignment."""
        tokens = []
        while self.fetch_token():
            tokens.append(self.current)
            if self.current == [OP, '(']:
                tokens += self.fetch_until([OP, ')'])
            elif self.current == [OP, '{']:
                tokens += self.fetch_until([OP, '}'])
            elif self.current == [OP, '[']:
                tokens += self.fetch_until([OP, ']'])
            elif self.current == INDENT:
                tokens += self.fetch_until(DEDENT)
            elif self.current == [OP, ';']:
                break
            elif self.current.kind not in (OP, NAME, NUMBER, STRING):
                break

        return tokens

    def parse(self):
        # type: () -> None
        """Parse the code and obtain comment after assignment."""
        # skip lvalue (until '=' operator)
        while self.fetch_token() != [OP, '=']:
            assert self.current

        # skip rvalue
        self.fetch_rvalue()

        if self.current == COMMENT:
            self.comment = self.current.value


class VariableCommentPicker(ast.NodeVisitor):
    """Python source code parser to pick up variable comments."""

    def __init__(self, buffers, encoding):
        # type: (List[unicode], unicode) -> None
        self.counter = itertools.count()
        self.buffers = buffers
        self.encoding = encoding
        self.context = []  # type: List[unicode]
        self.current_classes = []  # type: List[unicode]
        self.current_function = None  # type: ast.FunctionDef
        self.comments = {}  # type: Dict[Tuple[unicode, unicode], unicode]
        self.previous = None  # type: ast.AST
        self.deforders = {}  # type: Dict[unicode, int]
        super(VariableCommentPicker, self).__init__()

    def add_entry(self, name):
        # type: (unicode) -> None
        if self.current_function:
            if self.current_classes and self.context[-1] == "__init__":
                # store variable comments inside __init__ method of classes
                definition = self.context[:-1] + [name]
            else:
                return
        else:
            definition = self.context + [name]

        self.deforders[".".join(definition)] = next(self.counter)

    def add_variable_comment(self, name, comment):
        # type: (unicode, unicode) -> None
        if self.current_function:
            if self.current_classes and self.context[-1] == "__init__":
                # store variable comments inside __init__ method of classes
                context = ".".join(self.context[:-1])
            else:
                return
        else:
            context = ".".join(self.context)

        self.comments[(context, name)] = comment

    def get_self(self):
        # type: () -> ast.expr
        """Returns the name of first argument if in function."""
        if self.current_function and self.current_function.args.args:
            return self.current_function.args.args[0]
        else:
            return None

    def get_line(self, lineno):
        # type: (int) -> unicode
        """Returns specified line."""
        return self.buffers[lineno - 1]

    def visit(self, node):
        # type: (ast.AST) -> None
        """Updates self.previous to ."""
        super(VariableCommentPicker, self).visit(node)
        self.previous = node

    def visit_Assign(self, node):
        # type: (ast.Assign) -> None
        """Handles Assign node and pick up a variable comment."""
        try:
            varnames = sum([get_lvar_names(t, self=self.get_self()) for t in node.targets], [])  # type: ignore  # NOQA
            current_line = self.get_line(node.lineno)
        except TypeError:
            return  # this assignment is not new definition!

        # check comments after assignment
        parser = AfterCommentParser([current_line[node.col_offset:]] +
                                    self.buffers[node.lineno:])
        parser.parse()
        if parser.comment and comment_re.match(parser.comment):
            for varname in varnames:
                self.add_variable_comment(varname, comment_re.sub('\\1', parser.comment))
                self.add_entry(varname)
            return

        # check comments before assignment
        if indent_re.match(current_line[:node.col_offset]):
            comment_lines = []
            for i in range(node.lineno - 1):
                before_line = self.get_line(node.lineno - 1 - i)
                if comment_re.match(before_line):
                    comment_lines.append(comment_re.sub('\\1', before_line))
                else:
                    break

            if comment_lines:
                comment = dedent_docstring('\n'.join(reversed(comment_lines)))
                for varname in varnames:
                    self.add_variable_comment(varname, comment)
                    self.add_entry(varname)
                return

        # not commented (record deforders only)
        for varname in varnames:
            self.add_entry(varname)

    def visit_Expr(self, node):
        # type: (ast.Expr) -> None
        """Handles Expr node and pick up a comment if string."""
        if (isinstance(self.previous, ast.Assign) and isinstance(node.value, ast.Str)):
            try:
                varnames = get_lvar_names(self.previous.targets[0], self.get_self())
                for varname in varnames:
                    if isinstance(node.value.s, text_type):
                        docstring = node.value.s
                    else:
                        docstring = node.value.s.decode(self.encoding or 'utf-8')

                    self.add_variable_comment(varname, dedent_docstring(docstring))
                    self.add_entry(varname)
            except TypeError:
                pass  # this assignment is not new definition!

    def visit_ClassDef(self, node):
        # type: (ast.ClassDef) -> None
        """Handles ClassDef node and set context."""
        self.current_classes.append(node.name)
        self.add_entry(node.name)
        self.context.append(node.name)
        for child in node.body:
            self.visit(child)
        self.context.pop()
        self.current_classes.pop()

    def visit_FunctionDef(self, node):
        # type: (ast.FunctionDef) -> None
        """Handles FunctionDef node and set context."""
        if self.current_function is None:
            self.add_entry(node.name)  # should be called before setting self.current_function
            self.context.append(node.name)
            self.current_function = node
            for child in node.body:
                self.visit(child)
            self.context.pop()
            self.current_function = None


class DefinitionFinder(TokenProcessor):
    def __init__(self, lines):
        # type: (List[unicode]) -> None
        super(DefinitionFinder, self).__init__(lines)
        self.decorator = None  # type: Token
        self.context = []  # type: List[unicode]
        self.indents = []  # type: List
        self.definitions = {}  # type: Dict[unicode, Tuple[unicode, int, int]]

    def add_definition(self, name, entry):
        # type: (unicode, Tuple[unicode, int, int]) -> None
        if self.indents and self.indents[-1][0] == 'def' and entry[0] == 'def':
            # ignore definition of inner function
            pass
        else:
            self.definitions[name] = entry

    def parse(self):
        # type: () -> None
        while True:
            token = self.fetch_token()
            if token is None:
                break
            elif token == COMMENT:
                pass
            elif token == [OP, '@'] and (self.previous is None or
                                         self.previous.match(NEWLINE, NL, INDENT, DEDENT)):
                if self.decorator is None:
                    self.decorator = token
            elif token.match([NAME, 'class']):
                self.parse_definition('class')
            elif token.match([NAME, 'def']):
                self.parse_definition('def')
            elif token == INDENT:
                self.indents.append(('other', None, None))
            elif token == DEDENT:
                self.finalize_block()

    def parse_definition(self, typ):
        # type: (unicode) -> None
        name = self.fetch_token()
        self.context.append(name.value)
        funcname = '.'.join(self.context)

        if self.decorator:
            start_pos = self.decorator.start[0]
            self.decorator = None
        else:
            start_pos = name.start[0]

        self.fetch_until([OP, ':'])
        if self.fetch_token().match(COMMENT, NEWLINE):
            self.fetch_until(INDENT)
            self.indents.append((typ, funcname, start_pos))
        else:
            # one-liner
            self.add_definition(funcname, (typ, start_pos, name.end[0]))
            self.context.pop()

    def finalize_block(self):
        # type: () -> None
        definition = self.indents.pop()
        if definition[0] != 'other':
            typ, funcname, start_pos = definition
            end_pos = self.current.end[0] - 1
            while emptyline_re.match(self.get_line(end_pos)):
                end_pos -= 1

            self.add_definition(funcname, (typ, start_pos, end_pos))
            self.context.pop()


class Parser(object):
    """Python source code parser to pick up variable comments.

    This is a better wrapper for ``VariableCommentPicker``.
    """

    def __init__(self, code, encoding='utf-8'):
        # type: (unicode, unicode) -> None
        self.code = code
        self.encoding = encoding
        self.comments = {}  # type: Dict[Tuple[unicode, unicode], unicode]
        self.deforders = {}  # type: Dict[unicode, int]
        self.definitions = {}  # type: Dict[unicode, Tuple[unicode, int, int]]

    def parse(self):
        # type: () -> None
        """Parse the source code."""
        self.parse_comments()
        self.parse_definition()

    def parse_comments(self):
        # type: () -> None
        """Parse the code and pick up comments."""
        tree = ast.parse(self.code.encode('utf-8'))
        picker = VariableCommentPicker(self.code.splitlines(True), self.encoding)
        picker.visit(tree)
        self.comments = picker.comments
        self.deforders = picker.deforders

    def parse_definition(self):
        # type: () -> None
        """Parse the location of definitions from the code."""
        parser = DefinitionFinder(self.code.splitlines(True))
        parser.parse()
        self.definitions = parser.definitions
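The new module is self-contained: Parser.parse() runs VariableCommentPicker over the ast tree to collect '#:' comments and attribute docstrings, and DefinitionFinder over the raw token stream to locate class and function definitions. A small sketch of driving it directly (the sample source is made up, and the exact end line numbers in the comments depend on DefinitionFinder's handling of trailing blank lines):

    from sphinx.pycode.parser import Parser

    code = (
        'class Config(object):\n'
        '    #: maximum number of workers\n'
        '    workers = 4\n'
        '\n'
        '    def reset(self):\n'
        '        self.workers = 4\n'
    )

    parser = Parser(code)
    parser.parse()

    print(parser.comments)     # e.g. {('Config', 'workers'): 'maximum number of workers'}
    print(parser.definitions)  # e.g. {'Config': ('class', 1, 6), 'Config.reset': ('def', 5, 6)}
    print(parser.deforders)    # definition order, e.g. {'Config': 0, 'Config.workers': 1, 'Config.reset': 2}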
@@ -1,4 +0,0 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

"""The pgen2 package."""
@@ -1,154 +0,0 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

# Modifications:
# Copyright 2006 Google, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

"""Parser driver.

This provides a high-level interface to parse a file into a syntax tree.

"""

__author__ = "Guido van Rossum <guido@python.org>"

__all__ = ["Driver", "load_grammar"]

# Python imports
import os
import logging

import sphinx

# Pgen imports
from sphinx.pycode.pgen2 import grammar, parse, token, tokenize, pgen


class Driver(object):

    def __init__(self, grammar, convert=None, logger=None):
        self.grammar = grammar
        if logger is None:
            logger = logging.getLogger()
        self.logger = logger
        self.convert = convert

    def parse_tokens(self, tokens, debug=False):
        """Parse a series of tokens and return the syntax tree."""
        # XXX Move the prefix computation into a wrapper around tokenize.
        p = parse.Parser(self.grammar, self.convert)
        p.setup()
        lineno = 1
        column = 0
        type = value = start = end = line_text = None
        prefix = ""
        opmap = grammar.opmap
        for type, value, start, end, line_text in tokens:
            if start != (lineno, column):
                assert (lineno, column) <= start, ((lineno, column), start)
                s_lineno, s_column = start
                if lineno < s_lineno:
                    prefix += "\n" * (s_lineno - lineno)
                    lineno = s_lineno
                    column = 0
                if column < s_column:
                    prefix += line_text[column:s_column]
                    column = s_column
            if type in (tokenize.COMMENT, tokenize.NL):
                prefix += value
                lineno, column = end
                if value.endswith("\n"):
                    lineno += 1
                    column = 0
                continue
            if type == token.OP:
                type = opmap[value]
            # if debug:
            #     self.logger.debug("%s %r (prefix=%r)",
            #                       token.tok_name[type], value, prefix)
            if p.addtoken(type, value, (prefix, start)):
                # if debug:
                #     self.logger.debug("Stop.")
                break
            prefix = ""
            lineno, column = end
            if value.endswith("\n"):
                lineno += 1
                column = 0
        else:
            # We never broke out -- EOF is too soon (how can this happen???)
            raise parse.ParseError("incomplete input", type, value, line_text)
        return p.rootnode

    def parse_stream_raw(self, stream, debug=False):
        """Parse a stream and return the syntax tree."""
        tokens = tokenize.generate_tokens(stream.readline)
        return self.parse_tokens(tokens, debug)

    def parse_stream(self, stream, debug=False):
        """Parse a stream and return the syntax tree."""
        return self.parse_stream_raw(stream, debug)

    def parse_file(self, filename, debug=False):
        """Parse a file and return the syntax tree."""
        with open(filename) as stream:
            return self.parse_stream(stream, debug)

    def parse_string(self, text, debug=False):
        """Parse a string and return the syntax tree."""
        tokens = tokenize.generate_tokens(generate_lines(text).next)
        return self.parse_tokens(tokens, debug)


def generate_lines(text):
    """Generator that behaves like readline without using StringIO."""
    for line in text.splitlines(True):
        yield line
    while True:
        yield ""


def get_compiled_path(filename):
    head, tail = os.path.splitext(filename)
    if tail == ".txt":
        tail = ""
    return "%s%s.pickle" % (head, tail)


def compile_grammar(gt='Grammar.txt', logger=None):
    """Compile the grammer."""
    if logger is None:
        logger = logging.getLogger()

    logger.info("Generating grammar tables from %s", gt)
    g = pgen.generate_grammar(gt)
    gp = get_compiled_path(gt)
    logger.info("Writing grammar tables to %s", gp)
    try:
        g.dump(gp)
    except IOError as e:
        logger.info("Writing failed:"+str(e))


def load_grammar(gt="Grammar.txt", logger=None):
    """Load the grammar (maybe from a pickle)."""
    if logger is None:
        logger = logging.getLogger()
    gp = get_compiled_path(gt)
    if not os.path.exists(gp):
        logger.info("Generating grammar tables from %s", gt)
        g = pgen.generate_grammar(gt)
    else:
        g = grammar.Grammar()
        g.load(gp)
    return g


def _newer(a, b):
    """Inquire whether file a was written since file b."""
    if not os.path.exists(a):
        return False
    if not os.path.exists(b):
        return True
    return os.path.getmtime(a) >= os.path.getmtime(b)
@@ -1,177 +0,0 @@
|
||||
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
|
||||
# Licensed to PSF under a Contributor Agreement.
|
||||
|
||||
"""This module defines the data structures used to represent a grammar.
|
||||
|
||||
These are a bit arcane because they are derived from the data
|
||||
structures used by Python's 'pgen' parser generator.
|
||||
|
||||
There's also a table here mapping operators to their names in the
|
||||
token module; the Python tokenize module reports all operators as the
|
||||
fallback token code OP, but the parser needs the actual token code.
|
||||
|
||||
"""
|
||||
from __future__ import print_function
|
||||
|
||||
# Python imports
|
||||
import pickle
|
||||
|
||||
# Local imports
|
||||
from sphinx.pycode.pgen2 import token
|
||||
|
||||
if False:
|
||||
# For type annotation
|
||||
from typing import Dict, List, Tuple # NOQA
|
||||
|
||||
|
||||
class Grammar(object):
|
||||
"""Pgen parsing tables tables conversion class.
|
||||
|
||||
Once initialized, this class supplies the grammar tables for the
|
||||
parsing engine implemented by parse.py. The parsing engine
|
||||
accesses the instance variables directly. The class here does not
|
||||
provide initialization of the tables; several subclasses exist to
|
||||
do this (see the conv and pgen modules).
|
||||
|
||||
The load() method reads the tables from a pickle file, which is
|
||||
much faster than the other ways offered by subclasses. The pickle
|
||||
file is written by calling dump() (after loading the grammar
|
||||
tables using a subclass). The report() method prints a readable
|
||||
representation of the tables to stdout, for debugging.
|
||||
|
||||
The instance variables are as follows:
|
||||
|
||||
symbol2number -- a dict mapping symbol names to numbers. Symbol
|
||||
numbers are always 256 or higher, to distinguish
|
||||
them from token numbers, which are between 0 and
|
||||
255 (inclusive).
|
||||
|
||||
number2symbol -- a dict mapping numbers to symbol names;
|
||||
these two are each other's inverse.
|
||||
|
||||
states -- a list of DFAs, where each DFA is a list of
|
||||
states, each state is a list of arcs, and each
|
||||
arc is an (i, j) pair where i is a label and j is
|
||||
a state number. The DFA number is the index into
|
||||
this list. (This name is slightly confusing.)
|
||||
Final states are represented by a special arc of
|
||||
the form (0, j) where j is its own state number.
|
||||
|
||||
dfas -- a dict mapping symbol numbers to (DFA, first)
|
||||
pairs, where DFA is an item from the states list
|
||||
above, and first is a set of tokens that can
|
||||
begin this grammar rule (represented by a dict
|
||||
whose values are always 1).
|
||||
|
||||
labels -- a list of (x, y) pairs where x is either a token
|
||||
number or a symbol number, and y is either None
|
||||
or a string; the strings are keywords. The label
|
||||
number is the index in this list; label numbers
|
||||
are used to mark state transitions (arcs) in the
|
||||
DFAs.
|
||||
|
||||
start -- the number of the grammar's start symbol.
|
||||
|
||||
keywords -- a dict mapping keyword strings to arc labels.
|
||||
|
||||
tokens -- a dict mapping token numbers to arc labels.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.symbol2number = {} # type: Dict[unicode, int]
|
||||
self.number2symbol = {} # type: Dict[int, unicode]
|
||||
self.states = [] # type: List[List[List[Tuple[int, int]]]]
|
||||
self.dfas = {} # type: Dict[int, Tuple[List[List[Tuple[int, int]]], unicode]]
|
||||
self.labels = [(0, "EMPTY")]
|
||||
self.keywords = {} # type: Dict[unicode, unicode]
|
||||
self.tokens = {} # type: Dict[unicode, unicode]
|
||||
self.symbol2label = {} # type: Dict[unicode, unicode]
|
||||
self.start = 256
|
||||
|
||||
def dump(self, filename):
|
||||
"""Dump the grammar tables to a pickle file."""
|
||||
with open(filename, "wb") as f:
|
||||
pickle.dump(self.__dict__, f, 2)
|
||||
|
||||
def load(self, filename):
|
||||
"""Load the grammar tables from a pickle file."""
|
||||
f = open(filename, "rb")
|
||||
d = pickle.load(f)
|
||||
f.close()
|
||||
self.__dict__.update(d)
|
||||
|
||||
def report(self):
|
||||
"""Dump the grammar tables to standard output, for debugging."""
|
||||
from pprint import pprint
|
||||
print("s2n")
|
||||
pprint(self.symbol2number)
|
||||
print("n2s")
|
||||
pprint(self.number2symbol)
|
||||
print("states")
|
||||
pprint(self.states)
|
||||
print("dfas")
|
||||
pprint(self.dfas)
|
||||
print("labels")
|
||||
pprint(self.labels)
|
||||
print("start", self.start)
|
||||
|
||||
|
||||
# Map from operator to number (since tokenize doesn't do this)
|
||||
|
||||
opmap_raw = """
|
||||
( LPAR
|
||||
) RPAR
|
||||
[ LSQB
|
||||
] RSQB
|
||||
: COLON
|
||||
, COMMA
|
||||
; SEMI
|
||||
+ PLUS
|
||||
- MINUS
|
||||
* STAR
|
||||
/ SLASH
|
||||
| VBAR
|
||||
& AMPER
|
||||
< LESS
|
||||
> GREATER
|
||||
= EQUAL
|
||||
. DOT
|
||||
% PERCENT
|
||||
` BACKQUOTE
|
||||
{ LBRACE
|
||||
} RBRACE
|
||||
@ AT
|
||||
@= ATEQUAL
|
||||
== EQEQUAL
|
||||
!= NOTEQUAL
|
||||
<> NOTEQUAL
|
||||
<= LESSEQUAL
|
||||
>= GREATEREQUAL
|
||||
~ TILDE
|
||||
^ CIRCUMFLEX
|
||||
<< LEFTSHIFT
|
||||
>> RIGHTSHIFT
|
||||
** DOUBLESTAR
|
||||
+= PLUSEQUAL
|
||||
-= MINEQUAL
|
||||
*= STAREQUAL
|
||||
/= SLASHEQUAL
|
||||
%= PERCENTEQUAL
|
||||
&= AMPEREQUAL
|
||||
|= VBAREQUAL
|
||||
^= CIRCUMFLEXEQUAL
|
||||
<<= LEFTSHIFTEQUAL
|
||||
>>= RIGHTSHIFTEQUAL
|
||||
**= DOUBLESTAREQUAL
|
||||
// DOUBLESLASH
|
||||
//= DOUBLESLASHEQUAL
|
||||
-> RARROW
|
||||
... ELLIPSIS
|
||||
"""
|
||||
|
||||
opmap = {}
|
||||
for line in opmap_raw.splitlines():
|
||||
if line:
|
||||
op, name = line.split()
|
||||
opmap[op] = getattr(token, name)
|
||||
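The docstring of the Grammar class above describes the DFA encoding rather tersely. A toy, self-contained illustration of that layout (not the module's data): each state is a list of (label, next_state) arcs, and a final state carries the special arc (0, itself).

# Toy DFA that accepts one or more occurrences of label 1.
toy_dfa = [
    [(1, 1)],          # state 0: on label 1 go to state 1
    [(1, 1), (0, 1)],  # state 1: loop on label 1; (0, 1) marks it as final
]

def accepts(dfa, labels):
    state = 0
    for label in labels:
        for arc_label, nxt in dfa[state]:
            if arc_label == label:
                state = nxt
                break
        else:
            return False
    return (0, state) in dfa[state]

assert accepts(toy_dfa, [1, 1, 1]) and not accepts(toy_dfa, [])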
@@ -1,100 +0,0 @@
|
||||
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
|
||||
# Licensed to PSF under a Contributor Agreement.
|
||||
|
||||
# Extended to handle raw and unicode literals by Georg Brandl.
|
||||
|
||||
"""Safely evaluate Python string literals without using eval()."""
|
||||
from __future__ import print_function
|
||||
|
||||
import re
|
||||
|
||||
from six import text_type
|
||||
|
||||
|
||||
simple_escapes = {"a": "\a",
|
||||
"b": "\b",
|
||||
"f": "\f",
|
||||
"n": "\n",
|
||||
"r": "\r",
|
||||
"t": "\t",
|
||||
"v": "\v",
|
||||
"'": "'",
|
||||
'"': '"',
|
||||
"\\": "\\"}
|
||||
|
||||
def convert_hex(x, n):
|
||||
if len(x) < n+1:
|
||||
raise ValueError("invalid hex string escape ('\\%s')" % x)
|
||||
try:
|
||||
return int(x[1:], 16)
|
||||
except ValueError:
|
||||
raise ValueError("invalid hex string escape ('\\%s')" % x)
|
||||
|
||||
def escape(m):
|
||||
all, tail = m.group(0, 1)
|
||||
assert all.startswith("\\")
|
||||
esc = simple_escapes.get(tail)
|
||||
if esc is not None:
|
||||
return esc
|
||||
elif tail.startswith("x"):
|
||||
return chr(convert_hex(tail, 2))
|
||||
elif tail.startswith('u'):
|
||||
return unichr(convert_hex(tail, 4))
|
||||
elif tail.startswith('U'):
|
||||
return unichr(convert_hex(tail, 8))
|
||||
elif tail.startswith('N'):
|
||||
import unicodedata
|
||||
try:
|
||||
return unicodedata.lookup(tail[1:-1])
|
||||
except KeyError:
|
||||
raise ValueError("undefined character name %r" % tail[1:-1])
|
||||
else:
|
||||
try:
|
||||
return chr(int(tail, 8))
|
||||
except ValueError:
|
||||
raise ValueError("invalid octal string escape ('\\%s')" % tail)
|
||||
|
||||
def escaperaw(m):
|
||||
all, tail = m.group(0, 1)
|
||||
if tail.startswith('u'):
|
||||
return unichr(convert_hex(tail, 4))
|
||||
elif tail.startswith('U'):
|
||||
return unichr(convert_hex(tail, 8))
|
||||
else:
|
||||
return all
|
||||
|
||||
escape_re = re.compile(r"\\(\'|\"|\\|[abfnrtv]|x.{0,2}|[0-7]{1,3})")
|
||||
uni_escape_re = re.compile(r"\\(\'|\"|\\|[abfnrtv]|x.{0,2}|[0-7]{1,3}|"
|
||||
r"u[0-9a-fA-F]{0,4}|U[0-9a-fA-F]{0,8}|N\{.+?\})")
|
||||
|
||||
def evalString(s, encoding=None):
|
||||
regex = escape_re
|
||||
repl = escape
|
||||
if encoding and not isinstance(s, text_type):
|
||||
s = s.decode(encoding)
|
||||
if s.startswith('u') or s.startswith('U'):
|
||||
regex = uni_escape_re
|
||||
s = s[1:]
|
||||
if s.startswith('r') or s.startswith('R'):
|
||||
repl = escaperaw
|
||||
s = s[1:]
|
||||
assert s.startswith("'") or s.startswith('"'), repr(s[:1])
|
||||
q = s[0]
|
||||
if s[:3] == q*3:
|
||||
q = q*3
|
||||
assert s.endswith(q), repr(s[-len(q):])
|
||||
assert len(s) >= 2*len(q)
|
||||
s = s[len(q):-len(q)]
|
||||
return regex.sub(repl, s)
|
||||
|
||||
def test():
|
||||
for i in range(256):
|
||||
c = chr(i)
|
||||
s = repr(c)
|
||||
e = evalString(s)
|
||||
if e != c:
|
||||
print(i, c, s, e)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
test()
|
||||
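evalString() above strips the prefix characters and quotes and then substitutes escape sequences with a regex instead of calling eval(). A tiny self-contained sketch of that substitution step (heavily simplified, handling only a few escapes; it is not the module's code):

import re

simple = {"n": "\n", "t": "\t", "\\": "\\", "'": "'", '"': '"'}
escape_re = re.compile(r"\\(.)")

def decode_body(body):
    # Replace each backslash escape with its decoded character; leave
    # unknown escapes untouched.
    return escape_re.sub(lambda m: simple.get(m.group(1), m.group(0)), body)

assert decode_body(r"line one\nline two") == "line one\nline two"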
File diff suppressed because it is too large
@@ -1,206 +0,0 @@
|
||||
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
|
||||
# Licensed to PSF under a Contributor Agreement.
|
||||
|
||||
"""Parser engine for the grammar tables generated by pgen.
|
||||
|
||||
The grammar table must be loaded first.
|
||||
|
||||
See Parser/parser.c in the Python distribution for additional info on
|
||||
how this parsing engine works.
|
||||
|
||||
"""
|
||||
|
||||
# Local imports
|
||||
from sphinx.pycode.pgen2 import token
|
||||
|
||||
if False:
|
||||
# For type annotation
|
||||
from typing import Any, List, Set, Tuple # NOQA
|
||||
|
||||
class ParseError(Exception):
|
||||
"""Exception to signal the parser is stuck."""
|
||||
|
||||
def __init__(self, msg, type, value, context):
|
||||
Exception.__init__(self, "%s: type=%r, value=%r, context=%r" %
|
||||
(msg, type, value, context))
|
||||
self.msg = msg
|
||||
self.type = type
|
||||
self.value = value
|
||||
self.context = context
|
||||
|
||||
class Parser(object):
|
||||
"""Parser engine.
|
||||
|
||||
The proper usage sequence is:
|
||||
|
||||
p = Parser(grammar, [converter]) # create instance
|
||||
p.setup([start]) # prepare for parsing
|
||||
<for each input token>:
|
||||
if p.addtoken(...): # parse a token; may raise ParseError
|
||||
break
|
||||
root = p.rootnode # root of abstract syntax tree
|
||||
|
||||
A Parser instance may be reused by calling setup() repeatedly.
|
||||
|
||||
A Parser instance contains state pertaining to the current token
|
||||
sequence, and should not be used concurrently by different threads
|
||||
to parse separate token sequences.
|
||||
|
||||
See driver.py for how to get input tokens by tokenizing a file or
|
||||
string.
|
||||
|
||||
Parsing is complete when addtoken() returns True; the root of the
|
||||
abstract syntax tree can then be retrieved from the rootnode
|
||||
instance variable. When a syntax error occurs, addtoken() raises
|
||||
the ParseError exception. There is no error recovery; the parser
|
||||
cannot be used after a syntax error was reported (but it can be
|
||||
reinitialized by calling setup()).
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, grammar, convert=None):
|
||||
"""Constructor.
|
||||
|
||||
The grammar argument is a grammar.Grammar instance; see the
|
||||
grammar module for more information.
|
||||
|
||||
The parser is not ready yet for parsing; you must call the
|
||||
setup() method to get it started.
|
||||
|
||||
The optional convert argument is a function mapping concrete
|
||||
syntax tree nodes to abstract syntax tree nodes. If not
|
||||
given, no conversion is done and the syntax tree produced is
|
||||
the concrete syntax tree. If given, it must be a function of
|
||||
two arguments, the first being the grammar (a grammar.Grammar
|
||||
instance), and the second being the concrete syntax tree node
|
||||
to be converted. The syntax tree is converted from the bottom
|
||||
up.
|
||||
|
||||
A concrete syntax tree node is a (type, value, context, nodes)
|
||||
tuple, where type is the node type (a token or symbol number),
|
||||
value is None for symbols and a string for tokens, context is
|
||||
None or an opaque value used for error reporting (typically a
|
||||
(lineno, offset) pair), and nodes is a list of children for
|
||||
symbols, and None for tokens.
|
||||
|
||||
An abstract syntax tree node may be anything; this is entirely
|
||||
up to the converter function.
|
||||
|
||||
"""
|
||||
self.grammar = grammar
|
||||
self.convert = convert or (lambda grammar, node: node)
|
||||
|
||||
def setup(self, start=None):
|
||||
"""Prepare for parsing.
|
||||
|
||||
This *must* be called before starting to parse.
|
||||
|
||||
The optional argument is an alternative start symbol; it
|
||||
defaults to the grammar's start symbol.
|
||||
|
||||
You can use a Parser instance to parse any number of programs;
|
||||
each time you call setup() the parser is reset to an initial
|
||||
state determined by the (implicit or explicit) start symbol.
|
||||
|
||||
"""
|
||||
if start is None:
|
||||
start = self.grammar.start
|
||||
# Each stack entry is a tuple: (dfa, state, node).
|
||||
# A node is a tuple: (type, value, context, children),
|
||||
# where children is a list of nodes or None, and context may be None.
|
||||
newnode = (start, None, None, []) # type: Tuple[unicode, unicode, unicode, List]
|
||||
stackentry = (self.grammar.dfas[start], 0, newnode)
|
||||
self.stack = [stackentry]
|
||||
self.rootnode = None # type: Any
|
||||
self.used_names = set() # type: Set[unicode]
|
||||
# Aliased to self.rootnode.used_names in pop()
|
||||
|
||||
def addtoken(self, type, value, context):
|
||||
"""Add a token; return True iff this is the end of the program."""
|
||||
# Map from token to label
|
||||
ilabel = self.classify(type, value, context)
|
||||
# Loop until the token is shifted; may raise exceptions
|
||||
while True:
|
||||
dfa, state, node = self.stack[-1]
|
||||
states, first = dfa
|
||||
arcs = states[state]
|
||||
# Look for a state with this label
|
||||
for i, newstate in arcs:
|
||||
t, v = self.grammar.labels[i]
|
||||
if ilabel == i:
|
||||
# Look it up in the list of labels
|
||||
assert t < 256
|
||||
# Shift a token; we're done with it
|
||||
self.shift(type, value, newstate, context)
|
||||
# Pop while we are in an accept-only state
|
||||
state = newstate
|
||||
while states[state] == [(0, state)]:
|
||||
self.pop()
|
||||
if not self.stack:
|
||||
# Done parsing!
|
||||
return True
|
||||
dfa, state, node = self.stack[-1]
|
||||
states, first = dfa
|
||||
# Done with this token
|
||||
return False
|
||||
elif t >= 256:
|
||||
# See if it's a symbol and if we're in its first set
|
||||
itsdfa = self.grammar.dfas[t]
|
||||
itsstates, itsfirst = itsdfa
|
||||
if ilabel in itsfirst:
|
||||
# Push a symbol
|
||||
self.push(t, self.grammar.dfas[t], newstate, context)
|
||||
break # To continue the outer while loop
|
||||
else:
|
||||
if (0, state) in arcs:
|
||||
# An accepting state, pop it and try something else
|
||||
self.pop()
|
||||
if not self.stack:
|
||||
# Done parsing, but another token is input
|
||||
raise ParseError("too much input",
|
||||
type, value, context)
|
||||
else:
|
||||
# No success finding a transition
|
||||
raise ParseError("bad input", type, value, context)
|
||||
|
||||
def classify(self, type, value, context):
|
||||
"""Turn a token into a label. (Internal)"""
|
||||
if type == token.NAME:
|
||||
# Keep a listing of all used names
|
||||
self.used_names.add(value)
|
||||
# Check for reserved words
|
||||
ilabel = self.grammar.keywords.get(value)
|
||||
if ilabel is not None:
|
||||
return ilabel
|
||||
ilabel = self.grammar.tokens.get(type)
|
||||
if ilabel is None:
|
||||
raise ParseError("bad token", type, value, context)
|
||||
return ilabel
|
||||
|
||||
def shift(self, type, value, newstate, context):
|
||||
"""Shift a token. (Internal)"""
|
||||
dfa, state, node = self.stack[-1]
|
||||
newnode = (type, value, context, None) # type: Tuple[unicode, unicode, unicode, List]
|
||||
newnode = self.convert(self.grammar, newnode)
|
||||
if newnode is not None:
|
||||
node[-1].append(newnode)
|
||||
self.stack[-1] = (dfa, newstate, node)
|
||||
|
||||
def push(self, type, newdfa, newstate, context):
|
||||
"""Push a nonterminal. (Internal)"""
|
||||
dfa, state, node = self.stack[-1]
|
||||
newnode = (type, None, context, []) # type: Tuple[unicode, unicode, unicode, List]
|
||||
self.stack[-1] = (dfa, newstate, node)
|
||||
self.stack.append((newdfa, 0, newnode))
|
||||
|
||||
def pop(self):
|
||||
"""Pop a nonterminal. (Internal)"""
|
||||
popdfa, popstate, popnode = self.stack.pop()
|
||||
newnode = self.convert(self.grammar, popnode)
|
||||
if newnode is not None:
|
||||
if self.stack:
|
||||
dfa, state, node = self.stack[-1]
|
||||
node[-1].append(newnode)
|
||||
else:
|
||||
self.rootnode = newnode
|
||||
self.rootnode.used_names = self.used_names
|
||||
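The convert callback described in the Parser constructor docstring lets callers rewrite concrete-syntax nodes as they are built. A self-contained sketch of the most common conversion, dropping wrapper nodes that have exactly one child; the node layout follows the docstring, (type, value, context, children):

def collapse(node):
    # Collapse a concrete-syntax node with a single child so the tree stays compact.
    type_, value, context, children = node
    if children and len(children) == 1:
        return children[0]
    return node

leaf = (1, "x", (1, 0), None)        # a token node
wrapped = (260, None, None, [leaf])  # a symbol node with one child
assert collapse(wrapped) is leaf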
@@ -1,165 +0,0 @@
|
||||
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
|
||||
# Licensed to PSF under a Contributor Agreement.
|
||||
|
||||
# Adapted from parse.py to be compiled with Cython by Georg Brandl.
|
||||
|
||||
"""Parser engine for the grammar tables generated by pgen.
|
||||
|
||||
The grammar table must be loaded first.
|
||||
|
||||
See Parser/parser.c in the Python distribution for additional info on
|
||||
how this parsing engine works.
|
||||
|
||||
"""
|
||||
|
||||
from sphinx.pycode.nodes import Node, Leaf
|
||||
|
||||
DEF NAME = 1
|
||||
|
||||
class ParseError(Exception):
|
||||
"""Exception to signal the parser is stuck."""
|
||||
|
||||
def __init__(self, msg, type, value, context):
|
||||
Exception.__init__(self, "%s: type=%r, value=%r, context=%r" %
|
||||
(msg, type, value, context))
|
||||
self.msg = msg
|
||||
self.type = type
|
||||
self.value = value
|
||||
self.context = context
|
||||
|
||||
|
||||
cdef class Parser:
|
||||
cdef public object grammar
|
||||
cdef public object rootnode
|
||||
cdef public list stack
|
||||
cdef public set used_names
|
||||
cdef int _grammar_start
|
||||
cdef list _grammar_labels
|
||||
cdef dict _grammar_dfas
|
||||
cdef dict _grammar_keywords
|
||||
cdef dict _grammar_tokens
|
||||
cdef dict _grammar_number2symbol
|
||||
|
||||
def __init__(self, grammar, convert=None):
|
||||
self.grammar = grammar
|
||||
#self.convert = convert or noconvert
|
||||
|
||||
self._grammar_dfas = grammar.dfas
|
||||
self._grammar_labels = grammar.labels
|
||||
self._grammar_keywords = grammar.keywords
|
||||
self._grammar_tokens = grammar.tokens
|
||||
self._grammar_number2symbol = grammar.number2symbol
|
||||
self._grammar_start = grammar.start
|
||||
|
||||
def setup(self, start=None):
|
||||
if start is None:
|
||||
start = self._grammar_start
|
||||
# Each stack entry is a tuple: (dfa, state, node).
|
||||
# A node is a tuple: (type, value, context, children),
|
||||
# where children is a list of nodes or None, and context may be None.
|
||||
newnode = (start, None, None, [])
|
||||
stackentry = (self._grammar_dfas[start], 0, newnode)
|
||||
self.stack = [stackentry]
|
||||
self.rootnode = None
|
||||
self.used_names = set() # Aliased to self.rootnode.used_names in pop()
|
||||
|
||||
def addtoken(self, int type, value, context):
|
||||
"""Add a token; return True iff this is the end of the program."""
|
||||
cdef int ilabel, i, t, state, newstate
|
||||
# Map from token to label
|
||||
ilabel = self.classify(type, value, context)
|
||||
# Loop until the token is shifted; may raise exceptions
|
||||
while True:
|
||||
dfa, state, node = self.stack[-1]
|
||||
states, first = dfa
|
||||
arcs = states[state]
|
||||
# Look for a state with this label
|
||||
for i, newstate in arcs:
|
||||
t, v = self._grammar_labels[i]
|
||||
if ilabel == i:
|
||||
# Look it up in the list of labels
|
||||
## assert t < 256
|
||||
# Shift a token; we're done with it
|
||||
self.shift(type, value, newstate, context)
|
||||
# Pop while we are in an accept-only state
|
||||
state = newstate
|
||||
while states[state] == [(0, state)]:
|
||||
self.pop()
|
||||
if not self.stack:
|
||||
# Done parsing!
|
||||
return True
|
||||
dfa, state, node = self.stack[-1]
|
||||
states, first = dfa
|
||||
# Done with this token
|
||||
return False
|
||||
elif t >= 256:
|
||||
# See if it's a symbol and if we're in its first set
|
||||
itsdfa = self._grammar_dfas[t]
|
||||
itsstates, itsfirst = itsdfa
|
||||
if ilabel in itsfirst:
|
||||
# Push a symbol
|
||||
self.push(t, itsdfa, newstate, context)
|
||||
break # To continue the outer while loop
|
||||
else:
|
||||
if (0, state) in arcs:
|
||||
# An accepting state, pop it and try something else
|
||||
self.pop()
|
||||
if not self.stack:
|
||||
# Done parsing, but another token is input
|
||||
raise ParseError("too much input",
|
||||
type, value, context)
|
||||
else:
|
||||
# No success finding a transition
|
||||
raise ParseError("bad input", type, value, context)
|
||||
|
||||
cdef int classify(self, int type, value, context):
|
||||
"""Turn a token into a label. (Internal)"""
|
||||
if type == NAME:
|
||||
# Keep a listing of all used names
|
||||
self.used_names.add(value)
|
||||
# Check for reserved words
|
||||
if value in self._grammar_keywords:
|
||||
return self._grammar_keywords[value]
|
||||
if type not in self._grammar_tokens:
|
||||
raise ParseError("bad token", type, value, context)
|
||||
return self._grammar_tokens[type]
|
||||
|
||||
cdef void shift(self, type, value, newstate, context):
|
||||
"""Shift a token. (Internal)"""
|
||||
cdef tuple node
|
||||
dfa, state, node = self.stack[-1]
|
||||
newnode = (type, value, context, None)
|
||||
newnode = self.convert(newnode)
|
||||
if newnode is not None:
|
||||
node[-1].append(newnode)
|
||||
self.stack[-1] = (dfa, newstate, node)
|
||||
|
||||
cdef void push(self, type, newdfa, newstate, context):
|
||||
"""Push a nonterminal. (Internal)"""
|
||||
dfa, state, node = self.stack[-1]
|
||||
newnode = (type, None, context, [])
|
||||
self.stack[-1] = (dfa, newstate, node)
|
||||
self.stack.append((newdfa, 0, newnode))
|
||||
|
||||
cdef void pop(self):
|
||||
"""Pop a nonterminal. (Internal)"""
|
||||
popdfa, popstate, popnode = self.stack.pop()
|
||||
newnode = self.convert(popnode)
|
||||
if newnode is not None:
|
||||
if self.stack:
|
||||
dfa, state, node = self.stack[-1]
|
||||
node[-1].append(newnode)
|
||||
else:
|
||||
self.rootnode = newnode
|
||||
self.rootnode.used_names = self.used_names
|
||||
|
||||
cdef convert(self, tuple raw_node):
|
||||
type, value, context, children = raw_node
|
||||
if children or type in self._grammar_number2symbol:
|
||||
# If there's exactly one child, return that child instead of
|
||||
# creating a new node.
|
||||
if len(children) == 1:
|
||||
return children[0]
|
||||
return Node(type, children, context=context)
|
||||
else:
|
||||
return Leaf(type, value, context=context)
|
||||
@@ -1,403 +0,0 @@
|
||||
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
|
||||
# Licensed to PSF under a Contributor Agreement.
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
from six import iteritems
|
||||
from collections import OrderedDict
|
||||
|
||||
# Pgen imports
|
||||
from sphinx.pycode.pgen2 import grammar, token, tokenize
|
||||
|
||||
if False:
|
||||
# For type annotation
|
||||
from typing import Any, Dict, List, Tuple # NOQA
|
||||
|
||||
|
||||
class PgenGrammar(grammar.Grammar):
|
||||
pass
|
||||
|
||||
class ParserGenerator(object):
|
||||
|
||||
def __init__(self, filename, stream=None):
|
||||
close_stream = None
|
||||
if stream is None:
|
||||
stream = open(filename)
|
||||
close_stream = stream.close
|
||||
self.filename = filename
|
||||
self.stream = stream
|
||||
self.generator = tokenize.generate_tokens(stream.readline)
|
||||
self.gettoken() # Initialize lookahead
|
||||
self.dfas, self.startsymbol = self.parse()
|
||||
if close_stream is not None:
|
||||
close_stream()
|
||||
self.first = {} # type: Dict[unicode, List[unicode]]
|
||||
# map from symbol name to set of tokens
|
||||
self.addfirstsets()
|
||||
|
||||
def make_grammar(self):
|
||||
c = PgenGrammar()
|
||||
names = list(self.dfas.keys())
|
||||
names.sort()
|
||||
names.remove(self.startsymbol)
|
||||
names.insert(0, self.startsymbol)
|
||||
for name in names:
|
||||
i = 256 + len(c.symbol2number)
|
||||
c.symbol2number[name] = i
|
||||
c.number2symbol[i] = name
|
||||
for name in names:
|
||||
dfa = self.dfas[name]
|
||||
states = [] # type: List[List[Tuple[int, int]]]
|
||||
for state in dfa:
|
||||
arcs = []
|
||||
for label, next in iteritems(state.arcs):
|
||||
arcs.append((self.make_label(c, label), dfa.index(next)))
|
||||
if state.isfinal:
|
||||
arcs.append((0, dfa.index(state)))
|
||||
states.append(arcs)
|
||||
c.states.append(states)
|
||||
c.dfas[c.symbol2number[name]] = (states, self.make_first(c, name))
|
||||
c.start = c.symbol2number[self.startsymbol]
|
||||
return c
|
||||
|
||||
def make_first(self, c, name):
|
||||
rawfirst = self.first[name]
|
||||
first = {}
|
||||
for label in sorted(rawfirst):
|
||||
ilabel = self.make_label(c, label)
|
||||
##assert ilabel not in first # XXX failed on <> ... !=
|
||||
first[ilabel] = 1
|
||||
return first
|
||||
|
||||
def make_label(self, c, label):
|
||||
# XXX Maybe this should be a method on a subclass of converter?
|
||||
ilabel = len(c.labels)
|
||||
if label[0].isalpha():
|
||||
# Either a symbol name or a named token
|
||||
if label in c.symbol2number:
|
||||
# A symbol name (a non-terminal)
|
||||
if label in c.symbol2label:
|
||||
return c.symbol2label[label]
|
||||
else:
|
||||
c.labels.append((c.symbol2number[label], None))
|
||||
c.symbol2label[label] = ilabel
|
||||
return ilabel
|
||||
else:
|
||||
# A named token (NAME, NUMBER, STRING)
|
||||
itoken = getattr(token, label, None)
|
||||
assert isinstance(itoken, int), label
|
||||
assert itoken in token.tok_name, label
|
||||
if itoken in c.tokens:
|
||||
return c.tokens[itoken]
|
||||
else:
|
||||
c.labels.append((itoken, None))
|
||||
c.tokens[itoken] = ilabel
|
||||
return ilabel
|
||||
else:
|
||||
# Either a keyword or an operator
|
||||
assert label[0] in ('"', "'"), label
|
||||
value = eval(label)
|
||||
if value[0].isalpha():
|
||||
# A keyword
|
||||
if value in c.keywords:
|
||||
return c.keywords[value]
|
||||
else:
|
||||
c.labels.append((token.NAME, value))
|
||||
c.keywords[value] = ilabel
|
||||
return ilabel
|
||||
else:
|
||||
# An operator (any non-numeric token)
|
||||
itoken = grammar.opmap[value] # Fails if unknown token
|
||||
if itoken in c.tokens:
|
||||
return c.tokens[itoken]
|
||||
else:
|
||||
c.labels.append((itoken, None))
|
||||
c.tokens[itoken] = ilabel
|
||||
return ilabel
|
||||
|
||||
def addfirstsets(self):
|
||||
names = list(self.dfas.keys())
|
||||
names.sort()
|
||||
for name in names:
|
||||
if name not in self.first:
|
||||
self.calcfirst(name)
|
||||
#print name, self.first[name].keys()
|
||||
|
||||
def calcfirst(self, name):
|
||||
dfa = self.dfas[name]
|
||||
self.first[name] = None # dummy to detect left recursion
|
||||
state = dfa[0]
|
||||
totalset = {} # type: Dict[unicode, int]
|
||||
overlapcheck = {}
|
||||
for label, next in iteritems(state.arcs):
|
||||
if label in self.dfas:
|
||||
if label in self.first:
|
||||
fset = self.first[label]
|
||||
if fset is None:
|
||||
raise ValueError("recursion for rule %r" % name)
|
||||
else:
|
||||
self.calcfirst(label)
|
||||
fset = self.first[label]
|
||||
totalset.update(fset)
|
||||
overlapcheck[label] = fset
|
||||
else:
|
||||
totalset[label] = 1
|
||||
overlapcheck[label] = {label: 1}
|
||||
inverse = {} # type: Dict[unicode, unicode]
|
||||
for label, itsfirst in sorted(overlapcheck.items()):
|
||||
for symbol in sorted(itsfirst):
|
||||
if symbol in inverse:
|
||||
raise ValueError("rule %s is ambiguous; %s is in the"
|
||||
" first sets of %s as well as %s" %
|
||||
(name, symbol, label, inverse[symbol]))
|
||||
inverse[symbol] = label
|
||||
self.first[name] = totalset
|
||||
|
||||
def parse(self):
|
||||
dfas = {}
|
||||
startsymbol = None
|
||||
# MSTART: (NEWLINE | RULE)* ENDMARKER
|
||||
while self.type != token.ENDMARKER:
|
||||
while self.type == token.NEWLINE:
|
||||
self.gettoken()
|
||||
# RULE: NAME ':' RHS NEWLINE
|
||||
name = self.expect(token.NAME)
|
||||
self.expect(token.OP, ":")
|
||||
a, z = self.parse_rhs()
|
||||
self.expect(token.NEWLINE)
|
||||
#self.dump_nfa(name, a, z)
|
||||
dfa = self.make_dfa(a, z)
|
||||
#self.dump_dfa(name, dfa)
|
||||
#oldlen = len(dfa)
|
||||
self.simplify_dfa(dfa)
|
||||
#newlen = len(dfa)
|
||||
dfas[name] = dfa
|
||||
#print name, oldlen, newlen
|
||||
if startsymbol is None:
|
||||
startsymbol = name
|
||||
return dfas, startsymbol
|
||||
|
||||
def make_dfa(self, start, finish):
|
||||
# To turn an NFA into a DFA, we define the states of the DFA
|
||||
# to correspond to *sets* of states of the NFA. Then do some
|
||||
# state reduction. Let's represent sets as dicts with 1 for
|
||||
# values.
|
||||
assert isinstance(start, NFAState)
|
||||
assert isinstance(finish, NFAState)
|
||||
def closure(state):
|
||||
base = {} # type: Dict
|
||||
addclosure(state, base)
|
||||
return base
|
||||
def addclosure(state, base):
|
||||
assert isinstance(state, NFAState)
|
||||
if state in base:
|
||||
return
|
||||
base[state] = 1
|
||||
for label, next in state.arcs:
|
||||
if label is None:
|
||||
addclosure(next, base)
|
||||
states = [DFAState(closure(start), finish)]
|
||||
for state in states: # NB states grows while we're iterating
|
||||
arcs = {} # type: Dict[unicode, Dict]
|
||||
for nfastate in state.nfaset:
|
||||
for label, next in nfastate.arcs:
|
||||
if label is not None:
|
||||
addclosure(next, arcs.setdefault(label, {}))
|
||||
for label, nfaset in iteritems(arcs):
|
||||
for st in states:
|
||||
if st.nfaset == nfaset:
|
||||
break
|
||||
else:
|
||||
st = DFAState(nfaset, finish)
|
||||
states.append(st)
|
||||
state.addarc(st, label)
|
||||
return states # List of DFAState instances; first one is start
|
||||
|
||||
def dump_nfa(self, name, start, finish):
|
||||
print("Dump of NFA for", name)
|
||||
todo = [start]
|
||||
for i, state in enumerate(todo):
|
||||
print(" State", i, state is finish and "(final)" or "")
|
||||
for label, next in state.arcs:
|
||||
if next in todo:
|
||||
j = todo.index(next)
|
||||
else:
|
||||
j = len(todo)
|
||||
todo.append(next)
|
||||
if label is None:
|
||||
print(" -> %d" % j)
|
||||
else:
|
||||
print(" %s -> %d" % (label, j))
|
||||
|
||||
def dump_dfa(self, name, dfa):
|
||||
print("Dump of DFA for", name)
|
||||
for i, state in enumerate(dfa):
|
||||
print(" State", i, state.isfinal and "(final)" or "")
|
||||
for label, next in iteritems(state.arcs):
|
||||
print(" %s -> %d" % (label, dfa.index(next)))
|
||||
|
||||
def simplify_dfa(self, dfa):
|
||||
# This is not theoretically optimal, but works well enough.
|
||||
# Algorithm: repeatedly look for two states that have the same
|
||||
# set of arcs (same labels pointing to the same nodes) and
|
||||
# unify them, until things stop changing.
|
||||
|
||||
# dfa is a list of DFAState instances
|
||||
changes = True
|
||||
while changes:
|
||||
changes = False
|
||||
for i, state_i in enumerate(dfa):
|
||||
for j in range(i+1, len(dfa)):
|
||||
state_j = dfa[j]
|
||||
if state_i == state_j:
|
||||
#print " unify", i, j
|
||||
del dfa[j]
|
||||
for state in dfa:
|
||||
state.unifystate(state_j, state_i)
|
||||
changes = True
|
||||
break
|
||||
|
||||
def parse_rhs(self):
|
||||
# RHS: ALT ('|' ALT)*
|
||||
a, z = self.parse_alt()
|
||||
if self.value != "|":
|
||||
return a, z
|
||||
else:
|
||||
aa = NFAState()
|
||||
zz = NFAState()
|
||||
aa.addarc(a)
|
||||
z.addarc(zz)
|
||||
while self.value == "|":
|
||||
self.gettoken()
|
||||
a, z = self.parse_alt()
|
||||
aa.addarc(a)
|
||||
z.addarc(zz)
|
||||
return aa, zz
|
||||
|
||||
def parse_alt(self):
|
||||
# ALT: ITEM+
|
||||
a, b = self.parse_item()
|
||||
while (self.value in ("(", "[") or
|
||||
self.type in (token.NAME, token.STRING)):
|
||||
c, d = self.parse_item()
|
||||
b.addarc(c)
|
||||
b = d
|
||||
return a, b
|
||||
|
||||
def parse_item(self):
|
||||
# ITEM: '[' RHS ']' | ATOM ['+' | '*']
|
||||
if self.value == "[":
|
||||
self.gettoken()
|
||||
a, z = self.parse_rhs()
|
||||
self.expect(token.OP, "]")
|
||||
a.addarc(z)
|
||||
return a, z
|
||||
else:
|
||||
a, z = self.parse_atom()
|
||||
value = self.value
|
||||
if value not in ("+", "*"):
|
||||
return a, z
|
||||
self.gettoken()
|
||||
z.addarc(a)
|
||||
if value == "+":
|
||||
return a, z
|
||||
else:
|
||||
return a, a
|
||||
|
||||
def parse_atom(self):
|
||||
# ATOM: '(' RHS ')' | NAME | STRING
|
||||
if self.value == "(":
|
||||
self.gettoken()
|
||||
a, z = self.parse_rhs()
|
||||
self.expect(token.OP, ")")
|
||||
return a, z
|
||||
elif self.type in (token.NAME, token.STRING):
|
||||
a = NFAState()
|
||||
z = NFAState()
|
||||
a.addarc(z, self.value)
|
||||
self.gettoken()
|
||||
return a, z
|
||||
else:
|
||||
self.raise_error("expected (...) or NAME or STRING, got %s/%s",
|
||||
self.type, self.value)
|
||||
|
||||
def expect(self, type, value=None):
|
||||
if self.type != type or (value is not None and self.value != value):
|
||||
self.raise_error("expected %s/%s, got %s/%s",
|
||||
type, value, self.type, self.value)
|
||||
value = self.value
|
||||
self.gettoken()
|
||||
return value
|
||||
|
||||
def gettoken(self):
|
||||
tup = next(self.generator)
|
||||
while tup[0] in (tokenize.COMMENT, tokenize.NL):
|
||||
tup = next(self.generator)
|
||||
self.type, self.value, self.begin, self.end, self.line = tup
|
||||
#print token.tok_name[self.type], repr(self.value)
|
||||
|
||||
def raise_error(self, msg, *args):
|
||||
if args:
|
||||
try:
|
||||
msg = msg % args
|
||||
except:
|
||||
msg = " ".join([msg] + [str(x) for x in args])
|
||||
raise SyntaxError(msg, (self.filename, self.end[0],
|
||||
self.end[1], self.line))
|
||||
|
||||
class NFAState(object):
|
||||
|
||||
def __init__(self):
|
||||
self.arcs = [] # type: List[Tuple[unicode, Any]]
|
||||
# list of (label, NFAState) pairs
|
||||
|
||||
def addarc(self, next, label=None):
|
||||
assert label is None or isinstance(label, str)
|
||||
assert isinstance(next, NFAState)
|
||||
self.arcs.append((label, next))
|
||||
|
||||
def __hash__(self):
|
||||
return hash(tuple(x[0] for x in self.arcs))
|
||||
|
||||
class DFAState(object):
|
||||
|
||||
def __init__(self, nfaset, final):
|
||||
assert isinstance(nfaset, dict)
|
||||
assert isinstance(next(iter(nfaset)), NFAState)
|
||||
assert isinstance(final, NFAState)
|
||||
self.nfaset = nfaset
|
||||
self.isfinal = final in nfaset
|
||||
self.arcs = OrderedDict() # type: OrderedDict
|
||||
# map from label to DFAState
|
||||
|
||||
def __hash__(self):
|
||||
return hash(tuple(self.arcs))
|
||||
|
||||
def addarc(self, next, label):
|
||||
assert isinstance(label, str)
|
||||
assert label not in self.arcs
|
||||
assert isinstance(next, DFAState)
|
||||
self.arcs[label] = next
|
||||
|
||||
def unifystate(self, old, new):
|
||||
for label, next in iteritems(self.arcs):
|
||||
if next is old:
|
||||
self.arcs[label] = new
|
||||
|
||||
def __eq__(self, other):
|
||||
# Equality test -- ignore the nfaset instance variable
|
||||
assert isinstance(other, DFAState)
|
||||
if self.isfinal != other.isfinal:
|
||||
return False
|
||||
# Can't just return self.arcs == other.arcs, because that
|
||||
# would invoke this method recursively, with cycles...
|
||||
if len(self.arcs) != len(other.arcs):
|
||||
return False
|
||||
for label, next in iteritems(self.arcs):
|
||||
if next is not other.arcs.get(label):
|
||||
return False
|
||||
return True
|
||||
|
||||
def generate_grammar(filename="Grammar.txt"):
|
||||
p = ParserGenerator(filename)
|
||||
return p.make_grammar()
|
||||
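make_dfa() above is a textbook subset construction: each NFA state set is expanded through its epsilon arcs (label None) and one DFA state is created per distinct closure. A minimal stand-alone sketch of the closure step over a toy NFA, using ints for states and a dict of arc lists rather than the module's NFAState objects:

toy_nfa = {0: [(None, 1), ("a", 2)], 1: [("b", 2)], 2: []}

def closure(state, arcs):
    # Collect every state reachable from `state` through epsilon (None) arcs only.
    seen = {state}
    stack = [state]
    while stack:
        for label, nxt in arcs[stack.pop()]:
            if label is None and nxt not in seen:
                seen.add(nxt)
                stack.append(nxt)
    return seen

assert closure(0, toy_nfa) == {0, 1}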
@@ -1,86 +0,0 @@
|
||||
#! /usr/bin/env python
|
||||
|
||||
"""Token constants (from "token.h")."""
|
||||
|
||||
# Taken from Python (r53757) and modified to include some tokens
|
||||
# originally monkeypatched in by pgen2.tokenize
|
||||
|
||||
#--start constants--
|
||||
ENDMARKER = 0
|
||||
NAME = 1
|
||||
NUMBER = 2
|
||||
STRING = 3
|
||||
NEWLINE = 4
|
||||
INDENT = 5
|
||||
DEDENT = 6
|
||||
LPAR = 7
|
||||
RPAR = 8
|
||||
LSQB = 9
|
||||
RSQB = 10
|
||||
COLON = 11
|
||||
COMMA = 12
|
||||
SEMI = 13
|
||||
PLUS = 14
|
||||
MINUS = 15
|
||||
STAR = 16
|
||||
SLASH = 17
|
||||
VBAR = 18
|
||||
AMPER = 19
|
||||
LESS = 20
|
||||
GREATER = 21
|
||||
EQUAL = 22
|
||||
DOT = 23
|
||||
PERCENT = 24
|
||||
BACKQUOTE = 25
|
||||
LBRACE = 26
|
||||
RBRACE = 27
|
||||
EQEQUAL = 28
|
||||
NOTEQUAL = 29
|
||||
LESSEQUAL = 30
|
||||
GREATEREQUAL = 31
|
||||
TILDE = 32
|
||||
CIRCUMFLEX = 33
|
||||
LEFTSHIFT = 34
|
||||
RIGHTSHIFT = 35
|
||||
DOUBLESTAR = 36
|
||||
PLUSEQUAL = 37
|
||||
MINEQUAL = 38
|
||||
STAREQUAL = 39
|
||||
SLASHEQUAL = 40
|
||||
PERCENTEQUAL = 41
|
||||
AMPEREQUAL = 42
|
||||
VBAREQUAL = 43
|
||||
CIRCUMFLEXEQUAL = 44
|
||||
LEFTSHIFTEQUAL = 45
|
||||
RIGHTSHIFTEQUAL = 46
|
||||
DOUBLESTAREQUAL = 47
|
||||
DOUBLESLASH = 48
|
||||
DOUBLESLASHEQUAL = 49
|
||||
AT = 50
|
||||
ATEQUAL = 51
|
||||
RARROW = 52
|
||||
ELLIPSIS = 53
|
||||
OP = 54
|
||||
AWAIT = 55
|
||||
ASYNC = 56
|
||||
COMMENT = 57
|
||||
NL = 58
|
||||
ERRORTOKEN = 59
|
||||
N_TOKENS = 60
|
||||
NT_OFFSET = 256
|
||||
#--end constants--
|
||||
|
||||
tok_name = {}
|
||||
for _name, _value in list(globals().items()):
|
||||
if type(_value) is type(0):
|
||||
tok_name[_value] = _name
|
||||
|
||||
|
||||
def ISTERMINAL(x):
|
||||
return x < NT_OFFSET
|
||||
|
||||
def ISNONTERMINAL(x):
|
||||
return x >= NT_OFFSET
|
||||
|
||||
def ISEOF(x):
|
||||
return x == ENDMARKER
|
||||
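These constants follow the same convention as the standard library's token module, which also provides the three predicates, so the NT_OFFSET convention can be checked directly (a quick illustrative check; the numeric values in the stdlib may differ from the vendored copy above):

import token

assert token.ISTERMINAL(token.NAME)      # token codes below NT_OFFSET are terminals
assert token.ISNONTERMINAL(300)          # symbol numbers start at NT_OFFSET (256)
assert token.ISEOF(token.ENDMARKER)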
@@ -1,441 +0,0 @@
|
||||
# Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006 Python Software Foundation.
|
||||
# All rights reserved.
|
||||
|
||||
"""Tokenization help for Python programs.
|
||||
|
||||
generate_tokens(readline) is a generator that breaks a stream of
|
||||
text into Python tokens. It accepts a readline-like method which is called
|
||||
repeatedly to get the next line of input (or "" for EOF). It generates
|
||||
5-tuples with these members:
|
||||
|
||||
the token type (see token.py)
|
||||
the token (a string)
|
||||
the starting (row, column) indices of the token (a 2-tuple of ints)
|
||||
the ending (row, column) indices of the token (a 2-tuple of ints)
|
||||
the original line (string)
|
||||
|
||||
It is designed to match the working of the Python tokenizer exactly, except
|
||||
that it produces COMMENT tokens for comments and gives type OP for all
|
||||
operators.
|
||||
|
||||
Older entry points
|
||||
tokenize_loop(readline, tokeneater)
|
||||
tokenize(readline, tokeneater=printtoken)
|
||||
are the same, except instead of generating tokens, tokeneater is a callback
|
||||
function to which the 5 fields described above are passed as 5 arguments,
|
||||
each time a new token is found.
|
||||
"""
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
__author__ = 'Ka-Ping Yee <ping@lfw.org>'
|
||||
__credits__ = \
|
||||
'GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro'
|
||||
|
||||
import string, re
|
||||
from six import PY3
|
||||
from sphinx.pycode.pgen2.token import *
|
||||
from sphinx.pycode.pgen2 import token
|
||||
|
||||
if False:
|
||||
# For type annotation
|
||||
from typing import List # NOQA
|
||||
|
||||
__all__ = [x for x in dir(token) if x[0] != '_'] + ["tokenize",
|
||||
"generate_tokens", "untokenize"]
|
||||
del token
|
||||
|
||||
def group(*choices): return '(' + '|'.join(choices) + ')'
|
||||
def any(*choices): return group(*choices) + '*'
|
||||
def maybe(*choices): return group(*choices) + '?'
|
||||
|
||||
Whitespace = r'[ \f\t]*'
|
||||
Comment = r'#[^\r\n]*'
|
||||
Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
|
||||
Name = r'[a-zA-Z_]\w*'
|
||||
|
||||
Binnumber = r'0[bB][01]*'
|
||||
Hexnumber = r'0[xX][\da-fA-F]*[lL]?'
|
||||
Octnumber = r'0[oO]?[0-7]*[lL]?'
|
||||
Decnumber = r'[1-9]\d*[lL]?'
|
||||
Intnumber = group(Binnumber, Hexnumber, Octnumber, Decnumber)
|
||||
Exponent = r'[eE][-+]?\d+'
|
||||
Pointfloat = group(r'\d+\.\d*', r'\.\d+') + maybe(Exponent)
|
||||
Expfloat = r'\d+' + Exponent
|
||||
Floatnumber = group(Pointfloat, Expfloat)
|
||||
Imagnumber = group(r'\d+[jJ]', Floatnumber + r'[jJ]')
|
||||
Number = group(Imagnumber, Floatnumber, Intnumber)
|
||||
|
||||
# Tail end of ' string.
|
||||
Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
|
||||
# Tail end of " string.
|
||||
Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
|
||||
# Tail end of ''' string.
|
||||
Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
|
||||
# Tail end of """ string.
|
||||
Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
|
||||
Triple = group("[ubUB]?[rR]?'''", '[ubUB]?[rR]?"""')
|
||||
# Single-line ' or " string.
|
||||
String = group(r"[uU]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
|
||||
r'[uU]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
|
||||
|
||||
# Because of leftmost-then-longest match semantics, be sure to put the
|
||||
# longest operators first (e.g., if = came before ==, == would get
|
||||
# recognized as two instances of =).
|
||||
Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"<>", r"!=",
|
||||
r"//=?", r"->",
|
||||
r"[+\-*/%&|^=<>]=?",
|
||||
r"~")
|
||||
|
||||
Bracket = '[][(){}]'
|
||||
Special = group(r'\r?\n', r'[:;.,`@]')
|
||||
if PY3:
|
||||
Ellipsis_ = r'\.{3}'
|
||||
Special = group(Ellipsis_, Special)
|
||||
Funny = group(Operator, Bracket, Special)
|
||||
|
||||
PlainToken = group(Number, Funny, String, Name)
|
||||
Token = Ignore + PlainToken
|
||||
|
||||
# First (or only) line of ' or " string.
|
||||
ContStr = group(r"[uUbB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
|
||||
group("'", r'\\\r?\n'),
|
||||
r'[uUbB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
|
||||
group('"', r'\\\r?\n'))
|
||||
PseudoExtras = group(r'\\\r?\n', Comment, Triple)
|
||||
PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
|
||||
|
||||
tokenprog, pseudoprog, single3prog, double3prog = [
|
||||
re.compile(x) for x in (Token, PseudoToken, Single3, Double3)
|
||||
]
|
||||
endprogs = {"'": re.compile(Single), '"': re.compile(Double),
|
||||
"'''": single3prog, '"""': double3prog,
|
||||
"r'''": single3prog, 'r"""': double3prog,
|
||||
"u'''": single3prog, 'u"""': double3prog,
|
||||
"b'''": single3prog, 'b"""': double3prog,
|
||||
"ur'''": single3prog, 'ur"""': double3prog,
|
||||
"br'''": single3prog, 'br"""': double3prog,
|
||||
"R'''": single3prog, 'R"""': double3prog,
|
||||
"U'''": single3prog, 'U"""': double3prog,
|
||||
"B'''": single3prog, 'B"""': double3prog,
|
||||
"uR'''": single3prog, 'uR"""': double3prog,
|
||||
"Ur'''": single3prog, 'Ur"""': double3prog,
|
||||
"UR'''": single3prog, 'UR"""': double3prog,
|
||||
"bR'''": single3prog, 'bR"""': double3prog,
|
||||
"Br'''": single3prog, 'Br"""': double3prog,
|
||||
"BR'''": single3prog, 'BR"""': double3prog,
|
||||
'r': None, 'R': None,
|
||||
'u': None, 'U': None,
|
||||
'b': None, 'B': None}
|
||||
|
||||
triple_quoted = {}
|
||||
for t in ("'''", '"""',
|
||||
"r'''", 'r"""', "R'''", 'R"""',
|
||||
"u'''", 'u"""', "U'''", 'U"""',
|
||||
"b'''", 'b"""', "B'''", 'B"""',
|
||||
"ur'''", 'ur"""', "Ur'''", 'Ur"""',
|
||||
"uR'''", 'uR"""', "UR'''", 'UR"""',
|
||||
"br'''", 'br"""', "Br'''", 'Br"""',
|
||||
"bR'''", 'bR"""', "BR'''", 'BR"""',):
|
||||
triple_quoted[t] = t
|
||||
single_quoted = {}
|
||||
for t in ("'", '"',
|
||||
"r'", 'r"', "R'", 'R"',
|
||||
"u'", 'u"', "U'", 'U"',
|
||||
"b'", 'b"', "B'", 'B"',
|
||||
"ur'", 'ur"', "Ur'", 'Ur"',
|
||||
"uR'", 'uR"', "UR'", 'UR"',
|
||||
"br'", 'br"', "Br'", 'Br"',
|
||||
"bR'", 'bR"', "BR'", 'BR"', ):
|
||||
single_quoted[t] = t
|
||||
|
||||
tabsize = 8
|
||||
|
||||
class TokenError(Exception): pass
|
||||
|
||||
class StopTokenizing(Exception): pass
|
||||
|
||||
def printtoken(type, token, scell, ecell, line): # for testing
|
||||
srow, scol = scell
|
||||
erow, ecol = ecell
|
||||
print("%d,%d-%d,%d:\t%s\t%s" %
|
||||
(srow, scol, erow, ecol, tok_name[type], repr(token)))
|
||||
|
||||
def tokenize(readline, tokeneater=printtoken):
|
||||
"""
|
||||
The tokenize() function accepts two parameters: one representing the
|
||||
input stream, and one providing an output mechanism for tokenize().
|
||||
|
||||
The first parameter, readline, must be a callable object which provides
|
||||
the same interface as the readline() method of built-in file objects.
|
||||
Each call to the function should return one line of input as a string.
|
||||
|
||||
The second parameter, tokeneater, must also be a callable object. It is
|
||||
called once for each token, with five arguments, corresponding to the
|
||||
tuples generated by generate_tokens().
|
||||
"""
|
||||
try:
|
||||
tokenize_loop(readline, tokeneater)
|
||||
except StopTokenizing:
|
||||
pass
|
||||
|
||||
# backwards compatible interface
|
||||
def tokenize_loop(readline, tokeneater):
|
||||
for token_info in generate_tokens(readline):
|
||||
tokeneater(*token_info)
|
||||
|
||||
class Untokenizer:
|
||||
|
||||
def __init__(self):
|
||||
self.tokens = [] # type: List[unicode]
|
||||
self.prev_row = 1
|
||||
self.prev_col = 0
|
||||
|
||||
def add_whitespace(self, start):
|
||||
row, col = start
|
||||
assert row <= self.prev_row
|
||||
col_offset = col - self.prev_col
|
||||
if col_offset:
|
||||
self.tokens.append(" " * col_offset)
|
||||
|
||||
def untokenize(self, iterable):
|
||||
for t in iterable:
|
||||
if len(t) == 2:
|
||||
self.compat(t, iterable)
|
||||
break
|
||||
tok_type, token, start, end, line = t
|
||||
self.add_whitespace(start)
|
||||
self.tokens.append(token)
|
||||
self.prev_row, self.prev_col = end
|
||||
if tok_type in (NEWLINE, NL):
|
||||
self.prev_row += 1
|
||||
self.prev_col = 0
|
||||
return "".join(self.tokens)
|
||||
|
||||
def compat(self, token, iterable):
|
||||
startline = False
|
||||
indents = []
|
||||
toks_append = self.tokens.append
|
||||
toknum, tokval = token
|
||||
if toknum in (NAME, NUMBER):
|
||||
tokval += ' '
|
||||
if toknum in (NEWLINE, NL):
|
||||
startline = True
|
||||
for tok in iterable:
|
||||
toknum, tokval = tok[:2]
|
||||
|
||||
if toknum in (NAME, NUMBER):
|
||||
tokval += ' '
|
||||
|
||||
if toknum == INDENT:
|
||||
indents.append(tokval)
|
||||
continue
|
||||
elif toknum == DEDENT:
|
||||
indents.pop()
|
||||
continue
|
||||
elif toknum in (NEWLINE, NL):
|
||||
startline = True
|
||||
elif startline and indents:
|
||||
toks_append(indents[-1])
|
||||
startline = False
|
||||
toks_append(tokval)
|
||||
|
||||
def untokenize(iterable):
|
||||
"""Transform tokens back into Python source code.
|
||||
|
||||
Each element returned by the iterable must be a token sequence
|
||||
with at least two elements, a token number and token value. If
|
||||
only two tokens are passed, the resulting output is poor.
|
||||
|
||||
Round-trip invariant for full input:
|
||||
Untokenized source will match input source exactly
|
||||
|
||||
Round-trip invariant for limited input:
|
||||
# Output text will tokenize back to the input
|
||||
t1 = [tok[:2] for tok in generate_tokens(f.readline)]
|
||||
newcode = untokenize(t1)
|
||||
readline = iter(newcode.splitlines(1)).next
|
||||
t2 = [tok[:2] for tok in generate_tokens(readline)]
|
||||
assert t1 == t2
|
||||
"""
|
||||
ut = Untokenizer()
|
||||
return ut.untokenize(iterable)
|
||||
|
||||
def generate_tokens(readline):
|
||||
"""
|
||||
The generate_tokens() generator requires one argument, readline, which
|
||||
must be a callable object which provides the same interface as the
|
||||
readline() method of built-in file objects. Each call to the function
|
||||
should return one line of input as a string. Alternately, readline
|
||||
can be a callable function terminating with StopIteration:
|
||||
readline = open(myfile).next # Example of alternate readline
|
||||
|
||||
The generator produces 5-tuples with these members: the token type; the
|
||||
token string; a 2-tuple (srow, scol) of ints specifying the row and
|
||||
column where the token begins in the source; a 2-tuple (erow, ecol) of
|
||||
ints specifying the row and column where the token ends in the source;
|
||||
and the line on which the token was found. The line passed is the
|
||||
logical line; continuation lines are included.
|
||||
"""
|
||||
lnum = parenlev = continued = 0
|
||||
namechars, numchars = string.ascii_letters + '_', '0123456789'
|
||||
contstr, needcont = '', 0
|
||||
contline = None
|
||||
indents = [0]
|
||||
|
||||
while 1: # loop over lines in stream
|
||||
try:
|
||||
line = readline()
|
||||
except StopIteration:
|
||||
line = ''
|
||||
# if we are not at the end of the file make sure the
|
||||
# line ends with a newline because the parser depends
|
||||
# on that.
|
||||
if line:
|
||||
line = line.rstrip() + '\n'
|
||||
lnum = lnum + 1
|
||||
pos, max = 0, len(line)
|
||||
|
||||
if contstr: # continued string
|
||||
if not line:
|
||||
raise TokenError("EOF in multi-line string", strstart) # type: ignore
|
||||
endmatch = endprog.match(line) # type: ignore
|
||||
if endmatch:
|
||||
pos = end = endmatch.end(0)
|
||||
yield (STRING, contstr + line[:end],
|
||||
strstart, (lnum, end), contline + line) # type: ignore
|
||||
contstr, needcont = '', 0
|
||||
contline = None
|
||||
elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
|
||||
yield (ERRORTOKEN, contstr + line,
|
||||
strstart, (lnum, len(line)), contline) # type: ignore
|
||||
contstr = ''
|
||||
contline = None
|
||||
continue
|
||||
else:
|
||||
contstr = contstr + line
|
||||
contline = contline + line
|
||||
continue
|
||||
|
||||
elif parenlev == 0 and not continued: # new statement
|
||||
if not line: break
|
||||
column = 0
|
||||
while pos < max: # measure leading whitespace
|
||||
if line[pos] == ' ': column = column + 1
|
||||
elif line[pos] == '\t': column = (column/tabsize + 1)*tabsize
|
||||
elif line[pos] == '\f': column = 0
|
||||
else: break
|
||||
pos = pos + 1
|
||||
if pos == max: break
|
||||
|
||||
if line[pos] in '#\r\n': # skip comments or blank lines
|
||||
if line[pos] == '#':
|
||||
comment_token = line[pos:].rstrip('\r\n')
|
||||
nl_pos = pos + len(comment_token)
|
||||
yield (COMMENT, comment_token,
|
||||
(lnum, pos), (lnum, pos + len(comment_token)), line)
|
||||
yield (NL, line[nl_pos:],
|
||||
(lnum, nl_pos), (lnum, len(line)), line)
|
||||
else:
|
||||
yield ((NL, COMMENT)[line[pos] == '#'], line[pos:],
|
||||
(lnum, pos), (lnum, len(line)), line)
|
||||
continue
|
||||
|
||||
if column > indents[-1]: # count indents or dedents
|
||||
indents.append(column)
|
||||
yield (INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
|
||||
while column < indents[-1]:
|
||||
if column not in indents:
|
||||
raise IndentationError(
|
||||
"unindent does not match any outer indentation level",
|
||||
("<tokenize>", lnum, pos, line))
|
||||
indents = indents[:-1]
|
||||
yield (DEDENT, '', (lnum, pos), (lnum, pos), line)
|
||||
|
||||
else: # continued statement
|
||||
if not line:
|
||||
raise TokenError("EOF in multi-line statement", (lnum, 0))
|
||||
continued = 0
|
||||
|
||||
while pos < max:
|
||||
pseudomatch = pseudoprog.match(line, pos)
|
||||
if pseudomatch: # scan for tokens
|
||||
start, end = pseudomatch.span(1)
|
||||
spos, epos, pos = (lnum, start), (lnum, end), end
|
||||
token, initial = line[start:end], line[start]
|
||||
|
||||
if end < max:
|
||||
next_pseudomatch = pseudoprog.match(line, end)
|
||||
if next_pseudomatch:
|
||||
n_start, n_end = next_pseudomatch.span(1)
|
||||
n_token = line[n_start:n_end]
|
||||
else:
|
||||
n_token = None
|
||||
else:
|
||||
n_token = None
|
||||
|
||||
if initial in numchars or (
|
||||
initial == '.' and token not in ('.', '...')
|
||||
): # ordinary number
|
||||
yield (NUMBER, token, spos, epos, line)
|
||||
elif initial in '\r\n':
|
||||
newline = NEWLINE
|
||||
if parenlev > 0:
|
||||
newline = NL
|
||||
yield (newline, token, spos, epos, line)
|
||||
elif initial == '#':
|
||||
assert not token.endswith("\n")
|
||||
yield (COMMENT, token, spos, epos, line)
|
||||
elif token in triple_quoted:
|
||||
endprog = endprogs[token]
|
||||
endmatch = endprog.match(line, pos)
|
||||
if endmatch: # all on one line
|
||||
pos = endmatch.end(0)
|
||||
token = line[start:pos]
|
||||
yield (STRING, token, spos, (lnum, pos), line)
|
||||
else:
|
||||
strstart = (lnum, start) # multiple lines
|
||||
contstr = line[start:]
|
||||
contline = line
|
||||
break
|
||||
elif initial in single_quoted or \
|
||||
token[:2] in single_quoted or \
|
||||
token[:3] in single_quoted:
|
||||
if token[-1] == '\n': # continued string
|
||||
strstart = (lnum, start)
|
||||
endprog = (endprogs[initial] or endprogs[token[1]] or
|
||||
endprogs[token[2]])
|
||||
contstr, needcont = line[start:], 1
|
||||
contline = line
|
||||
break
|
||||
else: # ordinary string
|
||||
yield (STRING, token, spos, epos, line)
|
||||
elif token == 'await' and n_token:
|
||||
yield (AWAIT, token, spos, epos, line)
|
||||
elif token == 'async' and n_token in ('def', 'for', 'with'):
|
||||
yield (ASYNC, token, spos, epos, line)
|
||||
elif initial in namechars: # ordinary name
|
||||
yield (NAME, token, spos, epos, line)
|
||||
elif token in ('...',): # ordinary name
|
||||
yield (NAME, token, spos, epos, line)
|
||||
elif initial == '\\': # continued stmt
|
||||
# This yield is new; needed for better idempotency:
|
||||
yield (NL, token, spos, (lnum, pos), line)
|
||||
continued = 1
|
||||
else:
|
||||
if initial in '([{': parenlev = parenlev + 1
|
||||
elif initial in ')]}': parenlev = parenlev - 1
|
||||
yield (OP, token, spos, epos, line)
|
||||
else:
|
||||
yield (ERRORTOKEN, line[pos],
|
||||
(lnum, pos), (lnum, pos+1), line)
|
||||
pos = pos + 1
|
||||
|
||||
for _ in indents[1:]: # pop remaining indent levels
|
||||
yield (DEDENT, '', (lnum, 0), (lnum, 0), '')
|
||||
yield (ENDMARKER, '', (lnum, 0), (lnum, 0), '')
|
||||
|
||||
if __name__ == '__main__': # testing
|
||||
import sys
|
||||
if len(sys.argv) > 1: tokenize(open(sys.argv[1]).readline)
|
||||
else: tokenize(sys.stdin.readline)
|
||||
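The 5-tuple interface described in the module docstring matches the standard library's tokenize.generate_tokens(), so the shape of the output can be previewed with the stdlib alone (a small illustrative snippet, not part of this diff):

import io
import tokenize

src = "x = 1  # answer\n"
for tok in tokenize.generate_tokens(io.StringIO(src).readline):
    # Each token carries its type, text, start and end (row, col), and source line.
    print(tokenize.tok_name[tok.type], repr(tok.string), tok.start, tok.end)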
@@ -9,8 +9,41 @@
|
||||
:license: BSD, see LICENSE for details.
|
||||
"""
|
||||
|
||||
import os
|
||||
from six import PY2
|
||||
|
||||
import sphinx
|
||||
from sphinx.pycode import ModuleAnalyzer
|
||||
|
||||
SPHINX_MODULE_PATH = os.path.splitext(sphinx.__file__)[0] + '.py'
|
||||
|
||||
|
||||
def test_ModuleAnalyzer_for_string():
|
||||
analyzer = ModuleAnalyzer.for_string('print("Hello world")', 'module_name')
|
||||
assert analyzer.modname == 'module_name'
|
||||
assert analyzer.srcname == '<string>'
|
||||
if PY2:
|
||||
assert analyzer.encoding == 'ascii'
|
||||
else:
|
||||
assert analyzer.encoding is None
|
||||
|
||||
|
||||
def test_ModuleAnalyzer_for_file():
|
||||
analyzer = ModuleAnalyzer.for_string(SPHINX_MODULE_PATH, 'sphinx')
|
||||
assert analyzer.modname == 'sphinx'
|
||||
assert analyzer.srcname == '<string>'
|
||||
if PY2:
|
||||
assert analyzer.encoding == 'ascii'
|
||||
else:
|
||||
assert analyzer.encoding is None
|
||||
|
||||
|
||||
def test_ModuleAnalyzer_for_module():
|
||||
analyzer = ModuleAnalyzer.for_module('sphinx')
|
||||
assert analyzer.modname == 'sphinx'
|
||||
assert analyzer.srcname == SPHINX_MODULE_PATH
|
||||
assert analyzer.encoding == 'utf-8'
|
||||
|
||||
|
||||
def test_ModuleAnalyzer_find_tags():
|
||||
code = ('class Foo(object):\n' # line: 1
|
||||
@@ -30,20 +63,30 @@ def test_ModuleAnalyzer_find_tags():
|
||||
' """function baz"""\n'
|
||||
' pass\n'
|
||||
'\n'
|
||||
'@decorator\n'
|
||||
'@decorator1\n'
|
||||
'@decorator2\n'
|
||||
'def quux():\n'
|
||||
' pass\n')
|
||||
' pass\n' # line: 21
|
||||
'\n'
|
||||
'class Corge(object):\n'
|
||||
' @decorator1\n'
|
||||
' @decorator2\n'
|
||||
' def grault(self):\n'
|
||||
' pass\n')
|
||||
analyzer = ModuleAnalyzer.for_string(code, 'module')
|
||||
tags = analyzer.find_tags()
|
||||
assert set(tags.keys()) == {'Foo', 'Foo.__init__', 'Foo.bar',
|
||||
'Foo.Baz', 'Foo.Baz.__init__', 'qux', 'quux'}
|
||||
assert tags['Foo'] == ('class', 1, 13) # type, start, end
|
||||
assert tags['Foo.__init__'] == ('def', 3, 5)
|
||||
assert tags['Foo.bar'] == ('def', 6, 9)
|
||||
assert tags['Foo.Baz'] == ('class', 10, 13)
|
||||
assert tags['Foo.Baz.__init__'] == ('def', 11, 13)
|
||||
assert tags['qux'] == ('def', 14, 17)
|
||||
assert tags['quux'] == ('def', 18, 21) # decorator
|
||||
'Foo.Baz', 'Foo.Baz.__init__', 'qux', 'quux',
|
||||
'Corge', 'Corge.grault'}
|
||||
assert tags['Foo'] == ('class', 1, 12) # type, start, end
|
||||
assert tags['Foo.__init__'] == ('def', 3, 4)
|
||||
assert tags['Foo.bar'] == ('def', 6, 8)
|
||||
assert tags['Foo.Baz'] == ('class', 10, 12)
|
||||
assert tags['Foo.Baz.__init__'] == ('def', 11, 12)
|
||||
assert tags['qux'] == ('def', 14, 16)
|
||||
assert tags['quux'] == ('def', 18, 21)
|
||||
assert tags['Corge'] == ('class', 23, 27)
|
||||
assert tags['Corge.grault'] == ('def', 24, 27)
|
||||
|
||||
|
||||
def test_ModuleAnalyzer_find_attr_docs():
|
||||
@@ -72,13 +115,17 @@ def test_ModuleAnalyzer_find_attr_docs():
|
||||
'\n'
|
||||
'def baz():\n'
|
||||
' """function baz"""\n'
|
||||
' pass\n')
|
||||
' pass\n'
|
||||
'\n'
|
||||
'class Qux: attr1 = 1; attr2 = 2')
|
||||
analyzer = ModuleAnalyzer.for_string(code, 'module')
|
||||
docs = analyzer.find_attr_docs()
|
||||
assert set(docs) == {('Foo', 'attr1'),
|
||||
('Foo', 'attr3'),
|
||||
('Foo', 'attr4'),
|
||||
('Foo', 'attr5'),
|
||||
('Foo', 'attr6'),
|
||||
('Foo', 'attr7'),
|
||||
('Foo', 'attr8'),
|
||||
('Foo', 'attr9')}
|
||||
assert docs[('Foo', 'attr1')] == ['comment before attr1', '']
|
||||
@@ -86,5 +133,23 @@ def test_ModuleAnalyzer_find_attr_docs():
|
||||
assert docs[('Foo', 'attr4')] == ['long attribute comment', '']
|
||||
assert docs[('Foo', 'attr4')] == ['long attribute comment', '']
|
||||
assert docs[('Foo', 'attr5')] == ['attribute comment for attr5', '']
|
||||
assert docs[('Foo', 'attr6')] == ['this comment is ignored', '']
|
||||
assert docs[('Foo', 'attr7')] == ['this comment is ignored', '']
|
||||
assert docs[('Foo', 'attr8')] == ['attribute comment for attr8', '']
|
||||
assert docs[('Foo', 'attr9')] == ['string after attr9', '']
|
||||
assert analyzer.tagorder == {'Foo': 0,
|
||||
'Foo.__init__': 8,
|
||||
'Foo.attr1': 1,
|
||||
'Foo.attr2': 2,
|
||||
'Foo.attr3': 3,
|
||||
'Foo.attr4': 4,
|
||||
'Foo.attr5': 5,
|
||||
'Foo.attr6': 6,
|
||||
'Foo.attr7': 7,
|
||||
'Foo.attr8': 10,
|
||||
'Foo.attr9': 12,
|
||||
'Foo.bar': 13,
|
||||
'baz': 14,
|
||||
'Qux': 15,
|
||||
'Qux.attr1': 16,
|
||||
'Qux.attr2': 17}
|
||||
|
||||
261
tests/test_pycode_parser.py
Normal file
@@ -0,0 +1,261 @@
# -*- coding: utf-8 -*-
"""
test_pycode_parser
~~~~~~~~~~~~~~~~~~

Test pycode.parser.

:copyright: Copyright 2007-2016 by the Sphinx team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""

from sphinx.pycode.parser import Parser


def test_comment_picker_basic():
source = ('a = 1 + 1 #: assignment\n'
'b = 1 +\\\n 1 #: assignment including a CR\n'
'c = (1 +\n 1) #: tuple \n'
'd = {1, \n 1} #: set\n'
'e = [1, \n 1] #: list #: additional comment\n'
'f = "abc"\n'
'#: string; comment on next line (ignored)\n'
'g = 1.0\n'
'"""float; string on next line"""\n')
parser = Parser(source)
parser.parse()
assert parser.comments == {('', 'a'): 'assignment',
('', 'b'): 'assignment including a CR',
('', 'c'): 'tuple ',
('', 'd'): ' set',
('', 'e'): 'list #: additional comment',
('', 'g'): 'float; string on next line'}

def test_comment_picker_location():
# multiple "before" comments
source = ('#: comment before assignment1\n'
'#:\n'
'#: comment before assignment2\n'
'a = 1 + 1\n')
parser = Parser(source)
parser.parse()
assert parser.comments == {('', 'a'): ('comment before assignment1\n'
'\n'
'comment before assignment2')}

# before and after comments
source = ('#: comment before assignment\n'
'a = 1 + 1 #: comment after assignment\n')
parser = Parser(source)
parser.parse()
assert parser.comments == {('', 'a'): 'comment after assignment'}

# after comment and next line string
source = ('a = 1 + 1\n #: comment after assignment\n'
'"""string on next line"""\n')
parser = Parser(source)
parser.parse()
assert parser.comments == {('', 'a'): 'string on next line'}

# before comment and next line string
source = ('#: comment before assignment\n'
'a = 1 + 1\n'
'"""string on next line"""\n')
parser = Parser(source)
parser.parse()
assert parser.comments == {('', 'a'): 'string on next line'}

# before comment, after comment and next line string
source = ('#: comment before assignment\n'
'a = 1 + 1 #: comment after assignment\n'
'"""string on next line"""\n')
parser = Parser(source)
parser.parse()
assert parser.comments == {('', 'a'): 'string on next line'}

# inside __init__ method
source = ('class Foo(object):\n'
' def __init__(self):\n'
' #: comment before assignment\n'
' self.attr1 = None\n'
' self.attr2 = None #: comment after assignment\n'
'\n'
' #: comment for attr3(1)\n'
' self.attr3 = None #: comment for attr3(2)\n'
' """comment for attr3(3)"""\n')
parser = Parser(source)
parser.parse()
assert parser.comments == {('Foo', 'attr1'): 'comment before assignment',
('Foo', 'attr2'): 'comment after assignment',
('Foo', 'attr3'): 'comment for attr3(3)'}


def test_complex_assignment():
source = ('a = 1 + 1; b = a #: compound statement\n'
'c, d = (1, 1) #: unpack assignment\n'
'e = True #: first assignment\n'
'e = False #: second assignment\n'
'f = g = None #: multiple assignment at once\n')
parser = Parser(source)
parser.parse()
assert parser.comments == {('', 'b'): 'compound statement',
('', 'c'): 'unpack assignment',
('', 'd'): 'unpack assignment',
('', 'e'): 'second assignment',
('', 'f'): 'multiple assignment at once',
('', 'g'): 'multiple assignment at once'}
assert parser.definitions == {}


def test_obj_assignment():
source = ('obj = SomeObject() #: some object\n'
'obj.attr = 1 #: attr1\n'
'obj.attr.attr = 1 #: attr2\n')
parser = Parser(source)
parser.parse()
assert parser.comments == {('', 'obj'): 'some object'}
assert parser.definitions == {}


def test_container_assignment():
source = ('l = [] #: list\n'
'l[1] = True #: list assignment\n'
'l[0:0] = [] #: list assignment\n'
'l[_from:_to] = [] #: list assignment\n'
'd = {} #: dict\n'
'd["doc"] = 1 #: dict assignment\n')
parser = Parser(source)
parser.parse()
assert parser.comments == {('', 'l'): 'list',
('', 'd'): 'dict'}
assert parser.definitions == {}


def test_function():
source = ('def some_function():\n'
' """docstring"""\n'
' a = 1 + 1 #: comment1\n'
'\n'
' b = a #: comment2\n')
parser = Parser(source)
parser.parse()
assert parser.comments == {}
assert parser.definitions == {'some_function': ('def', 1, 5)}
assert parser.deforders == {'some_function': 0}


def test_nested_function():
source = ('def some_function():\n'
' a = 1 + 1 #: comment1\n'
'\n'
' def inner_function():\n'
' b = 1 + 1 #: comment2\n')
parser = Parser(source)
parser.parse()
assert parser.comments == {}
assert parser.definitions == {'some_function': ('def', 1, 5)}
assert parser.deforders == {'some_function': 0}


def test_class():
source = ('class Foo(object):\n'
' attr1 = None #: comment1\n'
' attr2 = None #: comment2\n'
'\n'
' def __init__(self):\n'
' self.a = 1 + 1 #: comment3\n'
' self.attr2 = 1 + 1 #: overrided\n'
' b = 1 + 1 #: comment5\n'
'\n'
' def some_method(self):\n'
' c = 1 + 1 #: comment6\n')
parser = Parser(source)
parser.parse()
assert parser.comments == {('Foo', 'attr1'): 'comment1',
('Foo', 'a'): 'comment3',
('Foo', 'attr2'): 'overrided'}
assert parser.definitions == {'Foo': ('class', 1, 11),
'Foo.__init__': ('def', 5, 8),
'Foo.some_method': ('def', 10, 11)}
assert parser.deforders == {'Foo': 0,
'Foo.attr1': 1,
'Foo.__init__': 3,
'Foo.a': 4,
'Foo.attr2': 5,
'Foo.some_method': 6}


def test_class_uses_non_self():
source = ('class Foo(object):\n'
' def __init__(this):\n'
' this.a = 1 + 1 #: comment\n')
parser = Parser(source)
parser.parse()
assert parser.comments == {('Foo', 'a'): 'comment'}
assert parser.definitions == {'Foo': ('class', 1, 3),
'Foo.__init__': ('def', 2, 3)}
assert parser.deforders == {'Foo': 0,
'Foo.__init__': 1,
'Foo.a': 2}


def test_nested_class():
source = ('class Foo(object):\n'
' attr1 = None #: comment1\n'
'\n'
' class Bar(object):\n'
' attr2 = None #: comment2\n')
parser = Parser(source)
parser.parse()
assert parser.comments == {('Foo', 'attr1'): 'comment1',
('Foo.Bar', 'attr2'): 'comment2'}
assert parser.definitions == {'Foo': ('class', 1, 5),
'Foo.Bar': ('class', 4, 5)}
assert parser.deforders == {'Foo': 0,
'Foo.attr1': 1,
'Foo.Bar': 2,
'Foo.Bar.attr2': 3}


def test_comment_picker_multiline_string():
source = ('class Foo(object):\n'
' a = None\n'
' """multiline\n'
' docstring\n'
' """\n'
' b = None\n'
' """\n'
' docstring\n'
' starts with::\n'
'\n'
' empty line"""\n')
parser = Parser(source)
parser.parse()
assert parser.comments == {('Foo', 'a'): 'multiline\ndocstring',
('Foo', 'b'): 'docstring\nstarts with::\n\n empty line'}


def test_decorators():
source = ('@deco\n'
'def func1(): pass\n'
'\n'
'@deco(param1, param2)\n'
'def func2(): pass\n'
'\n'
'@deco1\n'
'@deco2\n'
'def func3(): pass\n'
'\n'
'@deco\n'
'class Foo():\n'
' @deco1\n'
' @deco2\n'
' def method(self): pass\n')
parser = Parser(source)
parser.parse()
assert parser.definitions == {'func1': ('def', 1, 2),
'func2': ('def', 4, 5),
'func3': ('def', 7, 9),
'Foo': ('class', 11, 15),
'Foo.method': ('def', 13, 15)}

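The new test module above drives sphinx.pycode.parser.Parser end to end. A short usage sketch, built only from the names and calls that appear in those tests:

from sphinx.pycode.parser import Parser

source = ('#: comment before assignment\n'
          'a = 1 + 1\n'
          '\n'
          'def func():\n'
          '    pass\n')
parser = Parser(source)
parser.parse()
print(parser.comments)     # {(namespace, name): comment}, here {('', 'a'): 'comment before assignment'}
print(parser.definitions)  # {dotted name: (kind, start line, end line)} for classes and functions
print(parser.deforders)    # {dotted name: order of appearance in the source}
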
tests/test_util_docstrings.py (new file, 65 lines)
@@ -0,0 +1,65 @@
# -*- coding: utf-8 -*-
"""
test_util_docstrings
~~~~~~~~~~~~~~~~~~~~

Test sphinx.util.docstrings.

:copyright: Copyright 2007-2016 by the Sphinx team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""

from sphinx.util.docstrings import prepare_docstring, prepare_commentdoc


def test_prepare_docstring():
docstring = """multiline docstring

Lorem ipsum dolor sit amet, consectetur adipiscing elit,
sed do eiusmod tempor incididunt ut labore et dolore magna
aliqua::

Ut enim ad minim veniam, quis nostrud exercitation
ullamco laboris nisi ut aliquip ex ea commodo consequat.
"""

assert (prepare_docstring(docstring) ==
["multiline docstring",
"",
"Lorem ipsum dolor sit amet, consectetur adipiscing elit,",
"sed do eiusmod tempor incididunt ut labore et dolore magna",
"aliqua::",
"",
" Ut enim ad minim veniam, quis nostrud exercitation",
" ullamco laboris nisi ut aliquip ex ea commodo consequat.",
""])
assert (prepare_docstring(docstring, 5) ==
["multiline docstring",
"",
"Lorem ipsum dolor sit amet, consectetur adipiscing elit,",
"sed do eiusmod tempor incididunt ut labore et dolore magna",
"aliqua::",
"",
"Ut enim ad minim veniam, quis nostrud exercitation",
" ullamco laboris nisi ut aliquip ex ea commodo consequat.",
""])

docstring = """

multiline docstring with leading empty lines
"""
assert (prepare_docstring(docstring) ==
["multiline docstring with leading empty lines",
""])

docstring = "single line docstring"
assert (prepare_docstring(docstring) ==
["single line docstring",
""])


def test_prepare_commentdoc():
assert prepare_commentdoc("hello world") == []
assert prepare_commentdoc("#: hello world") == ["hello world", ""]
assert prepare_commentdoc("#:  hello world") == [" hello world", ""]
assert prepare_commentdoc("#: hello\n#: world\n") == ["hello", "world", ""]

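These docstring helpers are small pure functions; a usage sketch that only restates results already asserted in the tests above:

from sphinx.util.docstrings import prepare_docstring, prepare_commentdoc

print(prepare_docstring("single line docstring"))
# -> ["single line docstring", ""]  (docstring split into lines, terminated by an empty string)

print(prepare_commentdoc("#: hello\n#: world\n"))
# -> ["hello", "world", ""]  (only '#:'-prefixed lines are kept)

print(prepare_commentdoc("hello world"))
# -> []  (no '#:' prefix, so nothing is extracted)
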
@@ -20,7 +20,6 @@ Release checklist
* Check diff by `git diff`
* `git commit -am 'Bump to x.y.z final'`
* `make clean`
* `python setup.py compile_grammar`
* `python setup.py release bdist_wheel sdist upload --identity=[your key]`
* open https://pypi.python.org/pypi/Sphinx and check there are no obvious errors
* `git tag x.y.z` with version number