Mirror of https://github.com/sphinx-doc/sphinx.git (synced 2025-02-25 18:55:22 -06:00)

	pycode: Replace pgen2 by own parser
MANIFEST.in:
@@ -19,13 +19,11 @@ include sphinx/locale/.tx/config
 recursive-include sphinx/templates *
 recursive-include sphinx/texinputs *
 recursive-include sphinx/themes *
-recursive-include sphinx/pycode/pgen2 *.c *.pyx
 recursive-include sphinx/locale *.js *.pot *.po *.mo
 recursive-include sphinx/search/non-minified-js *.js
 recursive-include sphinx/ext/autosummary/templates *
 recursive-include tests *
 recursive-include utils *
-include sphinx/pycode/Grammar-py*
 
 recursive-include doc *
 prune doc/_build

sphinx/directives/code.py:
@@ -256,7 +256,7 @@ class LiteralIncludeReader(object):
             else:
                 start = tags[pyobject][1]
                 end = tags[pyobject][2]
-                lines = lines[start - 1:end - 1]
+                lines = lines[start - 1:end]
                 if 'lineno-match' in self.options:
                     self.lineno_start = start
 
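The one-line change in this hunk fixes the slice bounds used when a `:pyobject:` option picks an object out of the included file: `start` and `end` are 1-based line numbers taken from the analyzer's tags, and (presumably because the new parser reports the end line inclusively) the correct 0-based slice is now `lines[start - 1:end]`. A minimal sketch of the index arithmetic, using made-up data:

```python
# Illustration only: mapping 1-based, end-inclusive (start, end) line numbers
# to a Python slice.  With start=3, end=5 we want source lines 3, 4 and 5.
lines = ["line %d\n" % i for i in range(1, 11)]

start, end = 3, 5
selected = lines[start - 1:end]        # new slice: keeps lines 3..5
assert selected == ["line 3\n", "line 4\n", "line 5\n"]

off_by_one = lines[start - 1:end - 1]  # old slice: silently drops line 5
assert off_by_one == ["line 3\n", "line 4\n"]
```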

sphinx/pycode/Grammar-py2.txt (deleted):
@@ -1,135 +0,0 @@
-# Grammar for Python 2.x
-
-# IMPORTANT: when copying over a new Grammar file, make sure file_input
-# is the first nonterminal in the file!
-
-# Start symbols for the grammar:
-#       single_input is a single interactive statement;
-#       file_input is a module or sequence of commands read from an input file;
-#       eval_input is the input for the eval() and input() functions.
-# NB: compound_stmt in single_input is followed by extra NEWLINE!
-file_input: (NEWLINE | stmt)* ENDMARKER
-single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
-eval_input: testlist NEWLINE* ENDMARKER
-
-decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
-decorators: decorator+
-decorated: decorators (classdef | funcdef)
-funcdef: 'def' NAME parameters ':' suite
-parameters: '(' [varargslist] ')'
-varargslist: ((fpdef ['=' test] ',')*
-              ('*' NAME [',' '**' NAME] | '**' NAME) |
-              fpdef ['=' test] (',' fpdef ['=' test])* [','])
-fpdef: NAME | '(' fplist ')'
-fplist: fpdef (',' fpdef)* [',']
-
-stmt: simple_stmt | compound_stmt
-simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
-small_stmt: (expr_stmt | print_stmt  | del_stmt | pass_stmt | flow_stmt |
-             import_stmt | global_stmt | exec_stmt | assert_stmt)
-expr_stmt: testlist (augassign (yield_expr|testlist) |
-                     ('=' (yield_expr|testlist))*)
-augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |
-            '<<=' | '>>=' | '**=' | '//=')
-# For normal assignments, additional restrictions enforced by the interpreter
-print_stmt: 'print' ( [ test (',' test)* [','] ] |
-                      '>>' test [ (',' test)+ [','] ] )
-del_stmt: 'del' exprlist
-pass_stmt: 'pass'
-flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
-break_stmt: 'break'
-continue_stmt: 'continue'
-return_stmt: 'return' [testlist]
-yield_stmt: yield_expr
-raise_stmt: 'raise' [test [',' test [',' test]]]
-import_stmt: import_name | import_from
-import_name: 'import' dotted_as_names
-import_from: ('from' ('.'* dotted_name | '.'+)
-              'import' ('*' | '(' import_as_names ')' | import_as_names))
-import_as_name: NAME ['as' NAME]
-dotted_as_name: dotted_name ['as' NAME]
-import_as_names: import_as_name (',' import_as_name)* [',']
-dotted_as_names: dotted_as_name (',' dotted_as_name)*
-dotted_name: NAME ('.' NAME)*
-global_stmt: 'global' NAME (',' NAME)*
-exec_stmt: 'exec' expr ['in' test [',' test]]
-assert_stmt: 'assert' test [',' test]
-
-compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated
-if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
-while_stmt: 'while' test ':' suite ['else' ':' suite]
-for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
-try_stmt: ('try' ':' suite
-           ((except_clause ':' suite)+
-            ['else' ':' suite]
-            ['finally' ':' suite] |
-           'finally' ':' suite))
-with_stmt: 'with' with_item (',' with_item)*  ':' suite
-with_item: test ['as' expr]
-# NB compile.c makes sure that the default except clause is last
-except_clause: 'except' [test [('as' | ',') test]]
-suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
-
-# Backward compatibility cruft to support:
-# [ x for x in lambda: True, lambda: False if x() ]
-# even while also allowing:
-# lambda x: 5 if x else 2
-# (But not a mix of the two)
-testlist_safe: old_test [(',' old_test)+ [',']]
-old_test: or_test | old_lambdef
-old_lambdef: 'lambda' [varargslist] ':' old_test
-
-test: or_test ['if' or_test 'else' test] | lambdef
-or_test: and_test ('or' and_test)*
-and_test: not_test ('and' not_test)*
-not_test: 'not' not_test | comparison
-comparison: expr (comp_op expr)*
-comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
-expr: xor_expr ('|' xor_expr)*
-xor_expr: and_expr ('^' and_expr)*
-and_expr: shift_expr ('&' shift_expr)*
-shift_expr: arith_expr (('<<'|'>>') arith_expr)*
-arith_expr: term (('+'|'-') term)*
-term: factor (('*'|'/'|'%'|'//') factor)*
-factor: ('+'|'-'|'~') factor | power
-power: atom trailer* ['**' factor]
-atom: ('(' [yield_expr|testlist_comp] ')' |
-       '[' [listmaker] ']' |
-       '{' [dictorsetmaker] '}' |
-       '`' testlist1 '`' |
-       NAME | NUMBER | STRING+)
-listmaker: test ( list_for | (',' test)* [','] )
-testlist_comp: test ( comp_for | (',' test)* [','] )
-lambdef: 'lambda' [varargslist] ':' test
-trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
-subscriptlist: subscript (',' subscript)* [',']
-subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop]
-sliceop: ':' [test]
-exprlist: expr (',' expr)* [',']
-testlist: test (',' test)* [',']
-dictorsetmaker: ( (test ':' test (comp_for | (',' test ':' test)* [','])) |
-                  (test (comp_for | (',' test)* [','])) )
-
-classdef: 'class' NAME ['(' [testlist] ')'] ':' suite
-
-arglist: (argument ',')* (argument [',']
-                         |'*' test (',' argument)* [',' '**' test]
-                         |'**' test)
-# The reason that keywords are test nodes instead of NAME is that using NAME
-# results in an ambiguity. ast.c makes sure it's a NAME.
-argument: test [comp_for] | test '=' test
-
-list_iter: list_for | list_if
-list_for: 'for' exprlist 'in' testlist_safe [list_iter]
-list_if: 'if' old_test [list_iter]
-
-comp_iter: comp_for | comp_if
-comp_for: 'for' exprlist 'in' or_test [comp_iter]
-comp_if: 'if' old_test [comp_iter]
-
-testlist1: test (',' test)*
-
-# not used in grammar, but may appear in "node" passed from Parser to Compiler
-encoding_decl: NAME
-
-yield_expr: 'yield' [testlist]
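For readers not used to the pgen2 grammar notation deleted above: each rule defines a nonterminal, quoted strings are literal keywords or operators, UPPERCASE names are tokens, [...] marks an optional part, * means repetition, and | separates alternatives. As a hypothetical illustration (not part of the commit), the funcdef rule maps onto an ordinary definition like this:

```python
# funcdef: 'def' NAME parameters ':' suite   (from the deleted Python 2 grammar)
def greet(name, excited=False):        # 'def'  NAME   parameters  ':'
    suffix = '!' if excited else '.'   # suite = the indented block of statements
    return 'Hello, ' + name + suffix

print(greet('world'))                  # Hello, world.
```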

sphinx/pycode/Grammar-py3.txt (deleted):
@@ -1,143 +0,0 @@
-# Grammar for Python 3.x (with at least x <= 5)
-
-
-# IMPORTANT: when copying over a new Grammar file, make sure file_input
-# is the first nonterminal in the file!
-
-# Start symbols for the grammar:
-#       single_input is a single interactive statement;
-#       file_input is a module or sequence of commands read from an input file;
-#       eval_input is the input for the eval() functions.
-# NB: compound_stmt in single_input is followed by extra NEWLINE!
-file_input: (NEWLINE | stmt)* ENDMARKER
-single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
-eval_input: testlist NEWLINE* ENDMARKER
-
-decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
-decorators: decorator+
-decorated: decorators (classdef | funcdef | async_funcdef)
-
-async_funcdef: ASYNC funcdef
-funcdef: 'def' NAME parameters ['->' test] ':' suite
-
-parameters: '(' [typedargslist] ')'
-typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [','
-       ['*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef]]
-     |  '*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef)
-tfpdef: NAME [':' test]
-varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [','
-       ['*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef]]
-     |  '*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef)
-vfpdef: NAME
-
-stmt: simple_stmt | compound_stmt
-simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
-small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt |
-             import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
-expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) |
-                     ('=' (yield_expr|testlist_star_expr))*)
-testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
-augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' |
-            '<<=' | '>>=' | '**=' | '//=')
-# For normal assignments, additional restrictions enforced by the interpreter
-del_stmt: 'del' exprlist
-pass_stmt: 'pass'
-flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
-break_stmt: 'break'
-continue_stmt: 'continue'
-return_stmt: 'return' [testlist]
-yield_stmt: yield_expr
-raise_stmt: 'raise' [test ['from' test]]
-import_stmt: import_name | import_from
-import_name: 'import' dotted_as_names
-# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
-import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+)
-              'import' ('*' | '(' import_as_names ')' | import_as_names))
-import_as_name: NAME ['as' NAME]
-dotted_as_name: dotted_name ['as' NAME]
-import_as_names: import_as_name (',' import_as_name)* [',']
-dotted_as_names: dotted_as_name (',' dotted_as_name)*
-dotted_name: NAME ('.' NAME)*
-global_stmt: 'global' NAME (',' NAME)*
-nonlocal_stmt: 'nonlocal' NAME (',' NAME)*
-assert_stmt: 'assert' test [',' test]
-
-compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt
-async_stmt: ASYNC (funcdef | with_stmt | for_stmt)
-if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
-while_stmt: 'while' test ':' suite ['else' ':' suite]
-for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
-try_stmt: ('try' ':' suite
-           ((except_clause ':' suite)+
-            ['else' ':' suite]
-            ['finally' ':' suite] |
-           'finally' ':' suite))
-with_stmt: 'with' with_item (',' with_item)*  ':' suite
-with_item: test ['as' expr]
-# NB compile.c makes sure that the default except clause is last
-except_clause: 'except' [test ['as' NAME]]
-suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
-
-test: or_test ['if' or_test 'else' test] | lambdef
-test_nocond: or_test | lambdef_nocond
-lambdef: 'lambda' [varargslist] ':' test
-lambdef_nocond: 'lambda' [varargslist] ':' test_nocond
-or_test: and_test ('or' and_test)*
-and_test: not_test ('and' not_test)*
-not_test: 'not' not_test | comparison
-comparison: expr (comp_op expr)*
-# <> isn't actually a valid comparison operator in Python. It's here for the
-# sake of a __future__ import described in PEP 401 (which really works :-)
-comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
-star_expr: '*' expr
-expr: xor_expr ('|' xor_expr)*
-xor_expr: and_expr ('^' and_expr)*
-and_expr: shift_expr ('&' shift_expr)*
-shift_expr: arith_expr (('<<'|'>>') arith_expr)*
-arith_expr: term (('+'|'-') term)*
-term: factor (('*'|'@'|'/'|'%'|'//') factor)*
-factor: ('+'|'-'|'~') factor | power
-power: [AWAIT] atom trailer* ['**' factor]
-atom: ('(' [yield_expr|testlist_comp] ')' |
-       '[' [testlist_comp] ']' |
-       '{' [dictorsetmaker] '}' |
-       NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
-testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
-trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
-subscriptlist: subscript (',' subscript)* [',']
-subscript: test | [test] ':' [test] [sliceop]
-sliceop: ':' [test]
-exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
-testlist: test (',' test)* [',']
-dictorsetmaker: ( ((test ':' test | '**' expr)
-                   (comp_for | (',' (test ':' test | '**' expr))* [','])) |
-                  ((test | star_expr)
-                   (comp_for | (',' (test | star_expr))* [','])) )
-
-classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
-
-arglist: argument (',' argument)*  [',']
-
-# The reason that keywords are test nodes instead of NAME is that using NAME
-# results in an ambiguity. ast.c makes sure it's a NAME.
-# "test '=' test" is really "keyword '=' test", but we have no such token.
-# These need to be in a single rule to avoid grammar that is ambiguous
-# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr,
-# we explicitly match '*' here, too, to give it proper precedence.
-# Illegal combinations and orderings are blocked in ast.c:
-# multiple (test comp_for) arguements are blocked; keyword unpackings
-# that precede iterable unpackings are blocked; etc.
-argument: ( test [comp_for] |
-            test '=' test |
-            '**' test |
-            '*' test )
-
-comp_iter: comp_for | comp_if
-comp_for: 'for' exprlist 'in' or_test [comp_iter]
-comp_if: 'if' test_nocond [comp_iter]
-
-# not used in grammar, but may appear in "node" passed from Parser to Compiler
-encoding_decl: NAME
-
-yield_expr: 'yield' [yield_arg]
-yield_arg: 'from' test | testlist

sphinx/pycode/__init__.py:
@@ -11,175 +11,20 @@
 from __future__ import print_function
 
 import re
-import sys
-from os import path
 
-from six import iteritems, text_type, BytesIO, StringIO
+from six import iteritems, BytesIO, StringIO
 
-from sphinx import package_dir
 from sphinx.errors import PycodeError
-from sphinx.pycode import nodes
-from sphinx.pycode.pgen2 import driver, token, tokenize, parse, literals
+from sphinx.pycode.parser import Parser
 from sphinx.util import get_module_source, detect_encoding
-from sphinx.util.pycompat import TextIOWrapper
-from sphinx.util.docstrings import prepare_docstring, prepare_commentdoc
 
 if False:
     # For type annotation
-    from typing import Any, Dict, List, Tuple  # NOQA
+    from typing import Any, Dict, IO, List, Tuple  # NOQA
-
-
-# load the Python grammar
-_grammarfile = path.join(package_dir, 'pycode',
-                         'Grammar-py%d.txt' % sys.version_info[0])
-pygrammar = driver.load_grammar(_grammarfile)
-pydriver = driver.Driver(pygrammar, convert=nodes.convert)
-
-
-# an object with attributes corresponding to token and symbol names
-class sym(object):
-    pass
-
-
-for k, v in iteritems(pygrammar.symbol2number):
-    setattr(sym, k, v)
-for k, v in iteritems(token.tok_name):
-    setattr(sym, v, k)
-
-# a dict mapping terminal and nonterminal numbers to their names
-number2name = pygrammar.number2symbol.copy()
-number2name.update(token.tok_name)
-
-_eq = nodes.Leaf(token.EQUAL, '=')
 
 emptyline_re = re.compile(r'^\s*(#.*)?$')
 
 
-class AttrDocVisitor(nodes.NodeVisitor):
-    """
-    Visitor that collects docstrings for attribute assignments on toplevel and
-    in classes (class attributes and attributes set in __init__).
-
-    The docstrings can either be in special '#:' comments before the assignment
-    or in a docstring after it.
-    """
-    def init(self, scope, encoding):
-        self.scope = scope
-        self.in_init = 0
-        self.encoding = encoding
-        self.namespace = []  # type: List[unicode]
-        self.collected = {}  # type: Dict[Tuple[unicode, unicode], unicode]
-        self.tagnumber = 0
-        self.tagorder = {}   # type: Dict[unicode, int]
-
-    def add_tag(self, name):
-        name = '.'.join(self.namespace + [name])
-        self.tagorder[name] = self.tagnumber
-        self.tagnumber += 1
-
-    def visit_classdef(self, node):
-        """Visit a class."""
-        self.add_tag(node[1].value)
-        self.namespace.append(node[1].value)
-        self.generic_visit(node)
-        self.namespace.pop()
-
-    def visit_funcdef(self, node):
-        """Visit a function (or method)."""
-        # usually, don't descend into functions -- nothing interesting there
-        self.add_tag(node[1].value)
-        if node[1].value == '__init__':
-            # however, collect attributes set in __init__ methods
-            self.in_init += 1
-            self.generic_visit(node)
-            self.in_init -= 1
-
-    def visit_expr_stmt(self, node):
-        """Visit an assignment which may have a special comment before (or
-        after) it.
-        """
-        if _eq not in node.children:
-            # not an assignment (we don't care for augmented assignments)
-            return
-        # look *after* the node; there may be a comment prefixing the NEWLINE
-        # of the simple_stmt
-        parent = node.parent
-        idx = parent.children.index(node) + 1
-        while idx < len(parent):
-            if parent[idx].type == sym.SEMI:  # type: ignore
-                idx += 1
-                continue  # skip over semicolon
-            if parent[idx].type == sym.NEWLINE:  # type: ignore
-                prefix = parent[idx].get_prefix()
-                if not isinstance(prefix, text_type):
-                    prefix = prefix.decode(self.encoding)
-                docstring = prepare_commentdoc(prefix)
-                if docstring:
-                    self.add_docstring(node, docstring)
-                    return  # don't allow docstrings both before and after
-            break
-        # now look *before* the node
-        pnode = node[0]
-        prefix = pnode.get_prefix()
-        # if the assignment is the first statement on a new indentation
-        # level, its preceding whitespace and comments are not assigned
-        # to that token, but the first INDENT or DEDENT token
-        while not prefix:
-            pnode = pnode.get_prev_leaf()
-            if not pnode or pnode.type not in (token.INDENT, token.DEDENT):
-                break
-            prefix = pnode.get_prefix()
-        if not isinstance(prefix, text_type):
-            prefix = prefix.decode(self.encoding)
-        docstring = prepare_commentdoc(prefix)
-        self.add_docstring(node, docstring)
-
-    def visit_simple_stmt(self, node):
-        """Visit a docstring statement which may have an assignment before."""
-        if node[0].type != token.STRING:
-            # not a docstring; but still need to visit children
-            return self.generic_visit(node)
-        prev = node.get_prev_sibling()
-        if not prev:
-            return
-        if (prev.type == sym.simple_stmt and  # type: ignore
-           prev[0].type == sym.expr_stmt and _eq in prev[0].children):  # type: ignore
-            # need to "eval" the string because it's returned in its
-            # original form
-            docstring = literals.evalString(node[0].value, self.encoding)
-            docstring = prepare_docstring(docstring)
-            self.add_docstring(prev[0], docstring)
-
-    def add_docstring(self, node, docstring):
-        # add an item for each assignment target
-        for i in range(0, len(node) - 1, 2):
-            target = node[i]
-            if self.in_init and self.number2name[target.type] == 'power':
-                # maybe an attribute assignment -- check necessary conditions
-                if (  # node must have two children
-                        len(target) != 2 or
-                        # first child must be "self"
-                        target[0].type != token.NAME or target[0].value != 'self' or
-                        # second child must be a "trailer" with two children
-                        self.number2name[target[1].type] != 'trailer' or
-                        len(target[1]) != 2 or
-                        # first child must be a dot, second child a name
-                        target[1][0].type != token.DOT or
-                        target[1][1].type != token.NAME):
-                    continue
-                name = target[1][1].value
-            elif target.type != token.NAME:
-                # don't care about other complex targets
-                continue
-            else:
-                name = target.value
-            self.add_tag(name)
-            if docstring:
-                namespace = '.'.join(self.namespace)
-                if namespace.startswith(self.scope):
-                    self.collected[namespace, name] = docstring
-
-
 class ModuleAnalyzer(object):
     # cache for analyzer objects -- caches both by module and file name
     cache = {}  # type: Dict[Tuple[unicode, unicode], Any]
@@ -223,137 +68,59 @@ class ModuleAnalyzer(object):
         return obj
 
     def __init__(self, source, modname, srcname, decoded=False):
-        # name of the module
-        self.modname = modname
-        # name of the source file
-        self.srcname = srcname
-        # file-like object yielding source lines
-        self.source = source
+        # type: (IO, unicode, unicode, bool) -> None
+        self.modname = modname  # name of the module
+        self.srcname = srcname  # name of the source file
 
         # cache the source code as well
-        pos = self.source.tell()
+        pos = source.tell()
         if not decoded:
-            self.encoding = detect_encoding(self.source.readline)
-            self.source.seek(pos)
-            self.code = self.source.read().decode(self.encoding)
-            self.source.seek(pos)
-            self.source = TextIOWrapper(self.source, self.encoding)
+            self.encoding = detect_encoding(source.readline)
+            source.seek(pos)
+            self.code = source.read().decode(self.encoding)
         else:
             self.encoding = None
-            self.code = self.source.read()
-            self.source.seek(pos)
+            self.code = source.read()
 
-        # will be filled by tokenize()
-        self.tokens = None      # type: List[unicode]
         # will be filled by parse()
-        self.parsetree = None   # type: Any
-        # will be filled by find_attr_docs()
-        self.attr_docs = None   # type: List[unicode]
+        self.attr_docs = None   # type: Dict[Tuple[unicode, unicode], List[unicode]]
         self.tagorder = None    # type: Dict[unicode, int]
-        # will be filled by find_tags()
-        self.tags = None        # type: List[unicode]
+        self.tags = None        # type: Dict[unicode, Tuple[unicode, int, int]]
-
-    def tokenize(self):
-        """Generate tokens from the source."""
-        if self.tokens is not None:
-            return
-        try:
-            self.tokens = list(tokenize.generate_tokens(self.source.readline))
-        except tokenize.TokenError as err:
-            raise PycodeError('tokenizing failed', err)
-        self.source.close()
 
     def parse(self):
-        """Parse the generated source tokens."""
-        if self.parsetree is not None:
-            return
-        self.tokenize()
+        # type: () -> None
+        """Parse the source code."""
         try:
-            self.parsetree = pydriver.parse_tokens(self.tokens)
-        except parse.ParseError as err:
-            raise PycodeError('parsing failed', err)
+            parser = Parser(self.code, self.encoding)
+            parser.parse()
 
-    def find_attr_docs(self, scope=''):
+            self.attr_docs = {}
+            for (scope, comment) in iteritems(parser.comments):
+                if comment:
+                    self.attr_docs[scope] = comment.splitlines() + ['']
+                else:
+                    self.attr_docs[scope] = ['']
+
+            self.tags = parser.definitions
+            self.tagorder = parser.deforders
+        except Exception as exc:
+            raise PycodeError('parsing failed: %r' % exc)
+
+    def find_attr_docs(self):
+        # type: () -> Dict[Tuple[unicode, unicode], List[unicode]]
         """Find class and module-level attributes and their documentation."""
-        if self.attr_docs is not None:
-            return self.attr_docs
-        self.parse()
-        attr_visitor = AttrDocVisitor(number2name, scope, self.encoding)
-        attr_visitor.visit(self.parsetree)
-        self.attr_docs = attr_visitor.collected
-        self.tagorder = attr_visitor.tagorder
-        # now that we found everything we could in the tree, throw it away
-        # (it takes quite a bit of memory for large modules)
-        self.parsetree = None
-        return attr_visitor.collected
+        if self.attr_docs is None:
+            self.parse()
+
+        return self.attr_docs
 
     def find_tags(self):
+        # type: () -> Dict[unicode, Tuple[unicode, int, int]]
         """Find class, function and method definitions and their location."""
-        if self.tags is not None:
-            return self.tags
-        self.tokenize()
-        result = {}
-        namespace = []  # type: List[unicode]
-        stack = []      # type: List[Tuple[unicode, unicode, unicode, int]]
-        indent = 0
-        decopos = None
-        defline = False
-        expect_indent = False
-        emptylines = 0
+        if self.tags is None:
+            self.parse()
 
-        def tokeniter(ignore = (token.COMMENT,)):
-            for tokentup in self.tokens:
-                if tokentup[0] not in ignore:
-                    yield tokentup
-        tokeniter = tokeniter()
-        for type, tok, spos, epos, line in tokeniter:  # type: ignore
-            if expect_indent and type != token.NL:
-                if type != token.INDENT:
-                    # no suite -- one-line definition
-                    assert stack
-                    dtype, fullname, startline, _ = stack.pop()
-                    endline = epos[0]
-                    namespace.pop()
-                    result[fullname] = (dtype, startline, endline - emptylines)
-                expect_indent = False
-            if tok in ('def', 'class'):
-                name = next(tokeniter)[1]  # type: ignore
-                namespace.append(name)
-                fullname = '.'.join(namespace)
-                stack.append((tok, fullname, decopos or spos[0], indent))
-                defline = True
-                decopos = None
-            elif type == token.OP and tok == '@':
-                if decopos is None:
-                    decopos = spos[0]
-            elif type == token.INDENT:
-                expect_indent = False
-                indent += 1
-            elif type == token.DEDENT:
-                indent -= 1
-                # if the stacklevel is the same as it was before the last
-                # def/class block, this dedent closes that block
-                if stack and indent == stack[-1][3]:
-                    dtype, fullname, startline, _ = stack.pop()
-                    endline = spos[0]
-                    namespace.pop()
-                    result[fullname] = (dtype, startline, endline - emptylines)
-            elif type == token.NEWLINE:
-                # if this line contained a definition, expect an INDENT
-                # to start the suite; if there is no such INDENT
-                # it's a one-line definition
-                if defline:
-                    defline = False
-                    expect_indent = True
-                emptylines = 0
-            elif type == token.NL:
-                # count up if line is empty or comment only
-                if emptyline_re.match(line):
-                    emptylines += 1
-                else:
-                    emptylines = 0
-        self.tags = result
-        return result
+        return self.tags
 
 
 if __name__ == '__main__':
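Taken together, the two hunks above replace the pgen2-based tokenize()/parse() pipeline with a single parse() that delegates to the new sphinx.pycode.parser.Parser and caches its comments, definitions and deforders on the analyzer. A small usage sketch based only on the signatures visible in this diff; the exact contents of the returned dictionaries depend on the new parser module, which is not shown here:

```python
from six import BytesIO

from sphinx.pycode import ModuleAnalyzer

source = BytesIO(b'''\
class Foo(object):
    #: doc comment collected as an attribute docstring
    attr = 1

    def bar(self):
        pass
''')

# __init__(source, modname, srcname, decoded=False) as in the hunk above
analyzer = ModuleAnalyzer(source, 'example', '<string>')

# find_attr_docs() lazily calls parse() and returns
# {(namespace, attribute_name): [docstring lines..., '']}
print(analyzer.find_attr_docs())

# find_tags() returns {fullname: (kind, start_line, end_line)}
print(analyzer.find_tags())
```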
@@ -1,212 +0,0 @@
 | 
				
			|||||||
# -*- coding: utf-8 -*-
 | 
					 | 
				
			||||||
"""
 | 
					 | 
				
			||||||
    sphinx.pycode.nodes
 | 
					 | 
				
			||||||
    ~~~~~~~~~~~~~~~~~~~
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    Parse tree node implementations.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    :copyright: Copyright 2007-2017 by the Sphinx team, see AUTHORS.
 | 
					 | 
				
			||||||
    :license: BSD, see LICENSE for details.
 | 
					 | 
				
			||||||
"""
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
if False:
 | 
					 | 
				
			||||||
    # For type annotation
 | 
					 | 
				
			||||||
    from typing import Callable  # NOQA
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class BaseNode(object):
 | 
					 | 
				
			||||||
    """
 | 
					 | 
				
			||||||
    Node superclass for both terminal and nonterminal nodes.
 | 
					 | 
				
			||||||
    """
 | 
					 | 
				
			||||||
    parent = None  # type: BaseNode
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def _eq(self, other):
 | 
					 | 
				
			||||||
        raise NotImplementedError
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def __eq__(self, other):
 | 
					 | 
				
			||||||
        if self.__class__ is not other.__class__:
 | 
					 | 
				
			||||||
            return NotImplemented
 | 
					 | 
				
			||||||
        return self._eq(other)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def __ne__(self, other):
 | 
					 | 
				
			||||||
        if self.__class__ is not other.__class__:
 | 
					 | 
				
			||||||
            return NotImplemented
 | 
					 | 
				
			||||||
        return not self._eq(other)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    __hash__ = None  # type: Callable[[object], int]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def get_prev_sibling(self):
 | 
					 | 
				
			||||||
        """Return previous child in parent's children, or None."""
 | 
					 | 
				
			||||||
        if self.parent is None:
 | 
					 | 
				
			||||||
            return None
 | 
					 | 
				
			||||||
        for i, child in enumerate(self.parent.children):
 | 
					 | 
				
			||||||
            if child is self:
 | 
					 | 
				
			||||||
                if i == 0:
 | 
					 | 
				
			||||||
                    return None
 | 
					 | 
				
			||||||
                return self.parent.children[i - 1]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def get_next_sibling(self):
 | 
					 | 
				
			||||||
        """Return next child in parent's children, or None."""
 | 
					 | 
				
			||||||
        if self.parent is None:
 | 
					 | 
				
			||||||
            return None
 | 
					 | 
				
			||||||
        for i, child in enumerate(self.parent.children):
 | 
					 | 
				
			||||||
            if child is self:
 | 
					 | 
				
			||||||
                try:
 | 
					 | 
				
			||||||
                    return self.parent.children[i + 1]
 | 
					 | 
				
			||||||
                except IndexError:
 | 
					 | 
				
			||||||
                    return None
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def get_prev_leaf(self):
 | 
					 | 
				
			||||||
        """Return the leaf node that precedes this node in the parse tree."""
 | 
					 | 
				
			||||||
        def last_child(node):
 | 
					 | 
				
			||||||
            if isinstance(node, Leaf):
 | 
					 | 
				
			||||||
                return node
 | 
					 | 
				
			||||||
            elif not node.children:
 | 
					 | 
				
			||||||
                return None
 | 
					 | 
				
			||||||
            else:
 | 
					 | 
				
			||||||
                return last_child(node.children[-1])
 | 
					 | 
				
			||||||
        if self.parent is None:
 | 
					 | 
				
			||||||
            return None
 | 
					 | 
				
			||||||
        prev = self.get_prev_sibling()
 | 
					 | 
				
			||||||
        if isinstance(prev, Leaf):
 | 
					 | 
				
			||||||
            return prev
 | 
					 | 
				
			||||||
        elif prev is not None:
 | 
					 | 
				
			||||||
            return last_child(prev)
 | 
					 | 
				
			||||||
        return self.parent.get_prev_leaf()
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def get_next_leaf(self):
 | 
					 | 
				
			||||||
        """Return self if leaf, otherwise the leaf node that succeeds this
 | 
					 | 
				
			||||||
        node in the parse tree.
 | 
					 | 
				
			||||||
        """
 | 
					 | 
				
			||||||
        node = self
 | 
					 | 
				
			||||||
        while not isinstance(node, Leaf):
 | 
					 | 
				
			||||||
            assert node.children
 | 
					 | 
				
			||||||
            node = node.children[0]
 | 
					 | 
				
			||||||
        return node
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def get_lineno(self):
 | 
					 | 
				
			||||||
        """Return the line number which generated the invocant node."""
 | 
					 | 
				
			||||||
        return self.get_next_leaf().lineno
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def get_prefix(self):
 | 
					 | 
				
			||||||
        """Return the prefix of the next leaf node."""
 | 
					 | 
				
			||||||
        # only leaves carry a prefix
 | 
					 | 
				
			||||||
        return self.get_next_leaf().prefix
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class Node(BaseNode):
 | 
					 | 
				
			||||||
    """
 | 
					 | 
				
			||||||
    Node implementation for nonterminals.
 | 
					 | 
				
			||||||
    """
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def __init__(self, type, children, context=None):
 | 
					 | 
				
			||||||
        # type of nonterminals is >= 256
 | 
					 | 
				
			||||||
        # assert type >= 256, type
 | 
					 | 
				
			||||||
        self.type = type
 | 
					 | 
				
			||||||
        self.children = list(children)
 | 
					 | 
				
			||||||
        for ch in self.children:
 | 
					 | 
				
			||||||
            # assert ch.parent is None, repr(ch)
 | 
					 | 
				
			||||||
            ch.parent = self
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def __repr__(self):
 | 
					 | 
				
			||||||
        return '%s(%s, %r)' % (self.__class__.__name__,
 | 
					 | 
				
			||||||
                               self.type, self.children)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def __str__(self):
 | 
					 | 
				
			||||||
        """This reproduces the input source exactly."""
 | 
					 | 
				
			||||||
        return ''.join(map(str, self.children))
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def _eq(self, other):
 | 
					 | 
				
			||||||
        return (self.type, self.children) == (other.type, other.children)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    # support indexing the node directly instead of .children
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def __getitem__(self, index):
 | 
					 | 
				
			||||||
        return self.children[index]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def __iter__(self):
 | 
					 | 
				
			||||||
        return iter(self.children)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def __len__(self):
 | 
					 | 
				
			||||||
        return len(self.children)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class Leaf(BaseNode):
 | 
					 | 
				
			||||||
    """
 | 
					 | 
				
			||||||
    Node implementation for leaf nodes (terminals).
 | 
					 | 
				
			||||||
    """
 | 
					 | 
				
			||||||
    prefix = ''  # Whitespace and comments preceding this token in the input
 | 
					 | 
				
			||||||
    lineno = 0   # Line where this token starts in the input
 | 
					 | 
				
			||||||
    column = 0   # Column where this token tarts in the input
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def __init__(self, type, value, context=None):
 | 
					 | 
				
			||||||
        # type of terminals is below 256
 | 
					 | 
				
			||||||
        # assert 0 <= type < 256, type
 | 
					 | 
				
			||||||
        self.type = type
 | 
					 | 
				
			||||||
        self.value = value
 | 
					 | 
				
			||||||
        if context is not None:
 | 
					 | 
				
			||||||
            self.prefix, (self.lineno, self.column) = context
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def __repr__(self):
 | 
					 | 
				
			||||||
        return '%s(%r, %r, %r)' % (self.__class__.__name__,
 | 
					 | 
				
			||||||
                                   self.type, self.value, self.prefix)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def __str__(self):
 | 
					 | 
				
			||||||
        """This reproduces the input source exactly."""
 | 
					 | 
				
			||||||
        return self.prefix + str(self.value)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def _eq(self, other):
 | 
					 | 
				
			||||||
        """Compares two nodes for equality."""
 | 
					 | 
				
			||||||
        return (self.type, self.value) == (other.type, other.value)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def convert(grammar, raw_node):
 | 
					 | 
				
			||||||
    """Convert raw node to a Node or Leaf instance."""
 | 
					 | 
				
			||||||
    type, value, context, children = raw_node
 | 
					 | 
				
			||||||
    if children or type in grammar.number2symbol:
 | 
					 | 
				
			||||||
        # If there's exactly one child, return that child instead of
 | 
					 | 
				
			||||||
        # creating a new node.
 | 
					 | 
				
			||||||
        if len(children) == 1:
 | 
					 | 
				
			||||||
            return children[0]
 | 
					 | 
				
			||||||
        return Node(type, children, context=context)
 | 
					 | 
				
			||||||
    else:
 | 
					 | 
				
			||||||
        return Leaf(type, value, context=context)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def nice_repr(node, number2name, prefix=False):
 | 
					 | 
				
			||||||
    def _repr(node):
 | 
					 | 
				
			||||||
        if isinstance(node, Leaf):
 | 
					 | 
				
			||||||
            return "%s(%r)" % (number2name[node.type], node.value)
 | 
					 | 
				
			||||||
        else:
 | 
					 | 
				
			||||||
            return "%s(%s)" % (number2name[node.type],
 | 
					 | 
				
			||||||
                               ', '.join(map(_repr, node.children)))
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def _prepr(node):
 | 
					 | 
				
			||||||
        if isinstance(node, Leaf):
 | 
					 | 
				
			||||||
            return "%s(%r, %r)" % (number2name[node.type],
 | 
					 | 
				
			||||||
                                   node.prefix, node.value)
 | 
					 | 
				
			||||||
        else:
 | 
					 | 
				
			||||||
            return "%s(%s)" % (number2name[node.type],
 | 
					 | 
				
			||||||
                               ', '.join(map(_prepr, node.children)))
 | 
					 | 
				
			||||||
    return (prefix and _prepr or _repr)(node)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class NodeVisitor(object):
 | 
					 | 
				
			||||||
    def __init__(self, number2name, *args):
 | 
					 | 
				
			||||||
        self.number2name = number2name
 | 
					 | 
				
			||||||
        self.init(*args)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def init(self, *args):
 | 
					 | 
				
			||||||
        pass
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def visit(self, node):
 | 
					 | 
				
			||||||
        """Visit a node."""
 | 
					 | 
				
			||||||
        method = 'visit_' + self.number2name[node.type]
 | 
					 | 
				
			||||||
        visitor = getattr(self, method, self.generic_visit)
 | 
					 | 
				
			||||||
        return visitor(node)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def generic_visit(self, node):
 | 
					 | 
				
			||||||
        """Called if no explicit visitor function exists for a node."""
 | 
					 | 
				
			||||||
        if isinstance(node, Node):
 | 
					 | 
				
			||||||
            for child in node:  # type: ignore
 | 
					 | 
				
			||||||
                self.visit(child)
 | 
					 | 
				
			||||||
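Taken together, Leaf, Node, convert() and NodeVisitor form the tree layer that the pgen2 driver below feeds. The following is a minimal sketch of how they fit together, assuming a pre-removal checkout where sphinx.pycode.nodes is still importable; the hand-built tree, the symbol number 256 and the 'assignment' name are made up purely for the demonstration.

import token  # standard library token numbers double as leaf types here

from sphinx.pycode.nodes import Leaf, Node, NodeVisitor, nice_repr

# number2name maps type numbers to readable names for dispatch and printing.
number2name = dict(token.tok_name)
number2name[256] = 'assignment'   # hypothetical symbol name for the demo

class NameCollector(NodeVisitor):
    def init(self):
        self.names = []

    def visit_NAME(self, node):
        self.names.append(node.value)

    def visit_assignment(self, node):
        # walk the children of the (made-up) assignment symbol
        for child in node.children:
            self.visit(child)

tree = Node(256, [Leaf(token.NAME, 'answer', ('', (1, 0))),
                  Leaf(token.OP, '=', (' ', (1, 7)))])

collector = NameCollector(number2name)
collector.visit(tree)
print(collector.names)                 # ['answer']
print(nice_repr(tree, number2name))    # assignment(NAME('answer'), OP('='))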
@@ -1,4 +0,0 @@
 | 
				
			|||||||
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
 | 
					 | 
				
			||||||
# Licensed to PSF under a Contributor Agreement.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
"""The pgen2 package."""
 | 
					 | 
				
			||||||
@@ -1,154 +0,0 @@
 | 
				
			|||||||
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
 | 
					 | 
				
			||||||
# Licensed to PSF under a Contributor Agreement.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Modifications:
 | 
					 | 
				
			||||||
# Copyright 2006 Google, Inc. All Rights Reserved.
 | 
					 | 
				
			||||||
# Licensed to PSF under a Contributor Agreement.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
"""Parser driver.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
This provides a high-level interface to parse a file into a syntax tree.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
"""
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
__author__ = "Guido van Rossum <guido@python.org>"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
__all__ = ["Driver", "load_grammar"]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Python imports
 | 
					 | 
				
			||||||
import os
 | 
					 | 
				
			||||||
import logging
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
import sphinx
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Pgen imports
 | 
					 | 
				
			||||||
from sphinx.pycode.pgen2 import grammar, parse, token, tokenize, pgen
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class Driver(object):
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def __init__(self, grammar, convert=None, logger=None):
 | 
					 | 
				
			||||||
        self.grammar = grammar
 | 
					 | 
				
			||||||
        if logger is None:
 | 
					 | 
				
			||||||
            logger = logging.getLogger()
 | 
					 | 
				
			||||||
        self.logger = logger
 | 
					 | 
				
			||||||
        self.convert = convert
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def parse_tokens(self, tokens, debug=False):
 | 
					 | 
				
			||||||
        """Parse a series of tokens and return the syntax tree."""
 | 
					 | 
				
			||||||
        # XXX Move the prefix computation into a wrapper around tokenize.
 | 
					 | 
				
			||||||
        p = parse.Parser(self.grammar, self.convert)
 | 
					 | 
				
			||||||
        p.setup()
 | 
					 | 
				
			||||||
        lineno = 1
 | 
					 | 
				
			||||||
        column = 0
 | 
					 | 
				
			||||||
        type = value = start = end = line_text = None
 | 
					 | 
				
			||||||
        prefix = ""
 | 
					 | 
				
			||||||
        opmap = grammar.opmap
 | 
					 | 
				
			||||||
        for type, value, start, end, line_text in tokens:
 | 
					 | 
				
			||||||
            if start != (lineno, column):
 | 
					 | 
				
			||||||
                assert (lineno, column) <= start, ((lineno, column), start)
 | 
					 | 
				
			||||||
                s_lineno, s_column = start
 | 
					 | 
				
			||||||
                if lineno < s_lineno:
 | 
					 | 
				
			||||||
                    prefix += "\n" * (s_lineno - lineno)
 | 
					 | 
				
			||||||
                    lineno = s_lineno
 | 
					 | 
				
			||||||
                    column = 0
 | 
					 | 
				
			||||||
                if column < s_column:
 | 
					 | 
				
			||||||
                    prefix += line_text[column:s_column]
 | 
					 | 
				
			||||||
                    column = s_column
 | 
					 | 
				
			||||||
            if type in (tokenize.COMMENT, tokenize.NL):
 | 
					 | 
				
			||||||
                prefix += value
 | 
					 | 
				
			||||||
                lineno, column = end
 | 
					 | 
				
			||||||
                if value.endswith("\n"):
 | 
					 | 
				
			||||||
                    lineno += 1
 | 
					 | 
				
			||||||
                    column = 0
 | 
					 | 
				
			||||||
                continue
 | 
					 | 
				
			||||||
            if type == token.OP:
 | 
					 | 
				
			||||||
                type = opmap[value]
 | 
					 | 
				
			||||||
            # if debug:
 | 
					 | 
				
			||||||
            #     self.logger.debug("%s %r (prefix=%r)",
 | 
					 | 
				
			||||||
            #                       token.tok_name[type], value, prefix)
 | 
					 | 
				
			||||||
            if p.addtoken(type, value, (prefix, start)):
 | 
					 | 
				
			||||||
                # if debug:
 | 
					 | 
				
			||||||
                #     self.logger.debug("Stop.")
 | 
					 | 
				
			||||||
                break
 | 
					 | 
				
			||||||
            prefix = ""
 | 
					 | 
				
			||||||
            lineno, column = end
 | 
					 | 
				
			||||||
            if value.endswith("\n"):
 | 
					 | 
				
			||||||
                lineno += 1
 | 
					 | 
				
			||||||
                column = 0
 | 
					 | 
				
			||||||
        else:
 | 
					 | 
				
			||||||
            # We never broke out -- EOF is too soon (how can this happen???)
 | 
					 | 
				
			||||||
            raise parse.ParseError("incomplete input", type, value, line_text)
 | 
					 | 
				
			||||||
        return p.rootnode
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def parse_stream_raw(self, stream, debug=False):
 | 
					 | 
				
			||||||
        """Parse a stream and return the syntax tree."""
 | 
					 | 
				
			||||||
        tokens = tokenize.generate_tokens(stream.readline)
 | 
					 | 
				
			||||||
        return self.parse_tokens(tokens, debug)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def parse_stream(self, stream, debug=False):
 | 
					 | 
				
			||||||
        """Parse a stream and return the syntax tree."""
 | 
					 | 
				
			||||||
        return self.parse_stream_raw(stream, debug)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def parse_file(self, filename, debug=False):
 | 
					 | 
				
			||||||
        """Parse a file and return the syntax tree."""
 | 
					 | 
				
			||||||
        with open(filename) as stream:
 | 
					 | 
				
			||||||
            return self.parse_stream(stream, debug)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def parse_string(self, text, debug=False):
 | 
					 | 
				
			||||||
        """Parse a string and return the syntax tree."""
 | 
					 | 
				
			||||||
        tokens = tokenize.generate_tokens(generate_lines(text).next)
 | 
					 | 
				
			||||||
        return self.parse_tokens(tokens, debug)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def generate_lines(text):
 | 
					 | 
				
			||||||
    """Generator that behaves like readline without using StringIO."""
 | 
					 | 
				
			||||||
    for line in text.splitlines(True):
 | 
					 | 
				
			||||||
        yield line
 | 
					 | 
				
			||||||
    while True:
 | 
					 | 
				
			||||||
        yield ""
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def get_compiled_path(filename):
 | 
					 | 
				
			||||||
    head, tail = os.path.splitext(filename)
 | 
					 | 
				
			||||||
    if tail == ".txt":
 | 
					 | 
				
			||||||
        tail = ""
 | 
					 | 
				
			||||||
    return "%s%s.pickle" % (head, tail)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def compile_grammar(gt='Grammar.txt', logger=None):
 | 
					 | 
				
			||||||
    """Compile the grammer."""
 | 
					 | 
				
			||||||
    if logger is None:
 | 
					 | 
				
			||||||
        logger = logging.getLogger()
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    logger.info("Generating grammar tables from %s", gt)
 | 
					 | 
				
			||||||
    g = pgen.generate_grammar(gt)
 | 
					 | 
				
			||||||
    gp = get_compiled_path(gt)
 | 
					 | 
				
			||||||
    logger.info("Writing grammar tables to %s", gp)
 | 
					 | 
				
			||||||
    try:
 | 
					 | 
				
			||||||
        g.dump(gp)
 | 
					 | 
				
			||||||
    except IOError as e:
 | 
					 | 
				
			||||||
        logger.info("Writing failed:"+str(e))
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def load_grammar(gt="Grammar.txt", logger=None):
 | 
					 | 
				
			||||||
    """Load the grammar (maybe from a pickle)."""
 | 
					 | 
				
			||||||
    if logger is None:
 | 
					 | 
				
			||||||
        logger = logging.getLogger()
 | 
					 | 
				
			||||||
    gp = get_compiled_path(gt)
 | 
					 | 
				
			||||||
    if not os.path.exists(gp):
 | 
					 | 
				
			||||||
        logger.info("Generating grammar tables from %s", gt)
 | 
					 | 
				
			||||||
        g = pgen.generate_grammar(gt)
 | 
					 | 
				
			||||||
    else:
 | 
					 | 
				
			||||||
        g = grammar.Grammar()
 | 
					 | 
				
			||||||
        g.load(gp)
 | 
					 | 
				
			||||||
    return g
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def _newer(a, b):
 | 
					 | 
				
			||||||
    """Inquire whether file a was written since file b."""
 | 
					 | 
				
			||||||
    if not os.path.exists(a):
 | 
					 | 
				
			||||||
        return False
 | 
					 | 
				
			||||||
    if not os.path.exists(b):
 | 
					 | 
				
			||||||
        return True
 | 
					 | 
				
			||||||
    return os.path.getmtime(a) >= os.path.getmtime(b)
 | 
					 | 
				
			||||||
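For reference, this driver was consumed from sphinx.pycode roughly as sketched below: load (or regenerate) the grammar tables, wrap them in a Driver together with nodes.convert, and parse source text into a tree. A hedged sketch only, for a pre-removal checkout: the grammar file name is assumed (Sphinx shipped Grammar-py*.txt files next to the package), and parse_string() relies on the Python 2 .next() generator method, so it does not run on Python 3.

from sphinx.pycode import nodes
from sphinx.pycode.pgen2 import driver, token

# load_grammar() prefers the pickled tables written by compile_grammar()/dump()
# and falls back to regenerating them from the grammar text file with pgen.
pygrammar = driver.load_grammar('Grammar-py2.txt')      # file name assumed
drv = driver.Driver(pygrammar, convert=nodes.convert)
tree = drv.parse_string('answer = 42\n')

# Merge symbol and token names so nice_repr() can label every node type.
number2name = dict(pygrammar.number2symbol)
number2name.update(token.tok_name)
print(nodes.nice_repr(tree, number2name))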
@@ -1,177 +0,0 @@
 | 
				
			|||||||
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
 | 
					 | 
				
			||||||
# Licensed to PSF under a Contributor Agreement.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
"""This module defines the data structures used to represent a grammar.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
These are a bit arcane because they are derived from the data
 | 
					 | 
				
			||||||
structures used by Python's 'pgen' parser generator.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
There's also a table here mapping operators to their names in the
 | 
					 | 
				
			||||||
token module; the Python tokenize module reports all operators as the
 | 
					 | 
				
			||||||
fallback token code OP, but the parser needs the actual token code.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
"""
 | 
					 | 
				
			||||||
from __future__ import print_function
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Python imports
 | 
					 | 
				
			||||||
import pickle
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Local imports
 | 
					 | 
				
			||||||
from sphinx.pycode.pgen2 import token
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
if False:
 | 
					 | 
				
			||||||
    # For type annotation
 | 
					 | 
				
			||||||
    from typing import Dict, List, Tuple  # NOQA
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class Grammar(object):
 | 
					 | 
				
			||||||
    """Pgen parsing tables tables conversion class.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    Once initialized, this class supplies the grammar tables for the
 | 
					 | 
				
			||||||
    parsing engine implemented by parse.py.  The parsing engine
 | 
					 | 
				
			||||||
    accesses the instance variables directly.  The class here does not
 | 
					 | 
				
			||||||
    provide initialization of the tables; several subclasses exist to
 | 
					 | 
				
			||||||
    do this (see the conv and pgen modules).
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    The load() method reads the tables from a pickle file, which is
 | 
					 | 
				
			||||||
    much faster than the other ways offered by subclasses.  The pickle
 | 
					 | 
				
			||||||
    file is written by calling dump() (after loading the grammar
 | 
					 | 
				
			||||||
    tables using a subclass).  The report() method prints a readable
 | 
					 | 
				
			||||||
    representation of the tables to stdout, for debugging.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    The instance variables are as follows:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    symbol2number -- a dict mapping symbol names to numbers.  Symbol
 | 
					 | 
				
			||||||
                     numbers are always 256 or higher, to distinguish
 | 
					 | 
				
			||||||
                     them from token numbers, which are between 0 and
 | 
					 | 
				
			||||||
                     255 (inclusive).
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    number2symbol -- a dict mapping numbers to symbol names;
 | 
					 | 
				
			||||||
                     these two are each other's inverse.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    states        -- a list of DFAs, where each DFA is a list of
 | 
					 | 
				
			||||||
                     states, each state is a list of arcs, and each
 | 
					 | 
				
			||||||
                     arc is an (i, j) pair where i is a label and j is
 | 
					 | 
				
			||||||
                     a state number.  The DFA number is the index into
 | 
					 | 
				
			||||||
                     this list.  (This name is slightly confusing.)
 | 
					 | 
				
			||||||
                     Final states are represented by a special arc of
 | 
					 | 
				
			||||||
                     the form (0, j) where j is its own state number.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    dfas          -- a dict mapping symbol numbers to (DFA, first)
 | 
					 | 
				
			||||||
                     pairs, where DFA is an item from the states list
 | 
					 | 
				
			||||||
                     above, and first is a set of tokens that can
 | 
					 | 
				
			||||||
                     begin this grammar rule (represented by a dict
 | 
					 | 
				
			||||||
                     whose values are always 1).
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    labels        -- a list of (x, y) pairs where x is either a token
 | 
					 | 
				
			||||||
                     number or a symbol number, and y is either None
 | 
					 | 
				
			||||||
                     or a string; the strings are keywords.  The label
 | 
					 | 
				
			||||||
                     number is the index in this list; label numbers
 | 
					 | 
				
			||||||
                     are used to mark state transitions (arcs) in the
 | 
					 | 
				
			||||||
                     DFAs.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    start         -- the number of the grammar's start symbol.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    keywords      -- a dict mapping keyword strings to arc labels.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    tokens        -- a dict mapping token numbers to arc labels.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    """
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def __init__(self):
 | 
					 | 
				
			||||||
        self.symbol2number = {}         # type: Dict[unicode, int]
 | 
					 | 
				
			||||||
        self.number2symbol = {}         # type: Dict[int, unicode]
 | 
					 | 
				
			||||||
        self.states = []                # type: List[List[List[Tuple[int, int]]]]
 | 
					 | 
				
			||||||
        self.dfas = {}                  # type: Dict[int, Tuple[List[List[Tuple[int, int]]], unicode]]
 | 
					 | 
				
			||||||
        self.labels = [(0, "EMPTY")]
 | 
					 | 
				
			||||||
        self.keywords = {}              # type: Dict[unicode, unicode]
 | 
					 | 
				
			||||||
        self.tokens = {}                # type: Dict[unicode, unicode]
 | 
					 | 
				
			||||||
        self.symbol2label = {}          # type: Dict[unicode, unicode]
 | 
					 | 
				
			||||||
        self.start = 256
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def dump(self, filename):
 | 
					 | 
				
			||||||
        """Dump the grammar tables to a pickle file."""
 | 
					 | 
				
			||||||
        with open(filename, "wb") as f:
 | 
					 | 
				
			||||||
            pickle.dump(self.__dict__, f, 2)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def load(self, filename):
 | 
					 | 
				
			||||||
        """Load the grammar tables from a pickle file."""
 | 
					 | 
				
			||||||
        f = open(filename, "rb")
 | 
					 | 
				
			||||||
        d = pickle.load(f)
 | 
					 | 
				
			||||||
        f.close()
 | 
					 | 
				
			||||||
        self.__dict__.update(d)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def report(self):
 | 
					 | 
				
			||||||
        """Dump the grammar tables to standard output, for debugging."""
 | 
					 | 
				
			||||||
        from pprint import pprint
 | 
					 | 
				
			||||||
        print("s2n")
 | 
					 | 
				
			||||||
        pprint(self.symbol2number)
 | 
					 | 
				
			||||||
        print("n2s")
 | 
					 | 
				
			||||||
        pprint(self.number2symbol)
 | 
					 | 
				
			||||||
        print("states")
 | 
					 | 
				
			||||||
        pprint(self.states)
 | 
					 | 
				
			||||||
        print("dfas")
 | 
					 | 
				
			||||||
        pprint(self.dfas)
 | 
					 | 
				
			||||||
        print("labels")
 | 
					 | 
				
			||||||
        pprint(self.labels)
 | 
					 | 
				
			||||||
        print("start", self.start)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Map from operator to number (since tokenize doesn't do this)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
opmap_raw = """
 | 
					 | 
				
			||||||
( LPAR
 | 
					 | 
				
			||||||
) RPAR
 | 
					 | 
				
			||||||
[ LSQB
 | 
					 | 
				
			||||||
] RSQB
 | 
					 | 
				
			||||||
: COLON
 | 
					 | 
				
			||||||
, COMMA
 | 
					 | 
				
			||||||
; SEMI
 | 
					 | 
				
			||||||
+ PLUS
 | 
					 | 
				
			||||||
- MINUS
 | 
					 | 
				
			||||||
* STAR
 | 
					 | 
				
			||||||
/ SLASH
 | 
					 | 
				
			||||||
| VBAR
 | 
					 | 
				
			||||||
& AMPER
 | 
					 | 
				
			||||||
< LESS
 | 
					 | 
				
			||||||
> GREATER
 | 
					 | 
				
			||||||
= EQUAL
 | 
					 | 
				
			||||||
. DOT
 | 
					 | 
				
			||||||
% PERCENT
 | 
					 | 
				
			||||||
` BACKQUOTE
 | 
					 | 
				
			||||||
{ LBRACE
 | 
					 | 
				
			||||||
} RBRACE
 | 
					 | 
				
			||||||
@ AT
 | 
					 | 
				
			||||||
@= ATEQUAL
 | 
					 | 
				
			||||||
== EQEQUAL
 | 
					 | 
				
			||||||
!= NOTEQUAL
 | 
					 | 
				
			||||||
<> NOTEQUAL
 | 
					 | 
				
			||||||
<= LESSEQUAL
 | 
					 | 
				
			||||||
>= GREATEREQUAL
 | 
					 | 
				
			||||||
~ TILDE
 | 
					 | 
				
			||||||
^ CIRCUMFLEX
 | 
					 | 
				
			||||||
<< LEFTSHIFT
 | 
					 | 
				
			||||||
>> RIGHTSHIFT
 | 
					 | 
				
			||||||
** DOUBLESTAR
 | 
					 | 
				
			||||||
+= PLUSEQUAL
 | 
					 | 
				
			||||||
-= MINEQUAL
 | 
					 | 
				
			||||||
*= STAREQUAL
 | 
					 | 
				
			||||||
/= SLASHEQUAL
 | 
					 | 
				
			||||||
%= PERCENTEQUAL
 | 
					 | 
				
			||||||
&= AMPEREQUAL
 | 
					 | 
				
			||||||
|= VBAREQUAL
 | 
					 | 
				
			||||||
^= CIRCUMFLEXEQUAL
 | 
					 | 
				
			||||||
<<= LEFTSHIFTEQUAL
 | 
					 | 
				
			||||||
>>= RIGHTSHIFTEQUAL
 | 
					 | 
				
			||||||
**= DOUBLESTAREQUAL
 | 
					 | 
				
			||||||
// DOUBLESLASH
 | 
					 | 
				
			||||||
//= DOUBLESLASHEQUAL
 | 
					 | 
				
			||||||
-> RARROW
 | 
					 | 
				
			||||||
... ELLIPSIS
 | 
					 | 
				
			||||||
"""
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
opmap = {}
 | 
					 | 
				
			||||||
for line in opmap_raw.splitlines():
 | 
					 | 
				
			||||||
    if line:
 | 
					 | 
				
			||||||
        op, name = line.split()
 | 
					 | 
				
			||||||
        opmap[op] = getattr(token, name)
 | 
					 | 
				
			||||||
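Two things worth illustrating from this module: the opmap table that turns tokenize's generic OP tokens back into precise token numbers, and the pickle round trip behind dump()/load() that made grammar start-up cheap. A small sketch, assuming a pre-removal checkout; the pickle file name is arbitrary.

from sphinx.pycode.pgen2 import grammar, token

# opmap is built from opmap_raw with getattr(token, name), so each operator
# string maps to the corresponding token number.
assert grammar.opmap['**='] == token.DOUBLESTAREQUAL
assert grammar.opmap['->'] == token.RARROW

# dump() pickles the instance __dict__; load() restores it in one go.
g = grammar.Grammar()
g.symbol2number['file_input'] = 256
g.dump('demo-grammar.pickle')
h = grammar.Grammar()
h.load('demo-grammar.pickle')
assert h.symbol2number == {'file_input': 256}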
@@ -1,100 +0,0 @@
 | 
				
			|||||||
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
 | 
					 | 
				
			||||||
# Licensed to PSF under a Contributor Agreement.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Extended to handle raw and unicode literals by Georg Brandl.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
"""Safely evaluate Python string literals without using eval()."""
 | 
					 | 
				
			||||||
from __future__ import print_function
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
import re
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
from six import text_type
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
simple_escapes = {"a": "\a",
 | 
					 | 
				
			||||||
                  "b": "\b",
 | 
					 | 
				
			||||||
                  "f": "\f",
 | 
					 | 
				
			||||||
                  "n": "\n",
 | 
					 | 
				
			||||||
                  "r": "\r",
 | 
					 | 
				
			||||||
                  "t": "\t",
 | 
					 | 
				
			||||||
                  "v": "\v",
 | 
					 | 
				
			||||||
                  "'": "'",
 | 
					 | 
				
			||||||
                  '"': '"',
 | 
					 | 
				
			||||||
                  "\\": "\\"}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def convert_hex(x, n):
 | 
					 | 
				
			||||||
    if len(x) < n+1:
 | 
					 | 
				
			||||||
        raise ValueError("invalid hex string escape ('\\%s')" % x)
 | 
					 | 
				
			||||||
    try:
 | 
					 | 
				
			||||||
        return int(x[1:], 16)
 | 
					 | 
				
			||||||
    except ValueError:
 | 
					 | 
				
			||||||
        raise ValueError("invalid hex string escape ('\\%s')" % x)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def escape(m):
 | 
					 | 
				
			||||||
    all, tail = m.group(0, 1)
 | 
					 | 
				
			||||||
    assert all.startswith("\\")
 | 
					 | 
				
			||||||
    esc = simple_escapes.get(tail)
 | 
					 | 
				
			||||||
    if esc is not None:
 | 
					 | 
				
			||||||
        return esc
 | 
					 | 
				
			||||||
    elif tail.startswith("x"):
 | 
					 | 
				
			||||||
        return chr(convert_hex(tail, 2))
 | 
					 | 
				
			||||||
    elif tail.startswith('u'):
 | 
					 | 
				
			||||||
        return unichr(convert_hex(tail, 4))
 | 
					 | 
				
			||||||
    elif tail.startswith('U'):
 | 
					 | 
				
			||||||
        return unichr(convert_hex(tail, 8))
 | 
					 | 
				
			||||||
    elif tail.startswith('N'):
 | 
					 | 
				
			||||||
        import unicodedata
 | 
					 | 
				
			||||||
        try:
 | 
					 | 
				
			||||||
            return unicodedata.lookup(tail[1:-1])
 | 
					 | 
				
			||||||
        except KeyError:
 | 
					 | 
				
			||||||
            raise ValueError("undefined character name %r" % tail[1:-1])
 | 
					 | 
				
			||||||
    else:
 | 
					 | 
				
			||||||
        try:
 | 
					 | 
				
			||||||
            return chr(int(tail, 8))
 | 
					 | 
				
			||||||
        except ValueError:
 | 
					 | 
				
			||||||
            raise ValueError("invalid octal string escape ('\\%s')" % tail)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def escaperaw(m):
 | 
					 | 
				
			||||||
    all, tail = m.group(0, 1)
 | 
					 | 
				
			||||||
    if tail.startswith('u'):
 | 
					 | 
				
			||||||
        return unichr(convert_hex(tail, 4))
 | 
					 | 
				
			||||||
    elif tail.startswith('U'):
 | 
					 | 
				
			||||||
        return unichr(convert_hex(tail, 8))
 | 
					 | 
				
			||||||
    else:
 | 
					 | 
				
			||||||
        return all
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
escape_re = re.compile(r"\\(\'|\"|\\|[abfnrtv]|x.{0,2}|[0-7]{1,3})")
 | 
					 | 
				
			||||||
uni_escape_re = re.compile(r"\\(\'|\"|\\|[abfnrtv]|x.{0,2}|[0-7]{1,3}|"
 | 
					 | 
				
			||||||
                           r"u[0-9a-fA-F]{0,4}|U[0-9a-fA-F]{0,8}|N\{.+?\})")
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def evalString(s, encoding=None):
 | 
					 | 
				
			||||||
    regex = escape_re
 | 
					 | 
				
			||||||
    repl = escape
 | 
					 | 
				
			||||||
    if encoding and not isinstance(s, text_type):
 | 
					 | 
				
			||||||
        s = s.decode(encoding)
 | 
					 | 
				
			||||||
    if s.startswith('u') or s.startswith('U'):
 | 
					 | 
				
			||||||
        regex = uni_escape_re
 | 
					 | 
				
			||||||
        s = s[1:]
 | 
					 | 
				
			||||||
    if s.startswith('r') or s.startswith('R'):
 | 
					 | 
				
			||||||
        repl = escaperaw
 | 
					 | 
				
			||||||
        s = s[1:]
 | 
					 | 
				
			||||||
    assert s.startswith("'") or s.startswith('"'), repr(s[:1])
 | 
					 | 
				
			||||||
    q = s[0]
 | 
					 | 
				
			||||||
    if s[:3] == q*3:
 | 
					 | 
				
			||||||
        q = q*3
 | 
					 | 
				
			||||||
    assert s.endswith(q), repr(s[-len(q):])
 | 
					 | 
				
			||||||
    assert len(s) >= 2*len(q)
 | 
					 | 
				
			||||||
    s = s[len(q):-len(q)]
 | 
					 | 
				
			||||||
    return regex.sub(repl, s)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def test():
 | 
					 | 
				
			||||||
    for i in range(256):
 | 
					 | 
				
			||||||
        c = chr(i)
 | 
					 | 
				
			||||||
        s = repr(c)
 | 
					 | 
				
			||||||
        e = evalString(s)
 | 
					 | 
				
			||||||
        if e != c:
 | 
					 | 
				
			||||||
            print(i, c, s, e)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
if __name__ == "__main__":
 | 
					 | 
				
			||||||
    test()
 | 
					 | 
				
			||||||
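A short usage sketch for evalString() on a pre-removal checkout: it undoes Python 2 string-literal syntax, including raw and triple-quoted forms, without calling eval(); unicode escapes additionally go through unichr(), so the module as written is Python 2 only.

from sphinx.pycode.pgen2.literals import evalString

assert evalString("'spam\\n'") == 'spam\n'          # simple escape is decoded
assert evalString("r'spam\\n'") == 'spam\\n'        # raw literal keeps the backslash
assert evalString('"""tri\\tple"""') == 'tri\tple'  # triple quotes are stripped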
										
											
File diff suppressed because it is too large
							@@ -1,206 +0,0 @@
 | 
				
			|||||||
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
 | 
					 | 
				
			||||||
# Licensed to PSF under a Contributor Agreement.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
"""Parser engine for the grammar tables generated by pgen.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The grammar table must be loaded first.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
See Parser/parser.c in the Python distribution for additional info on
 | 
					 | 
				
			||||||
how this parsing engine works.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
"""
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Local imports
 | 
					 | 
				
			||||||
from sphinx.pycode.pgen2 import token
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
if False:
 | 
					 | 
				
			||||||
    # For type annotation
 | 
					 | 
				
			||||||
    from typing import Any, List, Set, Tuple  # NOQA
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class ParseError(Exception):
 | 
					 | 
				
			||||||
    """Exception to signal the parser is stuck."""
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def __init__(self, msg, type, value, context):
 | 
					 | 
				
			||||||
        Exception.__init__(self, "%s: type=%r, value=%r, context=%r" %
 | 
					 | 
				
			||||||
                           (msg, type, value, context))
 | 
					 | 
				
			||||||
        self.msg = msg
 | 
					 | 
				
			||||||
        self.type = type
 | 
					 | 
				
			||||||
        self.value = value
 | 
					 | 
				
			||||||
        self.context = context
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class Parser(object):
 | 
					 | 
				
			||||||
    """Parser engine.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    The proper usage sequence is:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    p = Parser(grammar, [converter])  # create instance
 | 
					 | 
				
			||||||
    p.setup([start])                  # prepare for parsing
 | 
					 | 
				
			||||||
    <for each input token>:
 | 
					 | 
				
			||||||
        if p.addtoken(...):           # parse a token; may raise ParseError
 | 
					 | 
				
			||||||
            break
 | 
					 | 
				
			||||||
    root = p.rootnode                 # root of abstract syntax tree
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    A Parser instance may be reused by calling setup() repeatedly.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    A Parser instance contains state pertaining to the current token
 | 
					 | 
				
			||||||
    sequence, and should not be used concurrently by different threads
 | 
					 | 
				
			||||||
    to parse separate token sequences.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    See driver.py for how to get input tokens by tokenizing a file or
 | 
					 | 
				
			||||||
    string.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    Parsing is complete when addtoken() returns True; the root of the
 | 
					 | 
				
			||||||
    abstract syntax tree can then be retrieved from the rootnode
 | 
					 | 
				
			||||||
    instance variable.  When a syntax error occurs, addtoken() raises
 | 
					 | 
				
			||||||
    the ParseError exception.  There is no error recovery; the parser
 | 
					 | 
				
			||||||
    cannot be used after a syntax error was reported (but it can be
 | 
					 | 
				
			||||||
    reinitialized by calling setup()).
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    """
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def __init__(self, grammar, convert=None):
 | 
					 | 
				
			||||||
        """Constructor.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        The grammar argument is a grammar.Grammar instance; see the
 | 
					 | 
				
			||||||
        grammar module for more information.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        The parser is not ready yet for parsing; you must call the
 | 
					 | 
				
			||||||
        setup() method to get it started.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        The optional convert argument is a function mapping concrete
 | 
					 | 
				
			||||||
        syntax tree nodes to abstract syntax tree nodes.  If not
 | 
					 | 
				
			||||||
        given, no conversion is done and the syntax tree produced is
 | 
					 | 
				
			||||||
        the concrete syntax tree.  If given, it must be a function of
 | 
					 | 
				
			||||||
        two arguments, the first being the grammar (a grammar.Grammar
 | 
					 | 
				
			||||||
        instance), and the second being the concrete syntax tree node
 | 
					 | 
				
			||||||
        to be converted.  The syntax tree is converted from the bottom
 | 
					 | 
				
			||||||
        up.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        A concrete syntax tree node is a (type, value, context, nodes)
 | 
					 | 
				
			||||||
        tuple, where type is the node type (a token or symbol number),
 | 
					 | 
				
			||||||
        value is None for symbols and a string for tokens, context is
 | 
					 | 
				
			||||||
        None or an opaque value used for error reporting (typically a
 | 
					 | 
				
			||||||
        (lineno, offset) pair), and nodes is a list of children for
 | 
					 | 
				
			||||||
        symbols, and None for tokens.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        An abstract syntax tree node may be anything; this is entirely
 | 
					 | 
				
			||||||
        up to the converter function.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        """
 | 
					 | 
				
			||||||
        self.grammar = grammar
 | 
					 | 
				
			||||||
        self.convert = convert or (lambda grammar, node: node)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def setup(self, start=None):
 | 
					 | 
				
			||||||
        """Prepare for parsing.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        This *must* be called before starting to parse.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        The optional argument is an alternative start symbol; it
 | 
					 | 
				
			||||||
        defaults to the grammar's start symbol.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        You can use a Parser instance to parse any number of programs;
 | 
					 | 
				
			||||||
        each time you call setup() the parser is reset to an initial
 | 
					 | 
				
			||||||
        state determined by the (implicit or explicit) start symbol.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        """
 | 
					 | 
				
			||||||
        if start is None:
 | 
					 | 
				
			||||||
            start = self.grammar.start
 | 
					 | 
				
			||||||
        # Each stack entry is a tuple: (dfa, state, node).
 | 
					 | 
				
			||||||
        # A node is a tuple: (type, value, context, children),
 | 
					 | 
				
			||||||
        # where children is a list of nodes or None, and context may be None.
 | 
					 | 
				
			||||||
        newnode = (start, None, None, [])  # type: Tuple[unicode, unicode, unicode, List]
 | 
					 | 
				
			||||||
        stackentry = (self.grammar.dfas[start], 0, newnode)
 | 
					 | 
				
			||||||
        self.stack = [stackentry]
 | 
					 | 
				
			||||||
        self.rootnode = None        # type: Any
 | 
					 | 
				
			||||||
        self.used_names = set()     # type: Set[unicode]
 | 
					 | 
				
			||||||
                                    # Aliased to self.rootnode.used_names in pop()
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def addtoken(self, type, value, context):
 | 
					 | 
				
			||||||
        """Add a token; return True iff this is the end of the program."""
 | 
					 | 
				
			||||||
        # Map from token to label
 | 
					 | 
				
			||||||
        ilabel = self.classify(type, value, context)
 | 
					 | 
				
			||||||
        # Loop until the token is shifted; may raise exceptions
 | 
					 | 
				
			||||||
        while True:
 | 
					 | 
				
			||||||
            dfa, state, node = self.stack[-1]
 | 
					 | 
				
			||||||
            states, first = dfa
 | 
					 | 
				
			||||||
            arcs = states[state]
 | 
					 | 
				
			||||||
            # Look for a state with this label
 | 
					 | 
				
			||||||
            for i, newstate in arcs:
 | 
					 | 
				
			||||||
                t, v = self.grammar.labels[i]
 | 
					 | 
				
			||||||
                if ilabel == i:
 | 
					 | 
				
			||||||
                    # Look it up in the list of labels
 | 
					 | 
				
			||||||
                    assert t < 256
 | 
					 | 
				
			||||||
                    # Shift a token; we're done with it
 | 
					 | 
				
			||||||
                    self.shift(type, value, newstate, context)
 | 
					 | 
				
			||||||
                    # Pop while we are in an accept-only state
 | 
					 | 
				
			||||||
                    state = newstate
 | 
					 | 
				
			||||||
                    while states[state] == [(0, state)]:
 | 
					 | 
				
			||||||
                        self.pop()
 | 
					 | 
				
			||||||
                        if not self.stack:
 | 
					 | 
				
			||||||
                            # Done parsing!
 | 
					 | 
				
			||||||
                            return True
 | 
					 | 
				
			||||||
                        dfa, state, node = self.stack[-1]
 | 
					 | 
				
			||||||
                        states, first = dfa
 | 
					 | 
				
			||||||
                    # Done with this token
 | 
					 | 
				
			||||||
                    return False
 | 
					 | 
				
			||||||
                elif t >= 256:
 | 
					 | 
				
			||||||
                    # See if it's a symbol and if we're in its first set
 | 
					 | 
				
			||||||
                    itsdfa = self.grammar.dfas[t]
 | 
					 | 
				
			||||||
                    itsstates, itsfirst = itsdfa
 | 
					 | 
				
			||||||
                    if ilabel in itsfirst:
 | 
					 | 
				
			||||||
                        # Push a symbol
 | 
					 | 
				
			||||||
                        self.push(t, self.grammar.dfas[t], newstate, context)
 | 
					 | 
				
			||||||
                        break # To continue the outer while loop
 | 
					 | 
				
			||||||
            else:
 | 
					 | 
				
			||||||
                if (0, state) in arcs:
 | 
					 | 
				
			||||||
                    # An accepting state, pop it and try something else
 | 
					 | 
				
			||||||
                    self.pop()
 | 
					 | 
				
			||||||
                    if not self.stack:
 | 
					 | 
				
			||||||
                        # Done parsing, but another token is input
 | 
					 | 
				
			||||||
                        raise ParseError("too much input",
 | 
					 | 
				
			||||||
                                         type, value, context)
 | 
					 | 
				
			||||||
                else:
 | 
					 | 
				
			||||||
                    # No success finding a transition
 | 
					 | 
				
			||||||
                    raise ParseError("bad input", type, value, context)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def classify(self, type, value, context):
 | 
					 | 
				
			||||||
        """Turn a token into a label.  (Internal)"""
 | 
					 | 
				
			||||||
        if type == token.NAME:
 | 
					 | 
				
			||||||
            # Keep a listing of all used names
 | 
					 | 
				
			||||||
            self.used_names.add(value)
 | 
					 | 
				
			||||||
            # Check for reserved words
 | 
					 | 
				
			||||||
            ilabel = self.grammar.keywords.get(value)
 | 
					 | 
				
			||||||
            if ilabel is not None:
 | 
					 | 
				
			||||||
                return ilabel
 | 
					 | 
				
			||||||
        ilabel = self.grammar.tokens.get(type)
 | 
					 | 
				
			||||||
        if ilabel is None:
 | 
					 | 
				
			||||||
            raise ParseError("bad token", type, value, context)
 | 
					 | 
				
			||||||
        return ilabel
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def shift(self, type, value, newstate, context):
 | 
					 | 
				
			||||||
        """Shift a token.  (Internal)"""
 | 
					 | 
				
			||||||
        dfa, state, node = self.stack[-1]
 | 
					 | 
				
			||||||
        newnode = (type, value, context, None)  # type: Tuple[unicode, unicode, unicode, List]
 | 
					 | 
				
			||||||
        newnode = self.convert(self.grammar, newnode)
 | 
					 | 
				
			||||||
        if newnode is not None:
 | 
					 | 
				
			||||||
            node[-1].append(newnode)
 | 
					 | 
				
			||||||
        self.stack[-1] = (dfa, newstate, node)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def push(self, type, newdfa, newstate, context):
 | 
					 | 
				
			||||||
        """Push a nonterminal.  (Internal)"""
 | 
					 | 
				
			||||||
        dfa, state, node = self.stack[-1]
 | 
					 | 
				
			||||||
        newnode = (type, None, context, [])  # type: Tuple[unicode, unicode, unicode, List]
 | 
					 | 
				
			||||||
        self.stack[-1] = (dfa, newstate, node)
 | 
					 | 
				
			||||||
        self.stack.append((newdfa, 0, newnode))
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def pop(self):
 | 
					 | 
				
			||||||
        """Pop a nonterminal.  (Internal)"""
 | 
					 | 
				
			||||||
        popdfa, popstate, popnode = self.stack.pop()
 | 
					 | 
				
			||||||
        newnode = self.convert(self.grammar, popnode)
 | 
					 | 
				
			||||||
        if newnode is not None:
 | 
					 | 
				
			||||||
            if self.stack:
 | 
					 | 
				
			||||||
                dfa, state, node = self.stack[-1]
 | 
					 | 
				
			||||||
                node[-1].append(newnode)
 | 
					 | 
				
			||||||
            else:
 | 
					 | 
				
			||||||
                self.rootnode = newnode
 | 
					 | 
				
			||||||
                self.rootnode.used_names = self.used_names
 | 
					 | 
				
			||||||
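The usage sequence in the Parser docstring above can be made concrete as follows (hedged sketch for a pre-removal checkout; Driver.parse_tokens in driver.py does the same job plus prefix bookkeeping, and the grammar file name is assumed): tokenize the source, skip COMMENT/NL tokens, remap OP tokens through grammar.opmap, and feed everything to addtoken() until it returns True.

from six import StringIO

from sphinx.pycode import nodes
from sphinx.pycode.pgen2 import driver, grammar, parse, token, tokenize

g = driver.load_grammar('Grammar-py2.txt')        # grammar file name assumed
p = parse.Parser(g, convert=nodes.convert)
p.setup()

for type, value, start, end, line in tokenize.generate_tokens(
        StringIO(u'answer = 42\n').readline):
    if type in (tokenize.COMMENT, tokenize.NL):
        continue                                  # the parser never sees these
    if type == token.OP:
        type = grammar.opmap[value]               # recover the precise token number
    if p.addtoken(type, value, ('', start)):
        break                                     # True once ENDMARKER is shifted

root = p.rootnode                                 # Node/Leaf tree via nodes.convert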
@@ -1,165 +0,0 @@
 | 
				
			|||||||
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
 | 
					 | 
				
			||||||
# Licensed to PSF under a Contributor Agreement.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Adapted from parse.py to be compiled with Cython by Georg Brandl.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
"""Parser engine for the grammar tables generated by pgen.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The grammar table must be loaded first.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
See Parser/parser.c in the Python distribution for additional info on
 | 
					 | 
				
			||||||
how this parsing engine works.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
"""
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
from sphinx.pycode.nodes import Node, Leaf
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
DEF NAME = 1
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class ParseError(Exception):
 | 
					 | 
				
			||||||
    """Exception to signal the parser is stuck."""
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def __init__(self, msg, type, value, context):
 | 
					 | 
				
			||||||
        Exception.__init__(self, "%s: type=%r, value=%r, context=%r" %
 | 
					 | 
				
			||||||
                           (msg, type, value, context))
 | 
					 | 
				
			||||||
        self.msg = msg
 | 
					 | 
				
			||||||
        self.type = type
 | 
					 | 
				
			||||||
        self.value = value
 | 
					 | 
				
			||||||
        self.context = context
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
cdef class Parser:
 | 
					 | 
				
			||||||
    cdef public object grammar
 | 
					 | 
				
			||||||
    cdef public object rootnode
 | 
					 | 
				
			||||||
    cdef public list stack
 | 
					 | 
				
			||||||
    cdef public set used_names
 | 
					 | 
				
			||||||
    cdef int  _grammar_start
 | 
					 | 
				
			||||||
    cdef list _grammar_labels
 | 
					 | 
				
			||||||
    cdef dict _grammar_dfas
 | 
					 | 
				
			||||||
    cdef dict _grammar_keywords
 | 
					 | 
				
			||||||
    cdef dict _grammar_tokens
 | 
					 | 
				
			||||||
    cdef dict _grammar_number2symbol
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def __init__(self, grammar, convert=None):
 | 
					 | 
				
			||||||
        self.grammar = grammar
 | 
					 | 
				
			||||||
        #self.convert = convert or noconvert
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        self._grammar_dfas = grammar.dfas
 | 
					 | 
				
			||||||
        self._grammar_labels = grammar.labels
 | 
					 | 
				
			||||||
        self._grammar_keywords = grammar.keywords
 | 
					 | 
				
			||||||
        self._grammar_tokens = grammar.tokens
 | 
					 | 
				
			||||||
        self._grammar_number2symbol = grammar.number2symbol
 | 
					 | 
				
			||||||
        self._grammar_start = grammar.start
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def setup(self, start=None):
 | 
					 | 
				
			||||||
        if start is None:
 | 
					 | 
				
			||||||
            start = self._grammar_start
 | 
					 | 
				
			||||||
        # Each stack entry is a tuple: (dfa, state, node).
 | 
					 | 
				
			||||||
        # A node is a tuple: (type, value, context, children),
 | 
					 | 
				
			||||||
        # where children is a list of nodes or None, and context may be None.
 | 
					 | 
				
			||||||
        newnode = (start, None, None, [])
 | 
					 | 
				
			||||||
        stackentry = (self._grammar_dfas[start], 0, newnode)
 | 
					 | 
				
			||||||
        self.stack = [stackentry]
 | 
					 | 
				
			||||||
        self.rootnode = None
 | 
					 | 
				
			||||||
        self.used_names = set() # Aliased to self.rootnode.used_names in pop()
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def addtoken(self, int type, value, context):
 | 
					 | 
				
			||||||
        """Add a token; return True iff this is the end of the program."""
 | 
					 | 
				
			||||||
        cdef int ilabel, i, t, state, newstate
 | 
					 | 
				
			||||||
        # Map from token to label
 | 
					 | 
				
			||||||
        ilabel = self.classify(type, value, context)
 | 
					 | 
				
			||||||
        # Loop until the token is shifted; may raise exceptions
 | 
					 | 
				
			||||||
        while True:
 | 
					 | 
				
			||||||
            dfa, state, node = self.stack[-1]
 | 
					 | 
				
			||||||
            states, first = dfa
 | 
					 | 
				
			||||||
            arcs = states[state]
 | 
					 | 
				
			||||||
            # Look for a state with this label
 | 
					 | 
				
			||||||
            for i, newstate in arcs:
 | 
					 | 
				
			||||||
                t, v = self._grammar_labels[i]
 | 
					 | 
				
			||||||
                if ilabel == i:
 | 
					 | 
				
			||||||
                    # Look it up in the list of labels
 | 
					 | 
				
			||||||
                    ## assert t < 256
 | 
					 | 
				
			||||||
                    # Shift a token; we're done with it
 | 
					 | 
				
			||||||
                    self.shift(type, value, newstate, context)
 | 
					 | 
				
			||||||
                    # Pop while we are in an accept-only state
 | 
					 | 
				
			||||||
                    state = newstate
 | 
					 | 
				
			||||||
                    while states[state] == [(0, state)]:
 | 
					 | 
				
			||||||
                        self.pop()
 | 
					 | 
				
			||||||
                        if not self.stack:
 | 
					 | 
				
			||||||
                            # Done parsing!
 | 
					 | 
				
			||||||
                            return True
 | 
					 | 
				
			||||||
                        dfa, state, node = self.stack[-1]
 | 
					 | 
				
			||||||
                        states, first = dfa
 | 
					 | 
				
			||||||
                    # Done with this token
 | 
					 | 
				
			||||||
                    return False
 | 
					 | 
				
			||||||
                elif t >= 256:
 | 
					 | 
				
			||||||
                    # See if it's a symbol and if we're in its first set
 | 
					 | 
				
			||||||
                    itsdfa = self._grammar_dfas[t]
 | 
					 | 
				
			||||||
                    itsstates, itsfirst = itsdfa
 | 
					 | 
				
			||||||
                    if ilabel in itsfirst:
 | 
					 | 
				
			||||||
                        # Push a symbol
 | 
					 | 
				
			||||||
                        self.push(t, itsdfa, newstate, context)
 | 
					 | 
				
			||||||
                        break # To continue the outer while loop
 | 
					 | 
				
			||||||
            else:
 | 
					 | 
				
			||||||
                if (0, state) in arcs:
 | 
					 | 
				
			||||||
                    # An accepting state, pop it and try something else
 | 
					 | 
				
			||||||
                    self.pop()
 | 
					 | 
				
			||||||
                    if not self.stack:
 | 
					 | 
				
			||||||
                        # Done parsing, but another token is input
 | 
					 | 
				
			||||||
                        raise ParseError("too much input",
 | 
					 | 
				
			||||||
                                         type, value, context)
 | 
					 | 
				
			||||||
                else:
 | 
					 | 
				
			||||||
                    # No success finding a transition
 | 
					 | 
				
			||||||
                    raise ParseError("bad input", type, value, context)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    cdef int classify(self, int type, value, context):
 | 
					 | 
				
			||||||
        """Turn a token into a label.  (Internal)"""
 | 
					 | 
				
			||||||
        if type == NAME:
 | 
					 | 
				
			||||||
            # Keep a listing of all used names
 | 
					 | 
				
			||||||
            self.used_names.add(value)
 | 
					 | 
				
			||||||
            # Check for reserved words
 | 
					 | 
				
			||||||
            if value in self._grammar_keywords:
 | 
					 | 
				
			||||||
                return self._grammar_keywords[value]
 | 
					 | 
				
			||||||
        if type not in self._grammar_tokens:
 | 
					 | 
				
			||||||
            raise ParseError("bad token", type, value, context)
 | 
					 | 
				
			||||||
        return self._grammar_tokens[type]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    cdef void shift(self, type, value, newstate, context):
 | 
					 | 
				
			||||||
        """Shift a token.  (Internal)"""
 | 
					 | 
				
			||||||
        cdef tuple node
 | 
					 | 
				
			||||||
        dfa, state, node = self.stack[-1]
 | 
					 | 
				
			||||||
        newnode = (type, value, context, None)
 | 
					 | 
				
			||||||
        newnode = self.convert(newnode)
 | 
					 | 
				
			||||||
        if newnode is not None:
 | 
					 | 
				
			||||||
            node[-1].append(newnode)
 | 
					 | 
				
			||||||
        self.stack[-1] = (dfa, newstate, node)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    cdef void push(self, type, newdfa, newstate, context):
 | 
					 | 
				
			||||||
        """Push a nonterminal.  (Internal)"""
 | 
					 | 
				
			||||||
        dfa, state, node = self.stack[-1]
 | 
					 | 
				
			||||||
        newnode = (type, None, context, [])
 | 
					 | 
				
			||||||
        self.stack[-1] = (dfa, newstate, node)
 | 
					 | 
				
			||||||
        self.stack.append((newdfa, 0, newnode))
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    cdef void pop(self):
 | 
					 | 
				
			||||||
        """Pop a nonterminal.  (Internal)"""
 | 
					 | 
				
			||||||
        popdfa, popstate, popnode = self.stack.pop()
 | 
					 | 
				
			||||||
        newnode = self.convert(popnode)
 | 
					 | 
				
			||||||
        if newnode is not None:
 | 
					 | 
				
			||||||
            if self.stack:
 | 
					 | 
				
			||||||
                dfa, state, node = self.stack[-1]
 | 
					 | 
				
			||||||
                node[-1].append(newnode)
 | 
					 | 
				
			||||||
            else:
 | 
					 | 
				
			||||||
                self.rootnode = newnode
 | 
					 | 
				
			||||||
                self.rootnode.used_names = self.used_names
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    cdef convert(self, tuple raw_node):
 | 
					 | 
				
			||||||
        type, value, context, children = raw_node
 | 
					 | 
				
			||||||
        if children or type in self._grammar_number2symbol:
 | 
					 | 
				
			||||||
            # If there's exactly one child, return that child instead of
 | 
					 | 
				
			||||||
            # creating a new node.
 | 
					 | 
				
			||||||
            if len(children) == 1:
 | 
					 | 
				
			||||||
                return children[0]
 | 
					 | 
				
			||||||
            return Node(type, children, context=context)
 | 
					 | 
				
			||||||
        else:
 | 
					 | 
				
			||||||
            return Leaf(type, value, context=context)
 | 
					 | 
				
			||||||
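parse.pyx is the Cython variant of parse.py (see the note at the top of the file) and only takes effect when compiled to an extension module. The project's real build wiring is not part of this diff, so the snippet below is only a generic illustration of how such a .pyx file is typically compiled with Cython, not Sphinx's actual setup.

# setup_cython_demo.py -- generic recipe, not Sphinx's real setup.py
from setuptools import setup
from Cython.Build import cythonize

setup(
    name='pycode-cython-demo',
    ext_modules=cythonize(['sphinx/pycode/pgen2/parse.pyx']),
)

# Build in place with:  python setup_cython_demo.py build_ext --inplace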
@@ -1,403 +0,0 @@
 | 
				
			|||||||
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
 | 
					 | 
				
			||||||
# Licensed to PSF under a Contributor Agreement.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
from __future__ import print_function
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
from six import iteritems
 | 
					 | 
				
			||||||
from collections import OrderedDict
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Pgen imports
 | 
					 | 
				
			||||||
from sphinx.pycode.pgen2 import grammar, token, tokenize
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
if False:
 | 
					 | 
				
			||||||
    # For type annotation
 | 
					 | 
				
			||||||
    from typing import Any, Dict, List, Tuple  # NOQA
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class PgenGrammar(grammar.Grammar):
 | 
					 | 
				
			||||||
    pass
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class ParserGenerator(object):
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def __init__(self, filename, stream=None):
 | 
					 | 
				
			||||||
        close_stream = None
 | 
					 | 
				
			||||||
        if stream is None:
 | 
					 | 
				
			||||||
            stream = open(filename)
 | 
					 | 
				
			||||||
            close_stream = stream.close
 | 
					 | 
				
			||||||
        self.filename = filename
 | 
					 | 
				
			||||||
        self.stream = stream
 | 
					 | 
				
			||||||
        self.generator = tokenize.generate_tokens(stream.readline)
 | 
					 | 
				
			||||||
        self.gettoken() # Initialize lookahead
 | 
					 | 
				
			||||||
        self.dfas, self.startsymbol = self.parse()
 | 
					 | 
				
			||||||
        if close_stream is not None:
 | 
					 | 
				
			||||||
            close_stream()
 | 
					 | 
				
			||||||
        self.first = {}     # type: Dict[unicode, List[unicode]]
 | 
					 | 
				
			||||||
                            # map from symbol name to set of tokens
 | 
					 | 
				
			||||||
        self.addfirstsets()
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def make_grammar(self):
 | 
					 | 
				
			||||||
        c = PgenGrammar()
 | 
					 | 
				
			||||||
        names = list(self.dfas.keys())
 | 
					 | 
				
			||||||
        names.sort()
 | 
					 | 
				
			||||||
        names.remove(self.startsymbol)
 | 
					 | 
				
			||||||
        names.insert(0, self.startsymbol)
 | 
					 | 
				
			||||||
        for name in names:
 | 
					 | 
				
			||||||
            i = 256 + len(c.symbol2number)
 | 
					 | 
				
			||||||
            c.symbol2number[name] = i
 | 
					 | 
				
			||||||
            c.number2symbol[i] = name
 | 
					 | 
				
			||||||
        for name in names:
 | 
					 | 
				
			||||||
            dfa = self.dfas[name]
 | 
					 | 
				
			||||||
            states = []  # type: List[List[Tuple[int, int]]]
 | 
					 | 
				
			||||||
            for state in dfa:
 | 
					 | 
				
			||||||
                arcs = []
 | 
					 | 
				
			||||||
                for label, next in iteritems(state.arcs):
 | 
					 | 
				
			||||||
                    arcs.append((self.make_label(c, label), dfa.index(next)))
 | 
					 | 
				
			||||||
                if state.isfinal:
 | 
					 | 
				
			||||||
                    arcs.append((0, dfa.index(state)))
 | 
					 | 
				
			||||||
                states.append(arcs)
 | 
					 | 
				
			||||||
            c.states.append(states)
 | 
					 | 
				
			||||||
            c.dfas[c.symbol2number[name]] = (states, self.make_first(c, name))
 | 
					 | 
				
			||||||
        c.start = c.symbol2number[self.startsymbol]
 | 
					 | 
				
			||||||
        return c
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def make_first(self, c, name):
 | 
					 | 
				
			||||||
        rawfirst = self.first[name]
 | 
					 | 
				
			||||||
        first = {}
 | 
					 | 
				
			||||||
        for label in sorted(rawfirst):
 | 
					 | 
				
			||||||
            ilabel = self.make_label(c, label)
 | 
					 | 
				
			||||||
            ##assert ilabel not in first # XXX failed on <> ... !=
 | 
					 | 
				
			||||||
            first[ilabel] = 1
 | 
					 | 
				
			||||||
        return first
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def make_label(self, c, label):
 | 
					 | 
				
			||||||
        # XXX Maybe this should be a method on a subclass of converter?
 | 
					 | 
				
			||||||
        ilabel = len(c.labels)
 | 
					 | 
				
			||||||
        if label[0].isalpha():
 | 
					 | 
				
			||||||
            # Either a symbol name or a named token
 | 
					 | 
				
			||||||
            if label in c.symbol2number:
 | 
					 | 
				
			||||||
                # A symbol name (a non-terminal)
 | 
					 | 
				
			||||||
                if label in c.symbol2label:
 | 
					 | 
				
			||||||
                    return c.symbol2label[label]
 | 
					 | 
				
			||||||
                else:
 | 
					 | 
				
			||||||
                    c.labels.append((c.symbol2number[label], None))
 | 
					 | 
				
			||||||
                    c.symbol2label[label] = ilabel
 | 
					 | 
				
			||||||
                    return ilabel
 | 
					 | 
				
			||||||
            else:
 | 
					 | 
				
			||||||
                # A named token (NAME, NUMBER, STRING)
 | 
					 | 
				
			||||||
                itoken = getattr(token, label, None)
 | 
					 | 
				
			||||||
                assert isinstance(itoken, int), label
 | 
					 | 
				
			||||||
                assert itoken in token.tok_name, label
 | 
					 | 
				
			||||||
                if itoken in c.tokens:
 | 
					 | 
				
			||||||
                    return c.tokens[itoken]
 | 
					 | 
				
			||||||
                else:
 | 
					 | 
				
			||||||
                    c.labels.append((itoken, None))
 | 
					 | 
				
			||||||
                    c.tokens[itoken] = ilabel
 | 
					 | 
				
			||||||
                    return ilabel
 | 
					 | 
				
			||||||
        else:
 | 
					 | 
				
			||||||
            # Either a keyword or an operator
 | 
					 | 
				
			||||||
            assert label[0] in ('"', "'"), label
 | 
					 | 
				
			||||||
            value = eval(label)
 | 
					 | 
				
			||||||
            if value[0].isalpha():
 | 
					 | 
				
			||||||
                # A keyword
 | 
					 | 
				
			||||||
                if value in c.keywords:
 | 
					 | 
				
			||||||
                    return c.keywords[value]
 | 
					 | 
				
			||||||
                else:
 | 
					 | 
				
			||||||
                    c.labels.append((token.NAME, value))
 | 
					 | 
				
			||||||
                    c.keywords[value] = ilabel
 | 
					 | 
				
			||||||
                    return ilabel
 | 
					 | 
				
			||||||
            else:
 | 
					 | 
				
			||||||
                # An operator (any non-numeric token)
 | 
					 | 
				
			||||||
                itoken = grammar.opmap[value] # Fails if unknown token
 | 
					 | 
				
			||||||
                if itoken in c.tokens:
 | 
					 | 
				
			||||||
                    return c.tokens[itoken]
 | 
					 | 
				
			||||||
                else:
 | 
					 | 
				
			||||||
                    c.labels.append((itoken, None))
 | 
					 | 
				
			||||||
                    c.tokens[itoken] = ilabel
 | 
					 | 
				
			||||||
                    return ilabel
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def addfirstsets(self):
 | 
					 | 
				
			||||||
        names = list(self.dfas.keys())
 | 
					 | 
				
			||||||
        names.sort()
 | 
					 | 
				
			||||||
        for name in names:
 | 
					 | 
				
			||||||
            if name not in self.first:
 | 
					 | 
				
			||||||
                self.calcfirst(name)
 | 
					 | 
				
			||||||
            #print name, self.first[name].keys()
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def calcfirst(self, name):
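        # Compute the set of tokens that can start an expansion of `name`:
        # follow the arcs out of the rule's first DFA state, recursing into
        # nonterminal labels and collecting terminal labels directly.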
        dfa = self.dfas[name]
        self.first[name] = None # dummy to detect left recursion
        state = dfa[0]
        totalset = {}  # type: Dict[unicode, int]
        overlapcheck = {}
        for label, next in iteritems(state.arcs):
            if label in self.dfas:
                if label in self.first:
                    fset = self.first[label]
                    if fset is None:
                        raise ValueError("recursion for rule %r" % name)
                else:
                    self.calcfirst(label)
                    fset = self.first[label]
                totalset.update(fset)
                overlapcheck[label] = fset
            else:
                totalset[label] = 1
                overlapcheck[label] = {label: 1}
        inverse = {}  # type: Dict[unicode, unicode]
        for label, itsfirst in sorted(overlapcheck.items()):
            for symbol in sorted(itsfirst):
                if symbol in inverse:
                    raise ValueError("rule %s is ambiguous; %s is in the"
                                     " first sets of %s as well as %s" %
                                     (name, symbol, label, inverse[symbol]))
                inverse[symbol] = label
        self.first[name] = totalset

    def parse(self):
        dfas = {}
        startsymbol = None
        # MSTART: (NEWLINE | RULE)* ENDMARKER
        while self.type != token.ENDMARKER:
            while self.type == token.NEWLINE:
                self.gettoken()
            # RULE: NAME ':' RHS NEWLINE
            name = self.expect(token.NAME)
            self.expect(token.OP, ":")
            a, z = self.parse_rhs()
            self.expect(token.NEWLINE)
            #self.dump_nfa(name, a, z)
            dfa = self.make_dfa(a, z)
            #self.dump_dfa(name, dfa)
            #oldlen = len(dfa)
            self.simplify_dfa(dfa)
            #newlen = len(dfa)
            dfas[name] = dfa
            #print name, oldlen, newlen
            if startsymbol is None:
                startsymbol = name
        return dfas, startsymbol

    def make_dfa(self, start, finish):
        # To turn an NFA into a DFA, we define the states of the DFA
        # to correspond to *sets* of states of the NFA.  Then do some
        # state reduction.  Let's represent sets as dicts with 1 for
        # values.
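        # For example, an arc labeled NAME out of any NFA state in the
        # current set contributes the epsilon-closure of its target to the
        # DFA state reached on NAME (see addclosure() below).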
        assert isinstance(start, NFAState)
        assert isinstance(finish, NFAState)
        def closure(state):
            base = {}  # type: Dict
            addclosure(state, base)
            return base
        def addclosure(state, base):
            assert isinstance(state, NFAState)
            if state in base:
                return
            base[state] = 1
            for label, next in state.arcs:
                if label is None:
                    addclosure(next, base)
        states = [DFAState(closure(start), finish)]
        for state in states: # NB states grows while we're iterating
            arcs = {}  # type: Dict[unicode, Dict]
            for nfastate in state.nfaset:
                for label, next in nfastate.arcs:
                    if label is not None:
                        addclosure(next, arcs.setdefault(label, {}))
            for label, nfaset in iteritems(arcs):
                for st in states:
                    if st.nfaset == nfaset:
                        break
                else:
                    st = DFAState(nfaset, finish)
                    states.append(st)
                state.addarc(st, label)
        return states # List of DFAState instances; first one is start

    def dump_nfa(self, name, start, finish):
        print("Dump of NFA for", name)
        todo = [start]
        for i, state in enumerate(todo):
            print("  State", i, state is finish and "(final)" or "")
            for label, next in state.arcs:
                if next in todo:
                    j = todo.index(next)
                else:
                    j = len(todo)
                    todo.append(next)
                if label is None:
                    print("    -> %d" % j)
                else:
                    print("    %s -> %d" % (label, j))

    def dump_dfa(self, name, dfa):
        print("Dump of DFA for", name)
        for i, state in enumerate(dfa):
            print("  State", i, state.isfinal and "(final)" or "")
            for label, next in iteritems(state.arcs):
                print("    %s -> %d" % (label, dfa.index(next)))

    def simplify_dfa(self, dfa):
        # This is not theoretically optimal, but works well enough.
        # Algorithm: repeatedly look for two states that have the same
        # set of arcs (same labels pointing to the same nodes) and
        # unify them, until things stop changing.

        # dfa is a list of DFAState instances
        changes = True
        while changes:
            changes = False
            for i, state_i in enumerate(dfa):
                for j in range(i+1, len(dfa)):
                    state_j = dfa[j]
                    if state_i == state_j:
                        #print "  unify", i, j
                        del dfa[j]
                        for state in dfa:
                            state.unifystate(state_j, state_i)
                        changes = True
                        break

    def parse_rhs(self):
        # RHS: ALT ('|' ALT)*
        a, z = self.parse_alt()
        if self.value != "|":
            return a, z
        else:
            aa = NFAState()
            zz = NFAState()
            aa.addarc(a)
            z.addarc(zz)
            while self.value == "|":
                self.gettoken()
                a, z = self.parse_alt()
                aa.addarc(a)
                z.addarc(zz)
            return aa, zz

    def parse_alt(self):
        # ALT: ITEM+
        a, b = self.parse_item()
        while (self.value in ("(", "[") or
               self.type in (token.NAME, token.STRING)):
            c, d = self.parse_item()
            b.addarc(c)
            b = d
        return a, b

    def parse_item(self):
        # ITEM: '[' RHS ']' | ATOM ['+' | '*']
        if self.value == "[":
            self.gettoken()
            a, z = self.parse_rhs()
            self.expect(token.OP, "]")
            a.addarc(z)
            return a, z
        else:
            a, z = self.parse_atom()
            value = self.value
            if value not in ("+", "*"):
                return a, z
            self.gettoken()
            z.addarc(a)
            if value == "+":
                return a, z
            else:
                return a, a

    def parse_atom(self):
        # ATOM: '(' RHS ')' | NAME | STRING
        if self.value == "(":
            self.gettoken()
            a, z = self.parse_rhs()
            self.expect(token.OP, ")")
            return a, z
        elif self.type in (token.NAME, token.STRING):
            a = NFAState()
            z = NFAState()
            a.addarc(z, self.value)
            self.gettoken()
            return a, z
        else:
            self.raise_error("expected (...) or NAME or STRING, got %s/%s",
                             self.type, self.value)

    def expect(self, type, value=None):
        if self.type != type or (value is not None and self.value != value):
            self.raise_error("expected %s/%s, got %s/%s",
                             type, value, self.type, self.value)
        value = self.value
        self.gettoken()
        return value

    def gettoken(self):
        tup = next(self.generator)
        while tup[0] in (tokenize.COMMENT, tokenize.NL):
            tup = next(self.generator)
        self.type, self.value, self.begin, self.end, self.line = tup
        #print token.tok_name[self.type], repr(self.value)

    def raise_error(self, msg, *args):
        if args:
            try:
                msg = msg % args
            except:
                msg = " ".join([msg] + [str(x) for x in args])
        raise SyntaxError(msg, (self.filename, self.end[0],
                                self.end[1], self.line))


class NFAState(object):

    def __init__(self):
        self.arcs = []  # type: List[Tuple[unicode, Any]]
                        # list of (label, NFAState) pairs

    def addarc(self, next, label=None):
        assert label is None or isinstance(label, str)
        assert isinstance(next, NFAState)
        self.arcs.append((label, next))

    def __hash__(self):
        return hash(tuple(x[0] for x in self.arcs))


class DFAState(object):

    def __init__(self, nfaset, final):
        assert isinstance(nfaset, dict)
        assert isinstance(next(iter(nfaset)), NFAState)
        assert isinstance(final, NFAState)
        self.nfaset = nfaset
        self.isfinal = final in nfaset
        self.arcs = OrderedDict()   # type: OrderedDict
                                    # map from label to DFAState

    def __hash__(self):
        return hash(tuple(self.arcs))

    def addarc(self, next, label):
        assert isinstance(label, str)
        assert label not in self.arcs
        assert isinstance(next, DFAState)
        self.arcs[label] = next

    def unifystate(self, old, new):
        for label, next in iteritems(self.arcs):
            if next is old:
                self.arcs[label] = new

    def __eq__(self, other):
        # Equality test -- ignore the nfaset instance variable
        assert isinstance(other, DFAState)
        if self.isfinal != other.isfinal:
            return False
        # Can't just return self.arcs == other.arcs, because that
        # would invoke this method recursively, with cycles...
        if len(self.arcs) != len(other.arcs):
            return False
        for label, next in iteritems(self.arcs):
            if next is not other.arcs.get(label):
                return False
        return True


def generate_grammar(filename="Grammar.txt"):
    p = ParserGenerator(filename)
    return p.make_grammar()
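
A minimal sketch of how the generator above was driven; the import path and
the tiny grammar file are assumptions for illustration only, not part of the
diff:

    from sphinx.pycode.pgen2.pgen import generate_grammar  # assumed path

    with open('tiny.txt', 'w') as f:                # hypothetical grammar
        f.write("file_input: (NEWLINE | stmt)* ENDMARKER\n"
                "stmt: NAME '=' NUMBER NEWLINE\n")

    g = generate_grammar('tiny.txt')                # returns a PgenGrammar
    print(g.start)                                  # start symbol number (>= 256)
    print(sorted(g.symbol2number))                  # ['file_input', 'stmt']
    print(len(g.dfas))                              # one DFA per nonterminal
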
@@ -1,86 +0,0 @@
#! /usr/bin/env python

"""Token constants (from "token.h")."""

#  Taken from Python (r53757) and modified to include some tokens
#   originally monkeypatched in by pgen2.tokenize

#--start constants--
ENDMARKER = 0
NAME = 1
NUMBER = 2
STRING = 3
NEWLINE = 4
INDENT = 5
DEDENT = 6
LPAR = 7
RPAR = 8
LSQB = 9
RSQB = 10
COLON = 11
COMMA = 12
SEMI = 13
PLUS = 14
MINUS = 15
STAR = 16
SLASH = 17
VBAR = 18
AMPER = 19
LESS = 20
GREATER = 21
EQUAL = 22
DOT = 23
PERCENT = 24
BACKQUOTE = 25
LBRACE = 26
RBRACE = 27
EQEQUAL = 28
NOTEQUAL = 29
LESSEQUAL = 30
GREATEREQUAL = 31
TILDE = 32
CIRCUMFLEX = 33
LEFTSHIFT = 34
RIGHTSHIFT = 35
DOUBLESTAR = 36
PLUSEQUAL = 37
MINEQUAL = 38
STAREQUAL = 39
SLASHEQUAL = 40
PERCENTEQUAL = 41
AMPEREQUAL = 42
VBAREQUAL = 43
CIRCUMFLEXEQUAL = 44
LEFTSHIFTEQUAL = 45
RIGHTSHIFTEQUAL = 46
DOUBLESTAREQUAL = 47
DOUBLESLASH = 48
DOUBLESLASHEQUAL = 49
AT = 50
ATEQUAL = 51
RARROW = 52
ELLIPSIS = 53
OP = 54
AWAIT = 55
ASYNC = 56
COMMENT = 57
NL = 58
ERRORTOKEN = 59
N_TOKENS = 60
NT_OFFSET = 256
#--end constants--

tok_name = {}
for _name, _value in list(globals().items()):
    if type(_value) is type(0):
        tok_name[_value] = _name


def ISTERMINAL(x):
    return x < NT_OFFSET

def ISNONTERMINAL(x):
    return x >= NT_OFFSET

def ISEOF(x):
    return x == ENDMARKER
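
Illustrative only: how the constants above relate.  tok_name maps numeric
codes back to their names, and the IS* helpers classify codes against
NT_OFFSET; the import path matches the one used by tokenize.py below:

    from sphinx.pycode.pgen2 import token

    assert token.tok_name[token.NAME] == 'NAME'
    assert token.ISTERMINAL(token.NAME)      # terminal codes stay below 256
    assert token.ISNONTERMINAL(257)          # grammar symbols start at 256
    assert token.ISEOF(token.ENDMARKER)
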
@@ -1,441 +0,0 @@
# Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006 Python Software Foundation.
# All rights reserved.

"""Tokenization help for Python programs.

generate_tokens(readline) is a generator that breaks a stream of
text into Python tokens.  It accepts a readline-like method which is called
repeatedly to get the next line of input (or "" for EOF).  It generates
5-tuples with these members:

    the token type (see token.py)
    the token (a string)
    the starting (row, column) indices of the token (a 2-tuple of ints)
    the ending (row, column) indices of the token (a 2-tuple of ints)
    the original line (string)

It is designed to match the working of the Python tokenizer exactly, except
that it produces COMMENT tokens for comments and gives type OP for all
operators

Older entry points
    tokenize_loop(readline, tokeneater)
    tokenize(readline, tokeneater=printtoken)
are the same, except instead of generating tokens, tokeneater is a callback
function to which the 5 fields described above are passed as 5 arguments,
each time a new token is found.
"""

from __future__ import print_function

__author__ = 'Ka-Ping Yee <ping@lfw.org>'
__credits__ = \
    'GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro'

import string, re
from six import PY3
from sphinx.pycode.pgen2.token import *
from sphinx.pycode.pgen2 import token

if False:
    # For type annotation
    from typing import List  # NOQA

__all__ = [x for x in dir(token) if x[0] != '_'] + ["tokenize",
           "generate_tokens", "untokenize"]
del token

def group(*choices): return '(' + '|'.join(choices) + ')'
def any(*choices): return group(*choices) + '*'
def maybe(*choices): return group(*choices) + '?'

Whitespace = r'[ \f\t]*'
Comment = r'#[^\r\n]*'
Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
Name = r'[a-zA-Z_]\w*'

Binnumber = r'0[bB][01]*'
Hexnumber = r'0[xX][\da-fA-F]*[lL]?'
Octnumber = r'0[oO]?[0-7]*[lL]?'
Decnumber = r'[1-9]\d*[lL]?'
Intnumber = group(Binnumber, Hexnumber, Octnumber, Decnumber)
Exponent = r'[eE][-+]?\d+'
Pointfloat = group(r'\d+\.\d*', r'\.\d+') + maybe(Exponent)
Expfloat = r'\d+' + Exponent
Floatnumber = group(Pointfloat, Expfloat)
Imagnumber = group(r'\d+[jJ]', Floatnumber + r'[jJ]')
Number = group(Imagnumber, Floatnumber, Intnumber)

# Tail end of ' string.
Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
# Tail end of " string.
Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
# Tail end of ''' string.
Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
# Tail end of """ string.
Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
Triple = group("[ubUB]?[rR]?'''", '[ubUB]?[rR]?"""')
# Single-line ' or " string.
String = group(r"[uU]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
               r'[uU]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"')

# Because of leftmost-then-longest match semantics, be sure to put the
# longest operators first (e.g., if = came before ==, == would get
# recognized as two instances of =).
Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"<>", r"!=",
                 r"//=?", r"->",
                 r"[+\-*/%&|^=<>]=?",
                 r"~")

Bracket = '[][(){}]'
Special = group(r'\r?\n', r'[:;.,`@]')
if PY3:
    Ellipsis_ = r'\.{3}'
    Special = group(Ellipsis_, Special)
Funny = group(Operator, Bracket, Special)

PlainToken = group(Number, Funny, String, Name)
Token = Ignore + PlainToken

# First (or only) line of ' or " string.
ContStr = group(r"[uUbB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
                group("'", r'\\\r?\n'),
                r'[uUbB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
                group('"', r'\\\r?\n'))
PseudoExtras = group(r'\\\r?\n', Comment, Triple)
PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)

tokenprog, pseudoprog, single3prog, double3prog = [
    re.compile(x) for x in (Token, PseudoToken, Single3, Double3)
]
endprogs = {"'": re.compile(Single), '"': re.compile(Double),
            "'''": single3prog, '"""': double3prog,
            "r'''": single3prog, 'r"""': double3prog,
            "u'''": single3prog, 'u"""': double3prog,
            "b'''": single3prog, 'b"""': double3prog,
            "ur'''": single3prog, 'ur"""': double3prog,
            "br'''": single3prog, 'br"""': double3prog,
            "R'''": single3prog, 'R"""': double3prog,
            "U'''": single3prog, 'U"""': double3prog,
            "B'''": single3prog, 'B"""': double3prog,
            "uR'''": single3prog, 'uR"""': double3prog,
            "Ur'''": single3prog, 'Ur"""': double3prog,
            "UR'''": single3prog, 'UR"""': double3prog,
            "bR'''": single3prog, 'bR"""': double3prog,
            "Br'''": single3prog, 'Br"""': double3prog,
            "BR'''": single3prog, 'BR"""': double3prog,
            'r': None, 'R': None,
            'u': None, 'U': None,
            'b': None, 'B': None}

triple_quoted = {}
for t in ("'''", '"""',
          "r'''", 'r"""', "R'''", 'R"""',
          "u'''", 'u"""', "U'''", 'U"""',
          "b'''", 'b"""', "B'''", 'B"""',
          "ur'''", 'ur"""', "Ur'''", 'Ur"""',
          "uR'''", 'uR"""', "UR'''", 'UR"""',
          "br'''", 'br"""', "Br'''", 'Br"""',
          "bR'''", 'bR"""', "BR'''", 'BR"""',):
    triple_quoted[t] = t
single_quoted = {}
for t in ("'", '"',
          "r'", 'r"', "R'", 'R"',
          "u'", 'u"', "U'", 'U"',
          "b'", 'b"', "B'", 'B"',
          "ur'", 'ur"', "Ur'", 'Ur"',
          "uR'", 'uR"', "UR'", 'UR"',
          "br'", 'br"', "Br'", 'Br"',
          "bR'", 'bR"', "BR'", 'BR"', ):
    single_quoted[t] = t

tabsize = 8

class TokenError(Exception): pass

class StopTokenizing(Exception): pass

def printtoken(type, token, scell, ecell, line): # for testing
    srow, scol = scell
    erow, ecol = ecell
    print("%d,%d-%d,%d:\t%s\t%s" %
          (srow, scol, erow, ecol, tok_name[type], repr(token)))

def tokenize(readline, tokeneater=printtoken):
    """
    The tokenize() function accepts two parameters: one representing the
    input stream, and one providing an output mechanism for tokenize().

    The first parameter, readline, must be a callable object which provides
    the same interface as the readline() method of built-in file objects.
    Each call to the function should return one line of input as a string.

    The second parameter, tokeneater, must also be a callable object. It is
    called once for each token, with five arguments, corresponding to the
    tuples generated by generate_tokens().
    """
    try:
        tokenize_loop(readline, tokeneater)
    except StopTokenizing:
        pass

# backwards compatible interface
def tokenize_loop(readline, tokeneater):
    for token_info in generate_tokens(readline):
        tokeneater(*token_info)

class Untokenizer:

    def __init__(self):
        self.tokens = []  # type: List[unicode]
        self.prev_row = 1
        self.prev_col = 0

    def add_whitespace(self, start):
        row, col = start
        assert row <= self.prev_row
        col_offset = col - self.prev_col
        if col_offset:
            self.tokens.append(" " * col_offset)

    def untokenize(self, iterable):
        for t in iterable:
            if len(t) == 2:
                self.compat(t, iterable)
                break
            tok_type, token, start, end, line = t
            self.add_whitespace(start)
            self.tokens.append(token)
            self.prev_row, self.prev_col = end
            if tok_type in (NEWLINE, NL):
                self.prev_row += 1
                self.prev_col = 0
        return "".join(self.tokens)

    def compat(self, token, iterable):
        startline = False
        indents = []
        toks_append = self.tokens.append
        toknum, tokval = token
        if toknum in (NAME, NUMBER):
            tokval += ' '
        if toknum in (NEWLINE, NL):
            startline = True
        for tok in iterable:
            toknum, tokval = tok[:2]

            if toknum in (NAME, NUMBER):
                tokval += ' '

            if toknum == INDENT:
                indents.append(tokval)
                continue
            elif toknum == DEDENT:
                indents.pop()
                continue
            elif toknum in (NEWLINE, NL):
                startline = True
            elif startline and indents:
                toks_append(indents[-1])
                startline = False
            toks_append(tokval)

def untokenize(iterable):
    """Transform tokens back into Python source code.

    Each element returned by the iterable must be a token sequence
    with at least two elements, a token number and token value.  If
    only two tokens are passed, the resulting output is poor.

    Round-trip invariant for full input:
        Untokenized source will match input source exactly

    Round-trip invariant for limited input:
        # Output text will tokenize back to the input
        t1 = [tok[:2] for tok in generate_tokens(f.readline)]
        newcode = untokenize(t1)
        readline = iter(newcode.splitlines(1)).next
        t2 = [tok[:2] for tok in generate_tokens(readline)]
        assert t1 == t2
    """
    ut = Untokenizer()
    return ut.untokenize(iterable)

def generate_tokens(readline):
    """
    The generate_tokens() generator requires one argument, readline, which
    must be a callable object which provides the same interface as the
    readline() method of built-in file objects. Each call to the function
    should return one line of input as a string.  Alternately, readline
    can be a callable function terminating with StopIteration:
        readline = open(myfile).next    # Example of alternate readline

    The generator produces 5-tuples with these members: the token type; the
    token string; a 2-tuple (srow, scol) of ints specifying the row and
    column where the token begins in the source; a 2-tuple (erow, ecol) of
    ints specifying the row and column where the token ends in the source;
    and the line on which the token was found. The line passed is the
    logical line; continuation lines are included.
    """
    lnum = parenlev = continued = 0
    namechars, numchars = string.ascii_letters + '_', '0123456789'
    contstr, needcont = '', 0
    contline = None
    indents = [0]

    while 1:                                   # loop over lines in stream
        try:
            line = readline()
        except StopIteration:
            line = ''
        # if we are not at the end of the file make sure the
        # line ends with a newline because the parser depends
        # on that.
        if line:
            line = line.rstrip() + '\n'
        lnum = lnum + 1
        pos, max = 0, len(line)

        if contstr:                            # continued string
            if not line:
                raise TokenError("EOF in multi-line string", strstart)  # type: ignore
            endmatch = endprog.match(line)  # type: ignore
            if endmatch:
                pos = end = endmatch.end(0)
                yield (STRING, contstr + line[:end],
                       strstart, (lnum, end), contline + line)  # type: ignore
                contstr, needcont = '', 0
                contline = None
            elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
                yield (ERRORTOKEN, contstr + line,
                           strstart, (lnum, len(line)), contline)  # type: ignore
                contstr = ''
                contline = None
                continue
            else:
                contstr = contstr + line
                contline = contline + line
                continue

        elif parenlev == 0 and not continued:  # new statement
            if not line: break
            column = 0
            while pos < max:                   # measure leading whitespace
                if line[pos] == ' ': column = column + 1
                elif line[pos] == '\t': column = (column/tabsize + 1)*tabsize
                elif line[pos] == '\f': column = 0
                else: break
                pos = pos + 1
            if pos == max: break

            if line[pos] in '#\r\n':           # skip comments or blank lines
                if line[pos] == '#':
                    comment_token = line[pos:].rstrip('\r\n')
                    nl_pos = pos + len(comment_token)
                    yield (COMMENT, comment_token,
                           (lnum, pos), (lnum, pos + len(comment_token)), line)
                    yield (NL, line[nl_pos:],
                           (lnum, nl_pos), (lnum, len(line)), line)
                else:
                    yield ((NL, COMMENT)[line[pos] == '#'], line[pos:],
                           (lnum, pos), (lnum, len(line)), line)
                continue

            if column > indents[-1]:           # count indents or dedents
                indents.append(column)
                yield (INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
            while column < indents[-1]:
                if column not in indents:
                    raise IndentationError(
                        "unindent does not match any outer indentation level",
                        ("<tokenize>", lnum, pos, line))
                indents = indents[:-1]
                yield (DEDENT, '', (lnum, pos), (lnum, pos), line)

        else:                                  # continued statement
            if not line:
                raise TokenError("EOF in multi-line statement", (lnum, 0))
            continued = 0

        while pos < max:
            pseudomatch = pseudoprog.match(line, pos)
            if pseudomatch:                                # scan for tokens
                start, end = pseudomatch.span(1)
                spos, epos, pos = (lnum, start), (lnum, end), end
                token, initial = line[start:end], line[start]

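                # Peek at the next pseudo-token on the same line; n_token is
                # only consulted further down to decide whether 'async' and
                # 'await' should be reported as their own token types.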
                if end < max:
                    next_pseudomatch = pseudoprog.match(line, end)
                    if next_pseudomatch:
                        n_start, n_end = next_pseudomatch.span(1)
                        n_token = line[n_start:n_end]
                    else:
                        n_token = None
                else:
                    n_token = None

                if initial in numchars or (
                   initial == '.' and token not in ('.', '...')
                   ):                                      # ordinary number
                    yield (NUMBER, token, spos, epos, line)
                elif initial in '\r\n':
                    newline = NEWLINE
                    if parenlev > 0:
                        newline = NL
                    yield (newline, token, spos, epos, line)
                elif initial == '#':
                    assert not token.endswith("\n")
                    yield (COMMENT, token, spos, epos, line)
                elif token in triple_quoted:
                    endprog = endprogs[token]
                    endmatch = endprog.match(line, pos)
                    if endmatch:                           # all on one line
                        pos = endmatch.end(0)
                        token = line[start:pos]
                        yield (STRING, token, spos, (lnum, pos), line)
                    else:
                        strstart = (lnum, start)           # multiple lines
                        contstr = line[start:]
                        contline = line
                        break
                elif initial in single_quoted or \
                    token[:2] in single_quoted or \
                    token[:3] in single_quoted:
                    if token[-1] == '\n':                  # continued string
                        strstart = (lnum, start)
                        endprog = (endprogs[initial] or endprogs[token[1]] or
                                   endprogs[token[2]])
                        contstr, needcont = line[start:], 1
                        contline = line
                        break
                    else:                                  # ordinary string
                        yield (STRING, token, spos, epos, line)
                elif token == 'await' and n_token:
                    yield (AWAIT, token, spos, epos, line)
                elif token == 'async' and n_token in ('def', 'for', 'with'):
                    yield (ASYNC, token, spos, epos, line)
                elif initial in namechars:                 # ordinary name
                    yield (NAME, token, spos, epos, line)
                elif token in ('...',):                    # ordinary name
                    yield (NAME, token, spos, epos, line)
                elif initial == '\\':                      # continued stmt
                    # This yield is new; needed for better idempotency:
                    yield (NL, token, spos, (lnum, pos), line)
                    continued = 1
                else:
                    if initial in '([{': parenlev = parenlev + 1
                    elif initial in ')]}': parenlev = parenlev - 1
                    yield (OP, token, spos, epos, line)
            else:
                yield (ERRORTOKEN, line[pos],
                           (lnum, pos), (lnum, pos+1), line)
                pos = pos + 1

    for _ in indents[1:]:                      # pop remaining indent levels
        yield (DEDENT, '', (lnum, 0), (lnum, 0), '')
    yield (ENDMARKER, '', (lnum, 0), (lnum, 0), '')

if __name__ == '__main__':                     # testing
    import sys
    if len(sys.argv) > 1: tokenize(open(sys.argv[1]).readline)
    else: tokenize(sys.stdin.readline)
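
A small sketch (illustrative only, not part of the change) of the round-trip
behaviour documented in the docstrings above; io.StringIO is used here as an
assumed stand-in for a file's readline:

    from io import StringIO

    source = "x = 1\nif x:\n    y = 2\n"
    t1 = [tok[:2] for tok in generate_tokens(StringIO(source).readline)]
    newcode = untokenize(t1)
    t2 = [tok[:2] for tok in generate_tokens(StringIO(newcode).readline)]
    assert t1 == t2          # limited-input invariant from untokenize()
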
@@ -63,20 +63,30 @@ def test_ModuleAnalyzer_find_tags():
             '   """function baz"""\n'
             '   pass\n'
             '\n'
-            '@decorator\n'
+            '@decorator1\n'
+            '@decorator2\n'
             'def quux():\n'
-            '   pass\n')
+            '   pass\n'  # line: 21
+            '\n'
+            'class Corge(object):\n'
+            '    @decorator1\n'
+            '    @decorator2\n'
+            '    def grault(self):\n'
+            '        pass\n')
     analyzer = ModuleAnalyzer.for_string(code, 'module')
     tags = analyzer.find_tags()
     assert set(tags.keys()) == {'Foo', 'Foo.__init__', 'Foo.bar',
-                                'Foo.Baz', 'Foo.Baz.__init__', 'qux', 'quux'}
-    assert tags['Foo'] == ('class', 1, 13)  # type, start, end
-    assert tags['Foo.__init__'] == ('def', 3, 5)
-    assert tags['Foo.bar'] == ('def', 6, 9)
-    assert tags['Foo.Baz'] == ('class', 10, 13)
-    assert tags['Foo.Baz.__init__'] == ('def', 11, 13)
-    assert tags['qux'] == ('def', 14, 17)
-    assert tags['quux'] == ('def', 18, 21)  # decorator
+                                'Foo.Baz', 'Foo.Baz.__init__', 'qux', 'quux',
+                                'Corge', 'Corge.grault'}
+    assert tags['Foo'] == ('class', 1, 12)  # type, start, end
+    assert tags['Foo.__init__'] == ('def', 3, 4)
+    assert tags['Foo.bar'] == ('def', 6, 8)
+    assert tags['Foo.Baz'] == ('class', 10, 12)
+    assert tags['Foo.Baz.__init__'] == ('def', 11, 12)
+    assert tags['qux'] == ('def', 14, 16)
+    assert tags['quux'] == ('def', 18, 21)
+    assert tags['Corge'] == ('class', 23, 27)
+    assert tags['Corge.grault'] == ('def', 24, 27)
 
 
 def test_ModuleAnalyzer_find_attr_docs():
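For readers following the test change, a short usage sketch (not part of the diff) of the API the assertions exercise: ModuleAnalyzer.for_string() parses source without importing it, and find_tags() maps each class or function name to a (type, start_line, end_line) tuple.

```python
# Usage sketch based on the test above; the exact end-line values depend on
# the parser, so only the shape of the result is shown here.
from sphinx.pycode import ModuleAnalyzer

source = (
    "class Foo(object):\n"
    "    def bar(self):\n"
    "        pass\n"
)

analyzer = ModuleAnalyzer.for_string(source, 'example')
for name, (kind, start, end) in sorted(analyzer.find_tags().items()):
    print(name, kind, start, end)   # e.g. 'Foo' -> 'class', start line 1
```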
@@ -114,6 +124,8 @@ def test_ModuleAnalyzer_find_attr_docs():
                          ('Foo', 'attr3'),
                          ('Foo', 'attr4'),
                          ('Foo', 'attr5'),
+                         ('Foo', 'attr6'),
+                         ('Foo', 'attr7'),
                          ('Foo', 'attr8'),
                          ('Foo', 'attr9')}
     assert docs[('Foo', 'attr1')] == ['comment before attr1', '']
@@ -121,19 +133,23 @@ def test_ModuleAnalyzer_find_attr_docs():
     assert docs[('Foo', 'attr4')] == ['long attribute comment', '']
     assert docs[('Foo', 'attr4')] == ['long attribute comment', '']
     assert docs[('Foo', 'attr5')] == ['attribute comment for attr5', '']
+    assert docs[('Foo', 'attr6')] == ['this comment is ignored', '']
+    assert docs[('Foo', 'attr7')] == ['this comment is ignored', '']
     assert docs[('Foo', 'attr8')] == ['attribute comment for attr8', '']
     assert docs[('Foo', 'attr9')] == ['string after attr9', '']
     assert analyzer.tagorder == {'Foo': 0,
-                                 'Foo.__init__': 6,
+                                 'Foo.__init__': 8,
                                  'Foo.attr1': 1,
                                  'Foo.attr2': 2,
                                  'Foo.attr3': 3,
                                  'Foo.attr4': 4,
                                  'Foo.attr5': 5,
-                                 'Foo.attr8': 8,
-                                 'Foo.attr9': 10,
-                                 'Foo.bar': 11,
-                                 'baz': 12,
-                                 'Qux': 13,
-                                 'Qux.attr1': 14,
-                                 'Qux.attr2': 15}
+                                 'Foo.attr6': 6,
+                                 'Foo.attr7': 7,
+                                 'Foo.attr8': 10,
+                                 'Foo.attr9': 12,
+                                 'Foo.bar': 13,
+                                 'baz': 14,
+                                 'Qux': 15,
+                                 'Qux.attr1': 16,
+                                 'Qux.attr2': 17}
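Likewise, a small sketch (illustrative, not from the diff) of find_attr_docs() and tagorder as used by the assertions above: a "#:" comment immediately before an assignment becomes the attribute's documentation, keyed by (namespace, attribute name), while tagorder records the order in which names were defined.

```python
# Illustrative sketch of the attribute-doc lookup exercised by the test.
from sphinx.pycode import ModuleAnalyzer

source = (
    "class Foo(object):\n"
    "    #: comment before attr1\n"
    "    attr1 = None\n"
)

analyzer = ModuleAnalyzer.for_string(source, 'example')
docs = analyzer.find_attr_docs()
print(docs[('Foo', 'attr1')])   # expected: ['comment before attr1', '']
print(analyzer.tagorder)        # definition order, e.g. {'Foo': 0, 'Foo.attr1': 1}
```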
@@ -20,7 +20,6 @@ Release checklist
 * Check diff by `git diff`
 * `git commit -am 'Bump to x.y.z final'`
 * `make clean`
-* `python setup.py compile_grammar`
 * `python setup.py release bdist_wheel sdist upload --identity=[your key]`
 * open https://pypi.python.org/pypi/Sphinx and check there are no obvious errors
 * `git tag x.y.z` with version number