Closes #1152: Fix pycode parsing errors of Python 3 code by including two grammar

versions for Python 2 and 3, and loading the appropriate version for the running Python version.
2025-02-25 18:55:22 -06:00 · 2014-01-12 19:44:04 +01:00 · 2014-01-12 19:44:04 +01:00 · 42a8138c30
commit 42a8138c30
parent 531d70989f
8 changed files with 172 additions and 51 deletions
--- a/4
+++ b/4
@ -72,6 +72,10 @@ Bugs fixed
 * #1198: Allow "image" for the "figwidth" option of the :rst:dir:`figure`
  directive as documented by docutils.

+* #1152: Fix pycode parsing errors of Python 3 code by including two grammar
+  versions for Python 2 and 3, and loading the appropriate version for the
+  running Python version.
+
 Documentation
 -------------

--- a/MANIFEST.in
+++ b/MANIFEST.in
@ -20,7 +20,8 @@ recursive-include sphinx/ext/autosummary/templates *
 recursive-include tests *
 recursive-include utils *
 recursive-include custom_fixers *
-include sphinx/pycode/Grammar.txt
+include sphinx/pycode/Grammar-py2.txt
+include sphinx/pycode/Grammar-py3.txt

 recursive-include doc *
 prune doc/_build
--- a/sphinx/pycode/Grammar-py2.txt
+++ b/sphinx/pycode/Grammar-py2.txt
@ -1,49 +1,31 @@
-# Grammar for Python. This grammar supports Python 2.x and 3.x.
-
-# Note:  Changing the grammar specified in this file will most likely
-#        require corresponding changes in the parser module
-#        (../Modules/parsermodule.c).  If you can't make the changes to
-#        that module yourself, please co-ordinate the required changes
-#        with someone who can; ask around on python-dev for help.  Fred
-#        Drake <fdrake@acm.org> will probably be listening there.
-
-# NOTE WELL: You should also follow all the steps listed in PEP 306,
-# "How to Change Python's Grammar"
+# Grammar for Python 2.x

 # Start symbols for the grammar:
-#       file_input is a module or sequence of commands read from an input file;
 #       single_input is a single interactive statement;
+#       file_input is a module or sequence of commands read from an input file;
 #       eval_input is the input for the eval() and input() functions.
 # NB: compound_stmt in single_input is followed by extra NEWLINE!
-file_input: (NEWLINE | stmt)* ENDMARKER
 single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
+file_input: (NEWLINE | stmt)* ENDMARKER
 eval_input: testlist NEWLINE* ENDMARKER

 decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
 decorators: decorator+
 decorated: decorators (classdef | funcdef)
-funcdef: 'def' NAME parameters ['->' test] ':' suite
-parameters: '(' [typedargslist] ')'
-typedargslist: ((tfpdef ['=' test] ',')*
-                ('*' [tname] (',' tname ['=' test])* [',' '**' tname] | '**' tname)
-                | tfpdef ['=' test] (',' tfpdef ['=' test])* [','])
-tname: NAME [':' test]
-tfpdef: tname | '(' tfplist ')'
-tfplist: tfpdef (',' tfpdef)* [',']
-varargslist: ((vfpdef ['=' test] ',')*
-              ('*' [vname] (',' vname ['=' test])*  [',' '**' vname] | '**' vname)
-              | vfpdef ['=' test] (',' vfpdef ['=' test])* [','])
-vname: NAME
-vfpdef: vname | '(' vfplist ')'
-vfplist: vfpdef (',' vfpdef)* [',']
+funcdef: 'def' NAME parameters ':' suite
+parameters: '(' [varargslist] ')'
+varargslist: ((fpdef ['=' test] ',')*
+              ('*' NAME [',' '**' NAME] | '**' NAME) |
+              fpdef ['=' test] (',' fpdef ['=' test])* [','])
+fpdef: NAME | '(' fplist ')'
+fplist: fpdef (',' fpdef)* [',']

 stmt: simple_stmt | compound_stmt
 simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
 small_stmt: (expr_stmt | print_stmt  | del_stmt | pass_stmt | flow_stmt |
             import_stmt | global_stmt | exec_stmt | assert_stmt)
-expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) |
-                     ('=' (yield_expr|testlist_star_expr))*)
-testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
+expr_stmt: testlist (augassign (yield_expr|testlist) |
+                     ('=' (yield_expr|testlist))*)
 augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |
            '<<=' | '>>=' | '**=' | '//=')
 # For normal assignments, additional restrictions enforced by the interpreter
@ -56,7 +38,7 @@ break_stmt: 'break'
 continue_stmt: 'continue'
 return_stmt: 'return' [testlist]
 yield_stmt: yield_expr
-raise_stmt: 'raise' [test ['from' test | ',' test [',' test]]]
+raise_stmt: 'raise' [test [',' test [',' test]]]
 import_stmt: import_name | import_from
 import_name: 'import' dotted_as_names
 import_from: ('from' ('.'* dotted_name | '.'+)
@ -66,7 +48,7 @@ dotted_as_name: dotted_name ['as' NAME]
 import_as_names: import_as_name (',' import_as_name)* [',']
 dotted_as_names: dotted_as_name (',' dotted_as_name)*
 dotted_name: NAME ('.' NAME)*
-global_stmt: ('global' | 'nonlocal') NAME (',' NAME)*
+global_stmt: 'global' NAME (',' NAME)*
 exec_stmt: 'exec' expr ['in' test [',' test]]
 assert_stmt: 'assert' test [',' test]

@ -82,7 +64,7 @@ try_stmt: ('try' ':' suite
 with_stmt: 'with' with_item (',' with_item)*  ':' suite
 with_item: test ['as' expr]
 # NB compile.c makes sure that the default except clause is last
-except_clause: 'except' [test [(',' | 'as') test]]
+except_clause: 'except' [test [('as' | ',') test]]
 suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT

 # Backward compatibility cruft to support:
@ -100,7 +82,6 @@ and_test: not_test ('and' not_test)*
 not_test: 'not' not_test | comparison
 comparison: expr (comp_op expr)*
 comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
-star_expr: '*' expr
 expr: xor_expr ('|' xor_expr)*
 xor_expr: and_expr ('^' and_expr)*
 and_expr: shift_expr ('&' shift_expr)*
@ -109,32 +90,39 @@ arith_expr: term (('+'|'-') term)*
 term: factor (('*'|'/'|'%'|'//') factor)*
 factor: ('+'|'-'|'~') factor | power
 power: atom trailer* ['**' factor]
-atom: ('(' [yield_expr|testlist_gexp] ')' |
+atom: ('(' [yield_expr|testlist_comp] ')' |
       '[' [listmaker] ']' |
-       '{' [dictsetmaker] '}' |
+       '{' [dictorsetmaker] '}' |
       '`' testlist1 '`' |
-       NAME | NUMBER | STRING+ | '.' '.' '.')
-listmaker: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
-testlist_gexp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
+#       '>>' test |
+       NAME | NUMBER | STRING+)
+listmaker: test ( list_for | (',' test)* [','] )
+testlist_comp: test ( comp_for | (',' test)* [','] )
 lambdef: 'lambda' [varargslist] ':' test
 trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
 subscriptlist: subscript (',' subscript)* [',']
-subscript: test | [test] ':' [test] [sliceop]
+subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop]
 sliceop: ':' [test]
-exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
+exprlist: expr (',' expr)* [',']
 testlist: test (',' test)* [',']
-dictsetmaker: ( (test ':' test (comp_for | (',' test ':' test)* [','])) |
-                (test (comp_for | (',' test)* [','])) )
+dictorsetmaker: ( (test ':' test (comp_for | (',' test ':' test)* [','])) |
+                  (test (comp_for | (',' test)* [','])) )

-classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
+classdef: 'class' NAME ['(' [testlist] ')'] ':' suite

 arglist: (argument ',')* (argument [',']
-                         |'*' test (',' argument)* [',' '**' test] 
+                         |'*' test (',' argument)* [',' '**' test]
                         |'**' test)
-argument: test [comp_for] | test '=' test  # Really [keyword '='] test
+# The reason that keywords are test nodes instead of NAME is that using NAME
+# results in an ambiguity. ast.c makes sure it's a NAME.
+argument: test [comp_for] | test '=' test
+
+list_iter: list_for | list_if
+list_for: 'for' exprlist 'in' testlist_safe [list_iter]
+list_if: 'if' old_test [list_iter]

 comp_iter: comp_for | comp_if
-comp_for: 'for' exprlist 'in' testlist_safe [comp_iter]
+comp_for: 'for' exprlist 'in' or_test [comp_iter]
 comp_if: 'if' old_test [comp_iter]

 testlist1: test (',' test)*
--- a/sphinx/pycode/Grammar-py3.txt
+++ b/sphinx/pycode/Grammar-py3.txt
@ -0,0 +1,123 @@
+# Grammar for Python 3.x (with at least x <= 4)
+
+# Start symbols for the grammar:
+#       single_input is a single interactive statement;
+#       file_input is a module or sequence of commands read from an input file;
+#       eval_input is the input for the eval() functions.
+# NB: compound_stmt in single_input is followed by extra NEWLINE!
+single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
+file_input: (NEWLINE | stmt)* ENDMARKER
+eval_input: testlist NEWLINE* ENDMARKER
+
+decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
+decorators: decorator+
+decorated: decorators (classdef | funcdef)
+funcdef: 'def' NAME parameters ['->' test] ':' suite
+parameters: '(' [typedargslist] ')'
+typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [','
+       ['*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef]]
+     |  '*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef)
+tfpdef: NAME [':' test]
+varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [','
+       ['*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef]]
+     |  '*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef)
+vfpdef: NAME
+
+stmt: simple_stmt | compound_stmt
+simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
+small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt |
+             import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
+expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) |
+                     ('=' (yield_expr|testlist_star_expr))*)
+testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
+augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |
+            '<<=' | '>>=' | '**=' | '//=')
+# For normal assignments, additional restrictions enforced by the interpreter
+del_stmt: 'del' exprlist
+pass_stmt: 'pass'
+flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
+break_stmt: 'break'
+continue_stmt: 'continue'
+return_stmt: 'return' [testlist]
+yield_stmt: yield_expr
+raise_stmt: 'raise' [test ['from' test]]
+import_stmt: import_name | import_from
+import_name: 'import' dotted_as_names
+# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
+import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+)
+              'import' ('*' | '(' import_as_names ')' | import_as_names))
+import_as_name: NAME ['as' NAME]
+dotted_as_name: dotted_name ['as' NAME]
+import_as_names: import_as_name (',' import_as_name)* [',']
+dotted_as_names: dotted_as_name (',' dotted_as_name)*
+dotted_name: NAME ('.' NAME)*
+global_stmt: 'global' NAME (',' NAME)*
+nonlocal_stmt: 'nonlocal' NAME (',' NAME)*
+assert_stmt: 'assert' test [',' test]
+
+compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated
+if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
+while_stmt: 'while' test ':' suite ['else' ':' suite]
+for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
+try_stmt: ('try' ':' suite
+           ((except_clause ':' suite)+
+            ['else' ':' suite]
+            ['finally' ':' suite] |
+           'finally' ':' suite))
+with_stmt: 'with' with_item (',' with_item)*  ':' suite
+with_item: test ['as' expr]
+# NB compile.c makes sure that the default except clause is last
+except_clause: 'except' [test ['as' NAME]]
+suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
+
+test: or_test ['if' or_test 'else' test] | lambdef
+test_nocond: or_test | lambdef_nocond
+lambdef: 'lambda' [varargslist] ':' test
+lambdef_nocond: 'lambda' [varargslist] ':' test_nocond
+or_test: and_test ('or' and_test)*
+and_test: not_test ('and' not_test)*
+not_test: 'not' not_test | comparison
+comparison: expr (comp_op expr)*
+# <> isn't actually a valid comparison operator in Python. It's here for the
+# sake of a __future__ import described in PEP 401
+comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
+star_expr: '*' expr
+expr: xor_expr ('|' xor_expr)*
+xor_expr: and_expr ('^' and_expr)*
+and_expr: shift_expr ('&' shift_expr)*
+shift_expr: arith_expr (('<<'|'>>') arith_expr)*
+arith_expr: term (('+'|'-') term)*
+term: factor (('*'|'/'|'%'|'//') factor)*
+factor: ('+'|'-'|'~') factor | power
+power: atom trailer* ['**' factor]
+atom: ('(' [yield_expr|testlist_comp] ')' |
+       '[' [testlist_comp] ']' |
+       '{' [dictorsetmaker] '}' |
+       NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
+testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
+trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
+subscriptlist: subscript (',' subscript)* [',']
+subscript: test | [test] ':' [test] [sliceop]
+sliceop: ':' [test]
+exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
+testlist: test (',' test)* [',']
+dictorsetmaker: ( (test ':' test (comp_for | (',' test ':' test)* [','])) |
+                  (test (comp_for | (',' test)* [','])) )
+
+classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
+
+arglist: (argument ',')* (argument [',']
+                         |'*' test (',' argument)* [',' '**' test]
+                         |'**' test)
+# The reason that keywords are test nodes instead of NAME is that using NAME
+# results in an ambiguity. ast.c makes sure it's a NAME.
+argument: test [comp_for] | test '=' test  # Really [keyword '='] test
+comp_iter: comp_for | comp_if
+comp_for: 'for' exprlist 'in' or_test [comp_iter]
+comp_if: 'if' test_nocond [comp_iter]
+
+# not used in grammar, but may appear in "node" passed from Parser to Compiler
+encoding_decl: NAME
+
+yield_expr: 'yield' [yield_arg]
+yield_arg: 'from' test | testlist
--- a/sphinx/pycode/init.py
+++ b/sphinx/pycode/init.py
@ -9,6 +9,7 @@
    :license: BSD, see LICENSE for details.
 """

+import sys
 from os import path

 from sphinx import package_dir
@ -21,7 +22,8 @@ from sphinx.util.docstrings import prepare_docstring, prepare_commentdoc


 # load the Python grammar
-_grammarfile = path.join(package_dir, 'pycode', 'Grammar.txt')
+_grammarfile = path.join(package_dir, 'pycode',
+                         'Grammar-py%d.txt' % sys.version_info[0])
 pygrammar = driver.load_grammar(_grammarfile)
 pydriver = driver.Driver(pygrammar, convert=nodes.convert)

--- a/sphinx/pycode/pgen2/driver.py
+++ b/sphinx/pycode/pgen2/driver.py
@ -122,7 +122,8 @@ def load_grammar(gt="Grammar.txt", gp=None,
        if tail == ".txt":
            tail = ""
        # embed Sphinx major version for the case we ever change the grammar...
-        gp = head + tail + ".".join(map(str, sphinx.version_info[:2])) + ".pickle"
+        gp = head + tail + "-sphinx" + \
+             ".".join(map(str, sphinx.version_info[:2])) + ".pickle"
    if force or not _newer(gp, gt):
        logger.info("Generating grammar tables from %s", gt)
        g = pgen.generate_grammar(gt)
--- a/sphinx/pycode/pgen2/grammar.py
+++ b/sphinx/pycode/pgen2/grammar.py
@ -162,6 +162,7 @@ opmap_raw = """
 // DOUBLESLASH
 //= DOUBLESLASHEQUAL
 -> RARROW
+... ELLIPSIS
 """

 opmap = {}
--- a/sphinx/pycode/pgen2/token.py
+++ b/sphinx/pycode/pgen2/token.py
@ -62,7 +62,8 @@ COMMENT = 52
 NL = 53
 RARROW = 54
 ERRORTOKEN = 55
-N_TOKENS = 56
+ELLIPSIS = 56
+N_TOKENS = 57
 NT_OFFSET = 256
 #--end constants--