From 42a8138c30ca2415c068f6dc19f9dbffddca0cac Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Sun, 12 Jan 2014 19:44:04 +0100 Subject: [PATCH] Closes #1152: Fix pycode parsing errors of Python 3 code by including two grammar versions for Python 2 and 3, and loading the appropriate version for the running Python version. --- CHANGES | 4 + MANIFEST.in | 3 +- .../pycode/{Grammar.txt => Grammar-py2.txt} | 82 +++++------- sphinx/pycode/Grammar-py3.txt | 123 ++++++++++++++++++ sphinx/pycode/__init__.py | 4 +- sphinx/pycode/pgen2/driver.py | 3 +- sphinx/pycode/pgen2/grammar.py | 1 + sphinx/pycode/pgen2/token.py | 3 +- 8 files changed, 172 insertions(+), 51 deletions(-) rename sphinx/pycode/{Grammar.txt => Grammar-py2.txt} (63%) create mode 100644 sphinx/pycode/Grammar-py3.txt diff --git a/CHANGES b/CHANGES index 480ee39fa..294e6580b 100644 --- a/CHANGES +++ b/CHANGES @@ -72,6 +72,10 @@ Bugs fixed * #1198: Allow "image" for the "figwidth" option of the :rst:dir:`figure` directive as documented by docutils. +* #1152: Fix pycode parsing errors of Python 3 code by including two grammar + versions for Python 2 and 3, and loading the appropriate version for the + running Python version. + Documentation ------------- diff --git a/MANIFEST.in b/MANIFEST.in index ddd0186d6..5db26b81e 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -20,7 +20,8 @@ recursive-include sphinx/ext/autosummary/templates * recursive-include tests * recursive-include utils * recursive-include custom_fixers * -include sphinx/pycode/Grammar.txt +include sphinx/pycode/Grammar-py2.txt +include sphinx/pycode/Grammar-py3.txt recursive-include doc * prune doc/_build diff --git a/sphinx/pycode/Grammar.txt b/sphinx/pycode/Grammar-py2.txt similarity index 63% rename from sphinx/pycode/Grammar.txt rename to sphinx/pycode/Grammar-py2.txt index fcab0b697..54d6fa970 100644 --- a/sphinx/pycode/Grammar.txt +++ b/sphinx/pycode/Grammar-py2.txt @@ -1,49 +1,31 @@ -# Grammar for Python. This grammar supports Python 2.x and 3.x. - -# Note: Changing the grammar specified in this file will most likely -# require corresponding changes in the parser module -# (../Modules/parsermodule.c). If you can't make the changes to -# that module yourself, please co-ordinate the required changes -# with someone who can; ask around on python-dev for help. Fred -# Drake will probably be listening there. - -# NOTE WELL: You should also follow all the steps listed in PEP 306, -# "How to Change Python's Grammar" +# Grammar for Python 2.x # Start symbols for the grammar: -# file_input is a module or sequence of commands read from an input file; # single_input is a single interactive statement; +# file_input is a module or sequence of commands read from an input file; # eval_input is the input for the eval() and input() functions. # NB: compound_stmt in single_input is followed by extra NEWLINE! -file_input: (NEWLINE | stmt)* ENDMARKER single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE +file_input: (NEWLINE | stmt)* ENDMARKER eval_input: testlist NEWLINE* ENDMARKER decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE decorators: decorator+ decorated: decorators (classdef | funcdef) -funcdef: 'def' NAME parameters ['->' test] ':' suite -parameters: '(' [typedargslist] ')' -typedargslist: ((tfpdef ['=' test] ',')* - ('*' [tname] (',' tname ['=' test])* [',' '**' tname] | '**' tname) - | tfpdef ['=' test] (',' tfpdef ['=' test])* [',']) -tname: NAME [':' test] -tfpdef: tname | '(' tfplist ')' -tfplist: tfpdef (',' tfpdef)* [','] -varargslist: ((vfpdef ['=' test] ',')* - ('*' [vname] (',' vname ['=' test])* [',' '**' vname] | '**' vname) - | vfpdef ['=' test] (',' vfpdef ['=' test])* [',']) -vname: NAME -vfpdef: vname | '(' vfplist ')' -vfplist: vfpdef (',' vfpdef)* [','] +funcdef: 'def' NAME parameters ':' suite +parameters: '(' [varargslist] ')' +varargslist: ((fpdef ['=' test] ',')* + ('*' NAME [',' '**' NAME] | '**' NAME) | + fpdef ['=' test] (',' fpdef ['=' test])* [',']) +fpdef: NAME | '(' fplist ')' +fplist: fpdef (',' fpdef)* [','] stmt: simple_stmt | compound_stmt simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE small_stmt: (expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt | import_stmt | global_stmt | exec_stmt | assert_stmt) -expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) | - ('=' (yield_expr|testlist_star_expr))*) -testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [','] +expr_stmt: testlist (augassign (yield_expr|testlist) | + ('=' (yield_expr|testlist))*) augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | '<<=' | '>>=' | '**=' | '//=') # For normal assignments, additional restrictions enforced by the interpreter @@ -56,7 +38,7 @@ break_stmt: 'break' continue_stmt: 'continue' return_stmt: 'return' [testlist] yield_stmt: yield_expr -raise_stmt: 'raise' [test ['from' test | ',' test [',' test]]] +raise_stmt: 'raise' [test [',' test [',' test]]] import_stmt: import_name | import_from import_name: 'import' dotted_as_names import_from: ('from' ('.'* dotted_name | '.'+) @@ -66,7 +48,7 @@ dotted_as_name: dotted_name ['as' NAME] import_as_names: import_as_name (',' import_as_name)* [','] dotted_as_names: dotted_as_name (',' dotted_as_name)* dotted_name: NAME ('.' NAME)* -global_stmt: ('global' | 'nonlocal') NAME (',' NAME)* +global_stmt: 'global' NAME (',' NAME)* exec_stmt: 'exec' expr ['in' test [',' test]] assert_stmt: 'assert' test [',' test] @@ -82,7 +64,7 @@ try_stmt: ('try' ':' suite with_stmt: 'with' with_item (',' with_item)* ':' suite with_item: test ['as' expr] # NB compile.c makes sure that the default except clause is last -except_clause: 'except' [test [(',' | 'as') test]] +except_clause: 'except' [test [('as' | ',') test]] suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT # Backward compatibility cruft to support: @@ -100,7 +82,6 @@ and_test: not_test ('and' not_test)* not_test: 'not' not_test | comparison comparison: expr (comp_op expr)* comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' -star_expr: '*' expr expr: xor_expr ('|' xor_expr)* xor_expr: and_expr ('^' and_expr)* and_expr: shift_expr ('&' shift_expr)* @@ -109,32 +90,39 @@ arith_expr: term (('+'|'-') term)* term: factor (('*'|'/'|'%'|'//') factor)* factor: ('+'|'-'|'~') factor | power power: atom trailer* ['**' factor] -atom: ('(' [yield_expr|testlist_gexp] ')' | +atom: ('(' [yield_expr|testlist_comp] ')' | '[' [listmaker] ']' | - '{' [dictsetmaker] '}' | + '{' [dictorsetmaker] '}' | '`' testlist1 '`' | - NAME | NUMBER | STRING+ | '.' '.' '.') -listmaker: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] ) -testlist_gexp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] ) +# '>>' test | + NAME | NUMBER | STRING+) +listmaker: test ( list_for | (',' test)* [','] ) +testlist_comp: test ( comp_for | (',' test)* [','] ) lambdef: 'lambda' [varargslist] ':' test trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME subscriptlist: subscript (',' subscript)* [','] -subscript: test | [test] ':' [test] [sliceop] +subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop] sliceop: ':' [test] -exprlist: (expr|star_expr) (',' (expr|star_expr))* [','] +exprlist: expr (',' expr)* [','] testlist: test (',' test)* [','] -dictsetmaker: ( (test ':' test (comp_for | (',' test ':' test)* [','])) | - (test (comp_for | (',' test)* [','])) ) +dictorsetmaker: ( (test ':' test (comp_for | (',' test ':' test)* [','])) | + (test (comp_for | (',' test)* [','])) ) -classdef: 'class' NAME ['(' [arglist] ')'] ':' suite +classdef: 'class' NAME ['(' [testlist] ')'] ':' suite arglist: (argument ',')* (argument [','] - |'*' test (',' argument)* [',' '**' test] + |'*' test (',' argument)* [',' '**' test] |'**' test) -argument: test [comp_for] | test '=' test # Really [keyword '='] test +# The reason that keywords are test nodes instead of NAME is that using NAME +# results in an ambiguity. ast.c makes sure it's a NAME. +argument: test [comp_for] | test '=' test + +list_iter: list_for | list_if +list_for: 'for' exprlist 'in' testlist_safe [list_iter] +list_if: 'if' old_test [list_iter] comp_iter: comp_for | comp_if -comp_for: 'for' exprlist 'in' testlist_safe [comp_iter] +comp_for: 'for' exprlist 'in' or_test [comp_iter] comp_if: 'if' old_test [comp_iter] testlist1: test (',' test)* diff --git a/sphinx/pycode/Grammar-py3.txt b/sphinx/pycode/Grammar-py3.txt new file mode 100644 index 000000000..92922accf --- /dev/null +++ b/sphinx/pycode/Grammar-py3.txt @@ -0,0 +1,123 @@ +# Grammar for Python 3.x (with at least x <= 4) + +# Start symbols for the grammar: +# single_input is a single interactive statement; +# file_input is a module or sequence of commands read from an input file; +# eval_input is the input for the eval() functions. +# NB: compound_stmt in single_input is followed by extra NEWLINE! +single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE +file_input: (NEWLINE | stmt)* ENDMARKER +eval_input: testlist NEWLINE* ENDMARKER + +decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE +decorators: decorator+ +decorated: decorators (classdef | funcdef) +funcdef: 'def' NAME parameters ['->' test] ':' suite +parameters: '(' [typedargslist] ')' +typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' + ['*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef]] + | '*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef) +tfpdef: NAME [':' test] +varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' + ['*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef]] + | '*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef) +vfpdef: NAME + +stmt: simple_stmt | compound_stmt +simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE +small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt | + import_stmt | global_stmt | nonlocal_stmt | assert_stmt) +expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) | + ('=' (yield_expr|testlist_star_expr))*) +testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [','] +augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | + '<<=' | '>>=' | '**=' | '//=') +# For normal assignments, additional restrictions enforced by the interpreter +del_stmt: 'del' exprlist +pass_stmt: 'pass' +flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt +break_stmt: 'break' +continue_stmt: 'continue' +return_stmt: 'return' [testlist] +yield_stmt: yield_expr +raise_stmt: 'raise' [test ['from' test]] +import_stmt: import_name | import_from +import_name: 'import' dotted_as_names +# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS +import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+) + 'import' ('*' | '(' import_as_names ')' | import_as_names)) +import_as_name: NAME ['as' NAME] +dotted_as_name: dotted_name ['as' NAME] +import_as_names: import_as_name (',' import_as_name)* [','] +dotted_as_names: dotted_as_name (',' dotted_as_name)* +dotted_name: NAME ('.' NAME)* +global_stmt: 'global' NAME (',' NAME)* +nonlocal_stmt: 'nonlocal' NAME (',' NAME)* +assert_stmt: 'assert' test [',' test] + +compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated +if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] +while_stmt: 'while' test ':' suite ['else' ':' suite] +for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] +try_stmt: ('try' ':' suite + ((except_clause ':' suite)+ + ['else' ':' suite] + ['finally' ':' suite] | + 'finally' ':' suite)) +with_stmt: 'with' with_item (',' with_item)* ':' suite +with_item: test ['as' expr] +# NB compile.c makes sure that the default except clause is last +except_clause: 'except' [test ['as' NAME]] +suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT + +test: or_test ['if' or_test 'else' test] | lambdef +test_nocond: or_test | lambdef_nocond +lambdef: 'lambda' [varargslist] ':' test +lambdef_nocond: 'lambda' [varargslist] ':' test_nocond +or_test: and_test ('or' and_test)* +and_test: not_test ('and' not_test)* +not_test: 'not' not_test | comparison +comparison: expr (comp_op expr)* +# <> isn't actually a valid comparison operator in Python. It's here for the +# sake of a __future__ import described in PEP 401 +comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' +star_expr: '*' expr +expr: xor_expr ('|' xor_expr)* +xor_expr: and_expr ('^' and_expr)* +and_expr: shift_expr ('&' shift_expr)* +shift_expr: arith_expr (('<<'|'>>') arith_expr)* +arith_expr: term (('+'|'-') term)* +term: factor (('*'|'/'|'%'|'//') factor)* +factor: ('+'|'-'|'~') factor | power +power: atom trailer* ['**' factor] +atom: ('(' [yield_expr|testlist_comp] ')' | + '[' [testlist_comp] ']' | + '{' [dictorsetmaker] '}' | + NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False') +testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] ) +trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME +subscriptlist: subscript (',' subscript)* [','] +subscript: test | [test] ':' [test] [sliceop] +sliceop: ':' [test] +exprlist: (expr|star_expr) (',' (expr|star_expr))* [','] +testlist: test (',' test)* [','] +dictorsetmaker: ( (test ':' test (comp_for | (',' test ':' test)* [','])) | + (test (comp_for | (',' test)* [','])) ) + +classdef: 'class' NAME ['(' [arglist] ')'] ':' suite + +arglist: (argument ',')* (argument [','] + |'*' test (',' argument)* [',' '**' test] + |'**' test) +# The reason that keywords are test nodes instead of NAME is that using NAME +# results in an ambiguity. ast.c makes sure it's a NAME. +argument: test [comp_for] | test '=' test # Really [keyword '='] test +comp_iter: comp_for | comp_if +comp_for: 'for' exprlist 'in' or_test [comp_iter] +comp_if: 'if' test_nocond [comp_iter] + +# not used in grammar, but may appear in "node" passed from Parser to Compiler +encoding_decl: NAME + +yield_expr: 'yield' [yield_arg] +yield_arg: 'from' test | testlist diff --git a/sphinx/pycode/__init__.py b/sphinx/pycode/__init__.py index 64999df88..2c0708f91 100644 --- a/sphinx/pycode/__init__.py +++ b/sphinx/pycode/__init__.py @@ -9,6 +9,7 @@ :license: BSD, see LICENSE for details. """ +import sys from os import path from sphinx import package_dir @@ -21,7 +22,8 @@ from sphinx.util.docstrings import prepare_docstring, prepare_commentdoc # load the Python grammar -_grammarfile = path.join(package_dir, 'pycode', 'Grammar.txt') +_grammarfile = path.join(package_dir, 'pycode', + 'Grammar-py%d.txt' % sys.version_info[0]) pygrammar = driver.load_grammar(_grammarfile) pydriver = driver.Driver(pygrammar, convert=nodes.convert) diff --git a/sphinx/pycode/pgen2/driver.py b/sphinx/pycode/pgen2/driver.py index e9e907dcc..422671dbc 100644 --- a/sphinx/pycode/pgen2/driver.py +++ b/sphinx/pycode/pgen2/driver.py @@ -122,7 +122,8 @@ def load_grammar(gt="Grammar.txt", gp=None, if tail == ".txt": tail = "" # embed Sphinx major version for the case we ever change the grammar... - gp = head + tail + ".".join(map(str, sphinx.version_info[:2])) + ".pickle" + gp = head + tail + "-sphinx" + \ + ".".join(map(str, sphinx.version_info[:2])) + ".pickle" if force or not _newer(gp, gt): logger.info("Generating grammar tables from %s", gt) g = pgen.generate_grammar(gt) diff --git a/sphinx/pycode/pgen2/grammar.py b/sphinx/pycode/pgen2/grammar.py index 5a433578b..01d843461 100644 --- a/sphinx/pycode/pgen2/grammar.py +++ b/sphinx/pycode/pgen2/grammar.py @@ -162,6 +162,7 @@ opmap_raw = """ // DOUBLESLASH //= DOUBLESLASHEQUAL -> RARROW +... ELLIPSIS """ opmap = {} diff --git a/sphinx/pycode/pgen2/token.py b/sphinx/pycode/pgen2/token.py index 61468b313..56a40ce75 100755 --- a/sphinx/pycode/pgen2/token.py +++ b/sphinx/pycode/pgen2/token.py @@ -62,7 +62,8 @@ COMMENT = 52 NL = 53 RARROW = 54 ERRORTOKEN = 55 -N_TOKENS = 56 +ELLIPSIS = 56 +N_TOKENS = 57 NT_OFFSET = 256 #--end constants--