Mirror of https://github.com/sphinx-doc/sphinx.git (synced 2025-02-25 18:55:22 -06:00)
@@ -19,13 +19,11 @@ include sphinx/locale/.tx/config
recursive-include sphinx/templates *
recursive-include sphinx/texinputs *
recursive-include sphinx/themes *
recursive-include sphinx/pycode/pgen2 *.c *.pyx
recursive-include sphinx/locale *.js *.pot *.po *.mo
recursive-include sphinx/search/non-minified-js *.js
recursive-include sphinx/ext/autosummary/templates *
recursive-include tests *
recursive-include utils *
include sphinx/pycode/Grammar-py*

recursive-include doc *
prune doc/_build
@@ -256,7 +256,7 @@ class LiteralIncludeReader(object):
            else:
                start = tags[pyobject][1]
                end = tags[pyobject][2]
                lines = lines[start - 1:end - 1]
                lines = lines[start - 1:end]
                if 'lineno-match' in self.options:
                    self.lineno_start = start
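The one-line change above fixes an off-by-one in the pyobject filter: find_tags() reports a 1-based, inclusive (kind, start, end) line range, so the Python slice has to stop at end, not end - 1 (the old slice dropped the last line of the selected object). A minimal sketch of the behaviour, using a made-up tag table rather than a real ModuleAnalyzer:

    # Hypothetical tag table in the same shape find_tags() returns:
    # name -> (kind, start_line, end_line), 1-based and inclusive.
    lines = [
        "class Foo(object):\n",   # line 1
        "    def bar(self):\n",   # line 2
        "        return 1\n",     # line 3
        "\n",                     # line 4
    ]
    tags = {'Foo': ('class', 1, 3)}

    start, end = tags['Foo'][1], tags['Foo'][2]
    old = lines[start - 1:end - 1]   # before the fix: loses "return 1"
    new = lines[start - 1:end]       # after the fix: keeps lines 1..3 inclusive

    assert old == lines[0:2]
    assert new == lines[0:3]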
@@ -1,135 +0,0 @@
# Grammar for Python 2.x

# IMPORTANT: when copying over a new Grammar file, make sure file_input
# is the first nonterminal in the file!

# Start symbols for the grammar:
# single_input is a single interactive statement;
# file_input is a module or sequence of commands read from an input file;
# eval_input is the input for the eval() and input() functions.
# NB: compound_stmt in single_input is followed by extra NEWLINE!
file_input: (NEWLINE | stmt)* ENDMARKER
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
eval_input: testlist NEWLINE* ENDMARKER

decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
decorators: decorator+
decorated: decorators (classdef | funcdef)
funcdef: 'def' NAME parameters ':' suite
parameters: '(' [varargslist] ')'
varargslist: ((fpdef ['=' test] ',')*
              ('*' NAME [',' '**' NAME] | '**' NAME) |
              fpdef ['=' test] (',' fpdef ['=' test])* [','])
fpdef: NAME | '(' fplist ')'
fplist: fpdef (',' fpdef)* [',']

stmt: simple_stmt | compound_stmt
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
small_stmt: (expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt |
             import_stmt | global_stmt | exec_stmt | assert_stmt)
expr_stmt: testlist (augassign (yield_expr|testlist) |
                     ('=' (yield_expr|testlist))*)
augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |
            '<<=' | '>>=' | '**=' | '//=')
# For normal assignments, additional restrictions enforced by the interpreter
print_stmt: 'print' ( [ test (',' test)* [','] ] |
                      '>>' test [ (',' test)+ [','] ] )
del_stmt: 'del' exprlist
pass_stmt: 'pass'
flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
break_stmt: 'break'
continue_stmt: 'continue'
return_stmt: 'return' [testlist]
yield_stmt: yield_expr
raise_stmt: 'raise' [test [',' test [',' test]]]
import_stmt: import_name | import_from
import_name: 'import' dotted_as_names
import_from: ('from' ('.'* dotted_name | '.'+)
              'import' ('*' | '(' import_as_names ')' | import_as_names))
import_as_name: NAME ['as' NAME]
dotted_as_name: dotted_name ['as' NAME]
import_as_names: import_as_name (',' import_as_name)* [',']
dotted_as_names: dotted_as_name (',' dotted_as_name)*
dotted_name: NAME ('.' NAME)*
global_stmt: 'global' NAME (',' NAME)*
exec_stmt: 'exec' expr ['in' test [',' test]]
assert_stmt: 'assert' test [',' test]

compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated
if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
while_stmt: 'while' test ':' suite ['else' ':' suite]
for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
try_stmt: ('try' ':' suite
           ((except_clause ':' suite)+
            ['else' ':' suite]
            ['finally' ':' suite] |
            'finally' ':' suite))
with_stmt: 'with' with_item (',' with_item)* ':' suite
with_item: test ['as' expr]
# NB compile.c makes sure that the default except clause is last
except_clause: 'except' [test [('as' | ',') test]]
suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT

# Backward compatibility cruft to support:
# [ x for x in lambda: True, lambda: False if x() ]
# even while also allowing:
# lambda x: 5 if x else 2
# (But not a mix of the two)
testlist_safe: old_test [(',' old_test)+ [',']]
old_test: or_test | old_lambdef
old_lambdef: 'lambda' [varargslist] ':' old_test

test: or_test ['if' or_test 'else' test] | lambdef
or_test: and_test ('or' and_test)*
and_test: not_test ('and' not_test)*
not_test: 'not' not_test | comparison
comparison: expr (comp_op expr)*
comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
expr: xor_expr ('|' xor_expr)*
xor_expr: and_expr ('^' and_expr)*
and_expr: shift_expr ('&' shift_expr)*
shift_expr: arith_expr (('<<'|'>>') arith_expr)*
arith_expr: term (('+'|'-') term)*
term: factor (('*'|'/'|'%'|'//') factor)*
factor: ('+'|'-'|'~') factor | power
power: atom trailer* ['**' factor]
atom: ('(' [yield_expr|testlist_comp] ')' |
       '[' [listmaker] ']' |
       '{' [dictorsetmaker] '}' |
       '`' testlist1 '`' |
       NAME | NUMBER | STRING+)
listmaker: test ( list_for | (',' test)* [','] )
testlist_comp: test ( comp_for | (',' test)* [','] )
lambdef: 'lambda' [varargslist] ':' test
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
subscriptlist: subscript (',' subscript)* [',']
subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop]
sliceop: ':' [test]
exprlist: expr (',' expr)* [',']
testlist: test (',' test)* [',']
dictorsetmaker: ( (test ':' test (comp_for | (',' test ':' test)* [','])) |
                  (test (comp_for | (',' test)* [','])) )

classdef: 'class' NAME ['(' [testlist] ')'] ':' suite

arglist: (argument ',')* (argument [',']
                          |'*' test (',' argument)* [',' '**' test]
                          |'**' test)
# The reason that keywords are test nodes instead of NAME is that using NAME
# results in an ambiguity. ast.c makes sure it's a NAME.
argument: test [comp_for] | test '=' test

list_iter: list_for | list_if
list_for: 'for' exprlist 'in' testlist_safe [list_iter]
list_if: 'if' old_test [list_iter]

comp_iter: comp_for | comp_if
comp_for: 'for' exprlist 'in' or_test [comp_iter]
comp_if: 'if' old_test [comp_iter]

testlist1: test (',' test)*

# not used in grammar, but may appear in "node" passed from Parser to Compiler
encoding_decl: NAME

yield_expr: 'yield' [testlist]
@@ -1,143 +0,0 @@
# Grammar for Python 3.x (with at least x <= 5)

# IMPORTANT: when copying over a new Grammar file, make sure file_input
# is the first nonterminal in the file!

# Start symbols for the grammar:
# single_input is a single interactive statement;
# file_input is a module or sequence of commands read from an input file;
# eval_input is the input for the eval() functions.
# NB: compound_stmt in single_input is followed by extra NEWLINE!
file_input: (NEWLINE | stmt)* ENDMARKER
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
eval_input: testlist NEWLINE* ENDMARKER

decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
decorators: decorator+
decorated: decorators (classdef | funcdef | async_funcdef)

async_funcdef: ASYNC funcdef
funcdef: 'def' NAME parameters ['->' test] ':' suite

parameters: '(' [typedargslist] ')'
typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [','
                ['*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef]]
               | '*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef)
tfpdef: NAME [':' test]
varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [','
              ['*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef]]
             | '*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef)
vfpdef: NAME

stmt: simple_stmt | compound_stmt
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt |
             import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) |
                               ('=' (yield_expr|testlist_star_expr))*)
testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' |
            '<<=' | '>>=' | '**=' | '//=')
# For normal assignments, additional restrictions enforced by the interpreter
del_stmt: 'del' exprlist
pass_stmt: 'pass'
flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
break_stmt: 'break'
continue_stmt: 'continue'
return_stmt: 'return' [testlist]
yield_stmt: yield_expr
raise_stmt: 'raise' [test ['from' test]]
import_stmt: import_name | import_from
import_name: 'import' dotted_as_names
# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+)
              'import' ('*' | '(' import_as_names ')' | import_as_names))
import_as_name: NAME ['as' NAME]
dotted_as_name: dotted_name ['as' NAME]
import_as_names: import_as_name (',' import_as_name)* [',']
dotted_as_names: dotted_as_name (',' dotted_as_name)*
dotted_name: NAME ('.' NAME)*
global_stmt: 'global' NAME (',' NAME)*
nonlocal_stmt: 'nonlocal' NAME (',' NAME)*
assert_stmt: 'assert' test [',' test]

compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt
async_stmt: ASYNC (funcdef | with_stmt | for_stmt)
if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
while_stmt: 'while' test ':' suite ['else' ':' suite]
for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
try_stmt: ('try' ':' suite
           ((except_clause ':' suite)+
            ['else' ':' suite]
            ['finally' ':' suite] |
            'finally' ':' suite))
with_stmt: 'with' with_item (',' with_item)* ':' suite
with_item: test ['as' expr]
# NB compile.c makes sure that the default except clause is last
except_clause: 'except' [test ['as' NAME]]
suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT

test: or_test ['if' or_test 'else' test] | lambdef
test_nocond: or_test | lambdef_nocond
lambdef: 'lambda' [varargslist] ':' test
lambdef_nocond: 'lambda' [varargslist] ':' test_nocond
or_test: and_test ('or' and_test)*
and_test: not_test ('and' not_test)*
not_test: 'not' not_test | comparison
comparison: expr (comp_op expr)*
# <> isn't actually a valid comparison operator in Python. It's here for the
# sake of a __future__ import described in PEP 401 (which really works :-)
comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
star_expr: '*' expr
expr: xor_expr ('|' xor_expr)*
xor_expr: and_expr ('^' and_expr)*
and_expr: shift_expr ('&' shift_expr)*
shift_expr: arith_expr (('<<'|'>>') arith_expr)*
arith_expr: term (('+'|'-') term)*
term: factor (('*'|'@'|'/'|'%'|'//') factor)*
factor: ('+'|'-'|'~') factor | power
power: [AWAIT] atom trailer* ['**' factor]
atom: ('(' [yield_expr|testlist_comp] ')' |
       '[' [testlist_comp] ']' |
       '{' [dictorsetmaker] '}' |
       NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
subscriptlist: subscript (',' subscript)* [',']
subscript: test | [test] ':' [test] [sliceop]
sliceop: ':' [test]
exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
testlist: test (',' test)* [',']
dictorsetmaker: ( ((test ':' test | '**' expr)
                   (comp_for | (',' (test ':' test | '**' expr))* [','])) |
                  ((test | star_expr)
                   (comp_for | (',' (test | star_expr))* [','])) )

classdef: 'class' NAME ['(' [arglist] ')'] ':' suite

arglist: argument (',' argument)* [',']

# The reason that keywords are test nodes instead of NAME is that using NAME
# results in an ambiguity. ast.c makes sure it's a NAME.
# "test '=' test" is really "keyword '=' test", but we have no such token.
# These need to be in a single rule to avoid grammar that is ambiguous
# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr,
# we explicitly match '*' here, too, to give it proper precedence.
# Illegal combinations and orderings are blocked in ast.c:
# multiple (test comp_for) arguements are blocked; keyword unpackings
# that precede iterable unpackings are blocked; etc.
argument: ( test [comp_for] |
            test '=' test |
            '**' test |
            '*' test )

comp_iter: comp_for | comp_if
comp_for: 'for' exprlist 'in' or_test [comp_iter]
comp_if: 'if' test_nocond [comp_iter]

# not used in grammar, but may appear in "node" passed from Parser to Compiler
encoding_decl: NAME

yield_expr: 'yield' [yield_arg]
yield_arg: 'from' test | testlist
@@ -10,174 +10,15 @@
"""
from __future__ import print_function

import re
import sys
from os import path
from six import iteritems, BytesIO, StringIO

from six import iteritems, text_type, BytesIO, StringIO

from sphinx import package_dir
from sphinx.errors import PycodeError
from sphinx.pycode import nodes
from sphinx.pycode.pgen2 import driver, token, tokenize, parse, literals
from sphinx.pycode.parser import Parser
from sphinx.util import get_module_source, detect_encoding
from sphinx.util.pycompat import TextIOWrapper
from sphinx.util.docstrings import prepare_docstring, prepare_commentdoc

if False:
    # For type annotation
    from typing import Any, Dict, List, Tuple  # NOQA


# load the Python grammar
_grammarfile = path.join(package_dir, 'pycode',
                         'Grammar-py%d.txt' % sys.version_info[0])
pygrammar = driver.load_grammar(_grammarfile)
pydriver = driver.Driver(pygrammar, convert=nodes.convert)


# an object with attributes corresponding to token and symbol names
class sym(object):
    pass


for k, v in iteritems(pygrammar.symbol2number):
    setattr(sym, k, v)
for k, v in iteritems(token.tok_name):
    setattr(sym, v, k)

# a dict mapping terminal and nonterminal numbers to their names
number2name = pygrammar.number2symbol.copy()
number2name.update(token.tok_name)

_eq = nodes.Leaf(token.EQUAL, '=')

emptyline_re = re.compile(r'^\s*(#.*)?$')


class AttrDocVisitor(nodes.NodeVisitor):
    """
    Visitor that collects docstrings for attribute assignments on toplevel and
    in classes (class attributes and attributes set in __init__).

    The docstrings can either be in special '#:' comments before the assignment
    or in a docstring after it.
    """
    def init(self, scope, encoding):
        self.scope = scope
        self.in_init = 0
        self.encoding = encoding
        self.namespace = []  # type: List[unicode]
        self.collected = {}  # type: Dict[Tuple[unicode, unicode], unicode]
        self.tagnumber = 0
        self.tagorder = {}  # type: Dict[unicode, int]

    def add_tag(self, name):
        name = '.'.join(self.namespace + [name])
        self.tagorder[name] = self.tagnumber
        self.tagnumber += 1

    def visit_classdef(self, node):
        """Visit a class."""
        self.add_tag(node[1].value)
        self.namespace.append(node[1].value)
        self.generic_visit(node)
        self.namespace.pop()

    def visit_funcdef(self, node):
        """Visit a function (or method)."""
        # usually, don't descend into functions -- nothing interesting there
        self.add_tag(node[1].value)
        if node[1].value == '__init__':
            # however, collect attributes set in __init__ methods
            self.in_init += 1
            self.generic_visit(node)
            self.in_init -= 1

    def visit_expr_stmt(self, node):
        """Visit an assignment which may have a special comment before (or
        after) it.
        """
        if _eq not in node.children:
            # not an assignment (we don't care for augmented assignments)
            return
        # look *after* the node; there may be a comment prefixing the NEWLINE
        # of the simple_stmt
        parent = node.parent
        idx = parent.children.index(node) + 1
        while idx < len(parent):
            if parent[idx].type == sym.SEMI:  # type: ignore
                idx += 1
                continue  # skip over semicolon
            if parent[idx].type == sym.NEWLINE:  # type: ignore
                prefix = parent[idx].get_prefix()
                if not isinstance(prefix, text_type):
                    prefix = prefix.decode(self.encoding)
                docstring = prepare_commentdoc(prefix)
                if docstring:
                    self.add_docstring(node, docstring)
                    return  # don't allow docstrings both before and after
            break
        # now look *before* the node
        pnode = node[0]
        prefix = pnode.get_prefix()
        # if the assignment is the first statement on a new indentation
        # level, its preceding whitespace and comments are not assigned
        # to that token, but the first INDENT or DEDENT token
        while not prefix:
            pnode = pnode.get_prev_leaf()
            if not pnode or pnode.type not in (token.INDENT, token.DEDENT):
                break
            prefix = pnode.get_prefix()
        if not isinstance(prefix, text_type):
            prefix = prefix.decode(self.encoding)
        docstring = prepare_commentdoc(prefix)
        self.add_docstring(node, docstring)

    def visit_simple_stmt(self, node):
        """Visit a docstring statement which may have an assignment before."""
        if node[0].type != token.STRING:
            # not a docstring; but still need to visit children
            return self.generic_visit(node)
        prev = node.get_prev_sibling()
        if not prev:
            return
        if (prev.type == sym.simple_stmt and  # type: ignore
                prev[0].type == sym.expr_stmt and _eq in prev[0].children):  # type: ignore
            # need to "eval" the string because it's returned in its
            # original form
            docstring = literals.evalString(node[0].value, self.encoding)
            docstring = prepare_docstring(docstring)
            self.add_docstring(prev[0], docstring)

    def add_docstring(self, node, docstring):
        # add an item for each assignment target
        for i in range(0, len(node) - 1, 2):
            target = node[i]
            if self.in_init and self.number2name[target.type] == 'power':
                # maybe an attribute assignment -- check necessary conditions
                if (  # node must have two children
                        len(target) != 2 or
                        # first child must be "self"
                        target[0].type != token.NAME or target[0].value != 'self' or
                        # second child must be a "trailer" with two children
                        self.number2name[target[1].type] != 'trailer' or
                        len(target[1]) != 2 or
                        # first child must be a dot, second child a name
                        target[1][0].type != token.DOT or
                        target[1][1].type != token.NAME):
                    continue
                name = target[1][1].value
            elif target.type != token.NAME:
                # don't care about other complex targets
                continue
            else:
                name = target.value
            self.add_tag(name)
            if docstring:
                namespace = '.'.join(self.namespace)
                if namespace.startswith(self.scope):
                    self.collected[namespace, name] = docstring
from typing import Any, Dict, IO, List, Tuple  # NOQA


class ModuleAnalyzer(object):
@@ -223,137 +64,59 @@ class ModuleAnalyzer(object):
        return obj

    def __init__(self, source, modname, srcname, decoded=False):
        # name of the module
        self.modname = modname
        # name of the source file
        self.srcname = srcname
        # file-like object yielding source lines
        self.source = source
        # type: (IO, unicode, unicode, bool) -> None
        self.modname = modname  # name of the module
        self.srcname = srcname  # name of the source file

        # cache the source code as well
        pos = self.source.tell()
        pos = source.tell()
        if not decoded:
            self.encoding = detect_encoding(self.source.readline)
            self.source.seek(pos)
            self.code = self.source.read().decode(self.encoding)
            self.source.seek(pos)
            self.source = TextIOWrapper(self.source, self.encoding)
            self.encoding = detect_encoding(source.readline)
            source.seek(pos)
            self.code = source.read().decode(self.encoding)
        else:
            self.encoding = None
            self.code = self.source.read()
            self.source.seek(pos)
            self.code = source.read()

        # will be filled by tokenize()
        self.tokens = None  # type: List[unicode]
        # will be filled by parse()
        self.parsetree = None  # type: Any
        # will be filled by find_attr_docs()
        self.attr_docs = None  # type: List[unicode]
        self.attr_docs = None  # type: Dict[Tuple[unicode, unicode], List[unicode]]
        self.tagorder = None  # type: Dict[unicode, int]
        # will be filled by find_tags()
        self.tags = None  # type: List[unicode]

    def tokenize(self):
        """Generate tokens from the source."""
        if self.tokens is not None:
            return
        try:
            self.tokens = list(tokenize.generate_tokens(self.source.readline))
        except tokenize.TokenError as err:
            raise PycodeError('tokenizing failed', err)
        self.source.close()
        self.tags = None  # type: Dict[unicode, Tuple[unicode, int, int]]

    def parse(self):
        """Parse the generated source tokens."""
        if self.parsetree is not None:
            return
        self.tokenize()
        # type: () -> None
        """Parse the source code."""
        try:
            self.parsetree = pydriver.parse_tokens(self.tokens)
        except parse.ParseError as err:
            raise PycodeError('parsing failed', err)
            parser = Parser(self.code, self.encoding)
            parser.parse()

    def find_attr_docs(self, scope=''):
            self.attr_docs = {}
            for (scope, comment) in iteritems(parser.comments):
                if comment:
                    self.attr_docs[scope] = comment.splitlines() + ['']
                else:
                    self.attr_docs[scope] = ['']

            self.tags = parser.definitions
            self.tagorder = parser.deforders
        except Exception as exc:
            raise PycodeError('parsing failed: %r' % exc)

    def find_attr_docs(self):
        # type: () -> Dict[Tuple[unicode, unicode], List[unicode]]
        """Find class and module-level attributes and their documentation."""
        if self.attr_docs is not None:
            return self.attr_docs
        self.parse()
        attr_visitor = AttrDocVisitor(number2name, scope, self.encoding)
        attr_visitor.visit(self.parsetree)
        self.attr_docs = attr_visitor.collected
        self.tagorder = attr_visitor.tagorder
        # now that we found everything we could in the tree, throw it away
        # (it takes quite a bit of memory for large modules)
        self.parsetree = None
        return attr_visitor.collected
        if self.attr_docs is None:
            self.parse()

        return self.attr_docs

    def find_tags(self):
        # type: () -> Dict[unicode, Tuple[unicode, int, int]]
        """Find class, function and method definitions and their location."""
        if self.tags is not None:
            return self.tags
        self.tokenize()
        result = {}
        namespace = []  # type: List[unicode]
        stack = []  # type: List[Tuple[unicode, unicode, unicode, int]]
        indent = 0
        decopos = None
        defline = False
        expect_indent = False
        emptylines = 0
        if self.tags is None:
            self.parse()

        def tokeniter(ignore = (token.COMMENT,)):
            for tokentup in self.tokens:
                if tokentup[0] not in ignore:
                    yield tokentup
        tokeniter = tokeniter()
        for type, tok, spos, epos, line in tokeniter:  # type: ignore
            if expect_indent and type != token.NL:
                if type != token.INDENT:
                    # no suite -- one-line definition
                    assert stack
                    dtype, fullname, startline, _ = stack.pop()
                    endline = epos[0]
                    namespace.pop()
                    result[fullname] = (dtype, startline, endline - emptylines)
                expect_indent = False
            if tok in ('def', 'class'):
                name = next(tokeniter)[1]  # type: ignore
                namespace.append(name)
                fullname = '.'.join(namespace)
                stack.append((tok, fullname, decopos or spos[0], indent))
                defline = True
                decopos = None
            elif type == token.OP and tok == '@':
                if decopos is None:
                    decopos = spos[0]
            elif type == token.INDENT:
                expect_indent = False
                indent += 1
            elif type == token.DEDENT:
                indent -= 1
                # if the stacklevel is the same as it was before the last
                # def/class block, this dedent closes that block
                if stack and indent == stack[-1][3]:
                    dtype, fullname, startline, _ = stack.pop()
                    endline = spos[0]
                    namespace.pop()
                    result[fullname] = (dtype, startline, endline - emptylines)
            elif type == token.NEWLINE:
                # if this line contained a definition, expect an INDENT
                # to start the suite; if there is no such INDENT
                # it's a one-line definition
                if defline:
                    defline = False
                    expect_indent = True
                emptylines = 0
            elif type == token.NL:
                # count up if line is empty or comment only
                if emptyline_re.match(line):
                    emptylines += 1
                else:
                    emptylines = 0
        self.tags = result
        return result
        return self.tags


if __name__ == '__main__':
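Taken together, the rewritten ModuleAnalyzer no longer tokenizes or builds a pgen2 parse tree itself: parse() now delegates to the new sphinx.pycode.parser.Parser and fills attr_docs, tags and tagorder in one pass, while find_attr_docs() and find_tags() become thin caching wrappers. A minimal usage sketch, based only on the constructor and methods shown above (the module name, file name and source text are made up, and the values in the comments are what the code above should produce, not verified output):

    from io import BytesIO
    from sphinx.pycode import ModuleAnalyzer

    source = b'#: number of retries before giving up\nRETRIES = 3\n'

    # signature shown above: (source, modname, srcname, decoded=False)
    analyzer = ModuleAnalyzer(BytesIO(source), 'example_mod', 'example_mod.py')

    attr_docs = analyzer.find_attr_docs()
    # e.g. {('', 'RETRIES'): ['number of retries before giving up', '']}
    tags = analyzer.find_tags()
    # e.g. {} -- the sample module defines no classes or functions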
@@ -1,212 +0,0 @@
# -*- coding: utf-8 -*-
"""
    sphinx.pycode.nodes
    ~~~~~~~~~~~~~~~~~~~

    Parse tree node implementations.

    :copyright: Copyright 2007-2017 by the Sphinx team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

if False:
    # For type annotation
    from typing import Callable  # NOQA


class BaseNode(object):
    """
    Node superclass for both terminal and nonterminal nodes.
    """
    parent = None  # type: BaseNode

    def _eq(self, other):
        raise NotImplementedError

    def __eq__(self, other):
        if self.__class__ is not other.__class__:
            return NotImplemented
        return self._eq(other)

    def __ne__(self, other):
        if self.__class__ is not other.__class__:
            return NotImplemented
        return not self._eq(other)

    __hash__ = None  # type: Callable[[object], int]

    def get_prev_sibling(self):
        """Return previous child in parent's children, or None."""
        if self.parent is None:
            return None
        for i, child in enumerate(self.parent.children):
            if child is self:
                if i == 0:
                    return None
                return self.parent.children[i - 1]

    def get_next_sibling(self):
        """Return next child in parent's children, or None."""
        if self.parent is None:
            return None
        for i, child in enumerate(self.parent.children):
            if child is self:
                try:
                    return self.parent.children[i + 1]
                except IndexError:
                    return None

    def get_prev_leaf(self):
        """Return the leaf node that precedes this node in the parse tree."""
        def last_child(node):
            if isinstance(node, Leaf):
                return node
            elif not node.children:
                return None
            else:
                return last_child(node.children[-1])
        if self.parent is None:
            return None
        prev = self.get_prev_sibling()
        if isinstance(prev, Leaf):
            return prev
        elif prev is not None:
            return last_child(prev)
        return self.parent.get_prev_leaf()

    def get_next_leaf(self):
        """Return self if leaf, otherwise the leaf node that succeeds this
        node in the parse tree.
        """
        node = self
        while not isinstance(node, Leaf):
            assert node.children
            node = node.children[0]
        return node

    def get_lineno(self):
        """Return the line number which generated the invocant node."""
        return self.get_next_leaf().lineno

    def get_prefix(self):
        """Return the prefix of the next leaf node."""
        # only leaves carry a prefix
        return self.get_next_leaf().prefix


class Node(BaseNode):
    """
    Node implementation for nonterminals.
    """

    def __init__(self, type, children, context=None):
        # type of nonterminals is >= 256
        # assert type >= 256, type
        self.type = type
        self.children = list(children)
        for ch in self.children:
            # assert ch.parent is None, repr(ch)
            ch.parent = self

    def __repr__(self):
        return '%s(%s, %r)' % (self.__class__.__name__,
                               self.type, self.children)

    def __str__(self):
        """This reproduces the input source exactly."""
        return ''.join(map(str, self.children))

    def _eq(self, other):
        return (self.type, self.children) == (other.type, other.children)

    # support indexing the node directly instead of .children

    def __getitem__(self, index):
        return self.children[index]

    def __iter__(self):
        return iter(self.children)

    def __len__(self):
        return len(self.children)


class Leaf(BaseNode):
    """
    Node implementation for leaf nodes (terminals).
    """
    prefix = ''  # Whitespace and comments preceding this token in the input
    lineno = 0   # Line where this token starts in the input
    column = 0   # Column where this token starts in the input

    def __init__(self, type, value, context=None):
        # type of terminals is below 256
        # assert 0 <= type < 256, type
        self.type = type
        self.value = value
        if context is not None:
            self.prefix, (self.lineno, self.column) = context

    def __repr__(self):
        return '%s(%r, %r, %r)' % (self.__class__.__name__,
                                   self.type, self.value, self.prefix)

    def __str__(self):
        """This reproduces the input source exactly."""
        return self.prefix + str(self.value)

    def _eq(self, other):
        """Compares two nodes for equality."""
        return (self.type, self.value) == (other.type, other.value)


def convert(grammar, raw_node):
    """Convert raw node to a Node or Leaf instance."""
    type, value, context, children = raw_node
    if children or type in grammar.number2symbol:
        # If there's exactly one child, return that child instead of
        # creating a new node.
        if len(children) == 1:
            return children[0]
        return Node(type, children, context=context)
    else:
        return Leaf(type, value, context=context)


def nice_repr(node, number2name, prefix=False):
    def _repr(node):
        if isinstance(node, Leaf):
            return "%s(%r)" % (number2name[node.type], node.value)
        else:
            return "%s(%s)" % (number2name[node.type],
                               ', '.join(map(_repr, node.children)))

    def _prepr(node):
        if isinstance(node, Leaf):
            return "%s(%r, %r)" % (number2name[node.type],
                                   node.prefix, node.value)
        else:
            return "%s(%s)" % (number2name[node.type],
                               ', '.join(map(_prepr, node.children)))
    return (prefix and _prepr or _repr)(node)


class NodeVisitor(object):
    def __init__(self, number2name, *args):
        self.number2name = number2name
        self.init(*args)

    def init(self, *args):
        pass

    def visit(self, node):
        """Visit a node."""
        method = 'visit_' + self.number2name[node.type]
        visitor = getattr(self, method, self.generic_visit)
        return visitor(node)

    def generic_visit(self, node):
        """Called if no explicit visitor function exists for a node."""
        if isinstance(node, Node):
            for child in node:  # type: ignore
                self.visit(child)
sphinx/pycode/parser.py (new file, 463 lines)
@@ -0,0 +1,463 @@
# -*- coding: utf-8 -*-
"""
    sphinx.pycode.parser
    ~~~~~~~~~~~~~~~~~~~~

    Utilities parsing and analyzing Python code.

    :copyright: Copyright 2007-2017 by the Sphinx team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""
import re
import ast
import inspect
import tokenize
import itertools
from token import NAME, NEWLINE, INDENT, DEDENT, NUMBER, OP, STRING
from tokenize import COMMENT, NL

from six import PY2, text_type

if False:
    # For type annotation
    from typing import Any, Dict, IO, List, Tuple  # NOQA

comment_re = re.compile(u'^\\s*#: ?(.*)\r?\n?$')
indent_re = re.compile(u'^\\s*$')
emptyline_re = re.compile(u'^\\s*(#.*)?$')


def get_lvar_names(node, self=None):
    # type: (ast.AST, ast.expr) -> List[unicode]
    """Convert assignment-AST to variable names.

    This raises `TypeError` if the assignment does not create new variable::

        ary[0] = 'foo'
        dic["bar"] = 'baz'
        # => TypeError
    """
    if self:
        if PY2:
            self_id = self.id  # type: ignore
        else:
            self_id = self.arg

    node_name = node.__class__.__name__
    if node_name in ('Index', 'Num', 'Slice', 'Str', 'Subscript'):
        raise TypeError('%r does not create new variable' % node)
    elif node_name == 'Name':
        if self is None or node.id == self_id:  # type: ignore
            return [node.id]  # type: ignore
        else:
            raise TypeError('The assignment %r is not instance variable' % node)
    elif node_name == 'Tuple':
        members = [get_lvar_names(elt) for elt in node.elts]  # type: ignore
        return sum(members, [])
    elif node_name == 'Attribute':
        if node.value.__class__.__name__ == 'Name' and self and node.value.id == self_id:  # type: ignore  # NOQA
            # instance variable
            return ["%s" % get_lvar_names(node.attr, self)[0]]  # type: ignore
        else:
            raise TypeError('The assignment %r is not instance variable' % node)
    elif node_name == 'str':
        return [node]  # type: ignore
    else:
        raise NotImplementedError


def dedent_docstring(s):
    # type: (unicode) -> unicode
    """Remove common leading indentation from docstring."""
    def dummy():
        # dummy function to mock `inspect.getdoc`.
        pass

    dummy.__doc__ = s  # type: ignore
    docstring = inspect.getdoc(dummy)
    return docstring.lstrip("\r\n").rstrip("\r\n")


class Token(object):
    """Better token wrapper for tokenize module."""

    def __init__(self, kind, value, start, end, source):
        # type: (int, Any, Tuple[int, int], Tuple[int, int], unicode) -> None  # NOQA
        self.kind = kind
        self.value = value
        self.start = start
        self.end = end
        self.source = source

    def __eq__(self, other):
        # type: (Any) -> bool
        if isinstance(other, int):
            return self.kind == other
        elif isinstance(other, str):
            return self.value == other
        elif isinstance(other, (list, tuple)):
            return [self.kind, self.value] == list(other)
        elif other is None:
            return False
        else:
            raise ValueError('Unknown value: %r' % other)

    def __ne__(self, other):
        # type: (Any) -> bool
        return not (self == other)

    def match(self, *conditions):
        # type: (Any) -> bool
        return any(self == candidate for candidate in conditions)

    def __repr__(self):
        # type: () -> str
        return '<Token kind=%r value=%r>' % (tokenize.tok_name[self.kind],
                                             self.value.strip())


class TokenProcessor(object):
    def __init__(self, buffers):
        # type: (List[unicode]) -> None
        lines = iter(buffers)
        self.buffers = buffers
        self.tokens = tokenize.generate_tokens(lambda: next(lines))  # type: ignore  # NOQA
        self.current = None  # type: Token
        self.previous = None  # type: Token

    def get_line(self, lineno):
        # type: (int) -> unicode
        """Returns specified line."""
        return self.buffers[lineno - 1]

    def fetch_token(self):
        # type: () -> Token
        """Fetch a next token from source code.

        Returns ``False`` if sequence finished.
        """
        try:
            self.previous = self.current
            self.current = Token(*next(self.tokens))
        except StopIteration:
            self.current = None

        return self.current

    def fetch_until(self, condition):
        # type: (Any) -> List[Token]
        """Fetch tokens until specified token appeared.

        .. note:: This also handles parenthesis well.
        """
        tokens = []
        while self.fetch_token():
            tokens.append(self.current)
            if self.current == condition:
                break
            elif self.current == [OP, '(']:
                tokens += self.fetch_until([OP, ')'])
            elif self.current == [OP, '{']:
                tokens += self.fetch_until([OP, '}'])
            elif self.current == [OP, '[']:
                tokens += self.fetch_until([OP, ']'])

        return tokens


class AfterCommentParser(TokenProcessor):
    """Python source code parser to pick up comment after assignment.

    This parser takes a python code starts with assignment statement,
    and returns the comments for variable if exists.
    """

    def __init__(self, lines):
        # type: (List[unicode]) -> None
        super(AfterCommentParser, self).__init__(lines)
        self.comment = None  # type: unicode

    def fetch_rvalue(self):
        # type: () -> List[Token]
        """Fetch right-hand value of assignment."""
        tokens = []
        while self.fetch_token():
            tokens.append(self.current)
            if self.current == [OP, '(']:
                tokens += self.fetch_until([OP, ')'])
            elif self.current == [OP, '{']:
                tokens += self.fetch_until([OP, '}'])
            elif self.current == [OP, '[']:
                tokens += self.fetch_until([OP, ']'])
            elif self.current == INDENT:
                tokens += self.fetch_until(DEDENT)
            elif self.current == [OP, ';']:
                break
            elif self.current.kind not in (OP, NAME, NUMBER, STRING):
                break

        return tokens

    def parse(self):
        # type: () -> None
        """Parse the code and obtain comment after assignment."""
        # skip lvalue (until '=' operator)
        while self.fetch_token() != [OP, '=']:
            assert self.current

        # skip rvalue
        self.fetch_rvalue()

        if self.current == COMMENT:
            self.comment = self.current.value


class VariableCommentPicker(ast.NodeVisitor):
    """Python source code parser to pick up variable comments."""

    def __init__(self, buffers, encoding):
        # type: (List[unicode], unicode) -> None
        self.counter = itertools.count()
        self.buffers = buffers
        self.encoding = encoding
        self.context = []  # type: List[unicode]
        self.current_classes = []  # type: List[unicode]
        self.current_function = None  # type: ast.FunctionDef
        self.comments = {}  # type: Dict[Tuple[unicode, unicode], unicode]
        self.previous = None  # type: ast.AST
        self.deforders = {}  # type: Dict[unicode, int]
        super(VariableCommentPicker, self).__init__()

    def add_entry(self, name):
        # type: (unicode) -> None
        if self.current_function:
            if self.current_classes and self.context[-1] == "__init__":
                # store variable comments inside __init__ method of classes
                definition = self.context[:-1] + [name]
            else:
                return
        else:
            definition = self.context + [name]

        self.deforders[".".join(definition)] = next(self.counter)

    def add_variable_comment(self, name, comment):
        # type: (unicode, unicode) -> None
        if self.current_function:
            if self.current_classes and self.context[-1] == "__init__":
                # store variable comments inside __init__ method of classes
                context = ".".join(self.context[:-1])
            else:
                return
        else:
            context = ".".join(self.context)

        self.comments[(context, name)] = comment

    def get_self(self):
        # type: () -> ast.expr
        """Returns the name of first argument if in function."""
        if self.current_function and self.current_function.args.args:
            return self.current_function.args.args[0]
        else:
            return None

    def get_line(self, lineno):
        # type: (int) -> unicode
        """Returns specified line."""
        return self.buffers[lineno - 1]

    def visit(self, node):
        # type: (ast.AST) -> None
        """Updates self.previous to ."""
        super(VariableCommentPicker, self).visit(node)
        self.previous = node

    def visit_Assign(self, node):
        # type: (ast.Assign) -> None
        """Handles Assign node and pick up a variable comment."""
        try:
            varnames = sum([get_lvar_names(t, self=self.get_self()) for t in node.targets], [])  # type: ignore  # NOQA
            current_line = self.get_line(node.lineno)
        except TypeError:
            return  # this assignment is not new definition!

        # check comments after assignment
        parser = AfterCommentParser([current_line[node.col_offset:]] +
                                    self.buffers[node.lineno:])
        parser.parse()
        if parser.comment and comment_re.match(parser.comment):
            for varname in varnames:
                self.add_variable_comment(varname, comment_re.sub('\\1', parser.comment))
                self.add_entry(varname)
            return

        # check comments before assignment
        if indent_re.match(current_line[:node.col_offset]):
            comment_lines = []
            for i in range(node.lineno - 1):
                before_line = self.get_line(node.lineno - 1 - i)
                if comment_re.match(before_line):
                    comment_lines.append(comment_re.sub('\\1', before_line))
                else:
                    break

            if comment_lines:
                comment = dedent_docstring('\n'.join(reversed(comment_lines)))
                for varname in varnames:
                    self.add_variable_comment(varname, comment)
                    self.add_entry(varname)
                return

        # not commented (record deforders only)
        for varname in varnames:
            self.add_entry(varname)

    def visit_Expr(self, node):
        # type: (ast.Expr) -> None
        """Handles Expr node and pick up a comment if string."""
        if (isinstance(self.previous, ast.Assign) and isinstance(node.value, ast.Str)):
            try:
                varnames = get_lvar_names(self.previous.targets[0], self.get_self())
                for varname in varnames:
                    if isinstance(node.value.s, text_type):
                        docstring = node.value.s
                    else:
                        docstring = node.value.s.decode(self.encoding or 'utf-8')

                    self.add_variable_comment(varname, dedent_docstring(docstring))
                    self.add_entry(varname)
            except TypeError:
                pass  # this assignment is not new definition!

    def visit_ClassDef(self, node):
        # type: (ast.ClassDef) -> None
        """Handles ClassDef node and set context."""
        self.current_classes.append(node.name)
        self.add_entry(node.name)
        self.context.append(node.name)
        for child in node.body:
            self.visit(child)
        self.context.pop()
        self.current_classes.pop()

    def visit_FunctionDef(self, node):
        # type: (ast.FunctionDef) -> None
        """Handles FunctionDef node and set context."""
        if self.current_function is None:
            self.add_entry(node.name)  # should be called before setting self.current_function
            self.context.append(node.name)
            self.current_function = node
            for child in node.body:
                self.visit(child)
            self.context.pop()
            self.current_function = None


class DefinitionFinder(TokenProcessor):
    def __init__(self, lines):
        # type: (List[unicode]) -> None
        super(DefinitionFinder, self).__init__(lines)
        self.decorator = None  # type: Token
        self.context = []  # type: List[unicode]
        self.indents = []  # type: List
        self.definitions = {}  # type: Dict[unicode, Tuple[unicode, int, int]]

    def add_definition(self, name, entry):
        # type: (unicode, Tuple[unicode, int, int]) -> None
        if self.indents and self.indents[-1][0] == 'def' and entry[0] == 'def':
            # ignore definition of inner function
            pass
        else:
            self.definitions[name] = entry

    def parse(self):
        # type: () -> None
        while True:
            token = self.fetch_token()
            if token is None:
                break
            elif token == COMMENT:
                pass
            elif token == [OP, '@'] and (self.previous is None or
                                         self.previous.match(NEWLINE, NL, INDENT, DEDENT)):
                if self.decorator is None:
                    self.decorator = token
            elif token.match([NAME, 'class']):
                self.parse_definition('class')
            elif token.match([NAME, 'def']):
                self.parse_definition('def')
            elif token == INDENT:
                self.indents.append(('other', None, None))
            elif token == DEDENT:
                self.finalize_block()

    def parse_definition(self, typ):
        # type: (unicode) -> None
        name = self.fetch_token()
        self.context.append(name.value)
        funcname = '.'.join(self.context)

        if self.decorator:
            start_pos = self.decorator.start[0]
            self.decorator = None
        else:
            start_pos = name.start[0]

        self.fetch_until([OP, ':'])
        if self.fetch_token().match(COMMENT, NEWLINE):
            self.fetch_until(INDENT)
            self.indents.append((typ, funcname, start_pos))
        else:
            # one-liner
            self.add_definition(funcname, (typ, start_pos, name.end[0]))
            self.context.pop()

    def finalize_block(self):
        # type: () -> None
        definition = self.indents.pop()
        if definition[0] != 'other':
            typ, funcname, start_pos = definition
            end_pos = self.current.end[0] - 1
            while emptyline_re.match(self.get_line(end_pos)):
                end_pos -= 1

            self.add_definition(funcname, (typ, start_pos, end_pos))
            self.context.pop()


class Parser(object):
    """Python source code parser to pick up variable comments.

    This is a better wrapper for ``VariableCommentPicker``.
    """

    def __init__(self, code, encoding='utf-8'):
        # type: (unicode, unicode) -> None
        self.code = code
        self.encoding = encoding
        self.comments = {}  # type: Dict[Tuple[unicode, unicode], unicode]
        self.deforders = {}  # type: Dict[unicode, int]
        self.definitions = {}  # type: Dict[unicode, Tuple[unicode, int, int]]

    def parse(self):
        # type: () -> None
        """Parse the source code."""
        self.parse_comments()
        self.parse_definition()

    def parse_comments(self):
        # type: () -> None
        """Parse the code and pick up comments."""
        tree = ast.parse(self.code.encode('utf-8'))
        picker = VariableCommentPicker(self.code.splitlines(True), self.encoding)
        picker.visit(tree)
        self.comments = picker.comments
        self.deforders = picker.deforders

    def parse_definition(self):
        # type: () -> None
        """Parse the location of definitions from the code."""
        parser = DefinitionFinder(self.code.splitlines(True))
        parser.parse()
        self.definitions = parser.definitions
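The new module is self-contained: Parser.parse() runs VariableCommentPicker over the ast tree to collect '#:' comments and attribute docstrings, and DefinitionFinder over the raw token stream to locate class and function definitions. A small sketch of driving it directly (the sample source is made up, and the exact end line numbers in the comments depend on DefinitionFinder's handling of trailing blank lines):

    from sphinx.pycode.parser import Parser

    code = (
        'class Config(object):\n'
        '    #: maximum number of workers\n'
        '    workers = 4\n'
        '\n'
        '    def reset(self):\n'
        '        self.workers = 4\n'
    )

    parser = Parser(code)
    parser.parse()

    print(parser.comments)     # e.g. {('Config', 'workers'): 'maximum number of workers'}
    print(parser.definitions)  # e.g. {'Config': ('class', 1, 6), 'Config.reset': ('def', 5, 6)}
    print(parser.deforders)    # definition order, e.g. {'Config': 0, 'Config.workers': 1, 'Config.reset': 2}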
@@ -1,4 +0,0 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

"""The pgen2 package."""
@@ -1,154 +0,0 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

# Modifications:
# Copyright 2006 Google, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

"""Parser driver.

This provides a high-level interface to parse a file into a syntax tree.

"""

__author__ = "Guido van Rossum <guido@python.org>"

__all__ = ["Driver", "load_grammar"]

# Python imports
import os
import logging

import sphinx

# Pgen imports
from sphinx.pycode.pgen2 import grammar, parse, token, tokenize, pgen


class Driver(object):

    def __init__(self, grammar, convert=None, logger=None):
        self.grammar = grammar
        if logger is None:
            logger = logging.getLogger()
        self.logger = logger
        self.convert = convert

    def parse_tokens(self, tokens, debug=False):
        """Parse a series of tokens and return the syntax tree."""
        # XXX Move the prefix computation into a wrapper around tokenize.
        p = parse.Parser(self.grammar, self.convert)
        p.setup()
        lineno = 1
        column = 0
        type = value = start = end = line_text = None
        prefix = ""
        opmap = grammar.opmap
        for type, value, start, end, line_text in tokens:
            if start != (lineno, column):
                assert (lineno, column) <= start, ((lineno, column), start)
                s_lineno, s_column = start
                if lineno < s_lineno:
                    prefix += "\n" * (s_lineno - lineno)
                    lineno = s_lineno
                    column = 0
                if column < s_column:
                    prefix += line_text[column:s_column]
                    column = s_column
            if type in (tokenize.COMMENT, tokenize.NL):
                prefix += value
                lineno, column = end
                if value.endswith("\n"):
                    lineno += 1
                    column = 0
                continue
            if type == token.OP:
                type = opmap[value]
            # if debug:
            #     self.logger.debug("%s %r (prefix=%r)",
            #                       token.tok_name[type], value, prefix)
            if p.addtoken(type, value, (prefix, start)):
                # if debug:
                #     self.logger.debug("Stop.")
                break
            prefix = ""
            lineno, column = end
            if value.endswith("\n"):
                lineno += 1
                column = 0
        else:
            # We never broke out -- EOF is too soon (how can this happen???)
            raise parse.ParseError("incomplete input", type, value, line_text)
        return p.rootnode

    def parse_stream_raw(self, stream, debug=False):
        """Parse a stream and return the syntax tree."""
        tokens = tokenize.generate_tokens(stream.readline)
        return self.parse_tokens(tokens, debug)

    def parse_stream(self, stream, debug=False):
        """Parse a stream and return the syntax tree."""
        return self.parse_stream_raw(stream, debug)

    def parse_file(self, filename, debug=False):
        """Parse a file and return the syntax tree."""
        with open(filename) as stream:
            return self.parse_stream(stream, debug)

    def parse_string(self, text, debug=False):
        """Parse a string and return the syntax tree."""
        tokens = tokenize.generate_tokens(generate_lines(text).next)
        return self.parse_tokens(tokens, debug)


def generate_lines(text):
    """Generator that behaves like readline without using StringIO."""
    for line in text.splitlines(True):
        yield line
    while True:
        yield ""


def get_compiled_path(filename):
    head, tail = os.path.splitext(filename)
    if tail == ".txt":
        tail = ""
    return "%s%s.pickle" % (head, tail)


def compile_grammar(gt='Grammar.txt', logger=None):
    """Compile the grammer."""
    if logger is None:
        logger = logging.getLogger()

    logger.info("Generating grammar tables from %s", gt)
    g = pgen.generate_grammar(gt)
    gp = get_compiled_path(gt)
    logger.info("Writing grammar tables to %s", gp)
    try:
        g.dump(gp)
    except IOError as e:
        logger.info("Writing failed:"+str(e))


def load_grammar(gt="Grammar.txt", logger=None):
    """Load the grammar (maybe from a pickle)."""
    if logger is None:
        logger = logging.getLogger()
    gp = get_compiled_path(gt)
    if not os.path.exists(gp):
        logger.info("Generating grammar tables from %s", gt)
        g = pgen.generate_grammar(gt)
    else:
        g = grammar.Grammar()
        g.load(gp)
    return g


def _newer(a, b):
    """Inquire whether file a was written since file b."""
    if not os.path.exists(a):
        return False
    if not os.path.exists(b):
        return True
    return os.path.getmtime(a) >= os.path.getmtime(b)
@@ -1,177 +0,0 @@
|
||||
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
|
||||
# Licensed to PSF under a Contributor Agreement.
|
||||
|
||||
"""This module defines the data structures used to represent a grammar.
|
||||
|
||||
These are a bit arcane because they are derived from the data
|
||||
structures used by Python's 'pgen' parser generator.
|
||||
|
||||
There's also a table here mapping operators to their names in the
|
||||
token module; the Python tokenize module reports all operators as the
|
||||
fallback token code OP, but the parser needs the actual token code.
|
||||
|
||||
"""
|
||||
from __future__ import print_function
|
||||
|
||||
# Python imports
|
||||
import pickle
|
||||
|
||||
# Local imports
|
||||
from sphinx.pycode.pgen2 import token
|
||||
|
||||
if False:
|
||||
# For type annotation
|
||||
from typing import Dict, List, Tuple # NOQA
|
||||
|
||||
|
||||
class Grammar(object):
|
||||
"""Pgen parsing tables tables conversion class.
|
||||
|
||||
Once initialized, this class supplies the grammar tables for the
|
||||
parsing engine implemented by parse.py. The parsing engine
|
||||
accesses the instance variables directly. The class here does not
|
||||
provide initialization of the tables; several subclasses exist to
|
||||
do this (see the conv and pgen modules).
|
||||
|
||||
The load() method reads the tables from a pickle file, which is
|
||||
much faster than the other ways offered by subclasses. The pickle
|
||||
file is written by calling dump() (after loading the grammar
|
||||
tables using a subclass). The report() method prints a readable
|
||||
representation of the tables to stdout, for debugging.
|
||||
|
||||
The instance variables are as follows:
|
||||
|
||||
symbol2number -- a dict mapping symbol names to numbers. Symbol
|
||||
numbers are always 256 or higher, to distinguish
|
||||
them from token numbers, which are between 0 and
|
||||
255 (inclusive).
|
||||
|
||||
number2symbol -- a dict mapping numbers to symbol names;
|
||||
these two are each other's inverse.
|
||||
|
||||
states -- a list of DFAs, where each DFA is a list of
|
||||
states, each state is a list of arcs, and each
|
||||
arc is an (i, j) pair where i is a label and j is
|
||||
a state number. The DFA number is the index into
|
||||
this list. (This name is slightly confusing.)
|
||||
Final states are represented by a special arc of
|
||||
the form (0, j) where j is its own state number.
|
||||
|
||||
dfas -- a dict mapping symbol numbers to (DFA, first)
|
||||
pairs, where DFA is an item from the states list
|
||||
above, and first is a set of tokens that can
|
||||
begin this grammar rule (represented by a dict
|
||||
whose values are always 1).
|
||||
|
||||
labels -- a list of (x, y) pairs where x is either a token
|
||||
number or a symbol number, and y is either None
|
||||
or a string; the strings are keywords. The label
|
||||
number is the index in this list; label numbers
|
||||
are used to mark state transitions (arcs) in the
|
||||
DFAs.
|
||||
|
||||
start -- the number of the grammar's start symbol.
|
||||
|
||||
keywords -- a dict mapping keyword strings to arc labels.
|
||||
|
||||
tokens -- a dict mapping token numbers to arc labels.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.symbol2number = {} # type: Dict[unicode, int]
|
||||
self.number2symbol = {} # type: Dict[int, unicode]
|
||||
self.states = [] # type: List[List[List[Tuple[int, int]]]]
|
||||
self.dfas = {} # type: Dict[int, Tuple[List[List[Tuple[int, int]]], unicode]]
|
||||
self.labels = [(0, "EMPTY")]
|
||||
self.keywords = {} # type: Dict[unicode, unicode]
|
||||
self.tokens = {} # type: Dict[unicode, unicode]
|
||||
self.symbol2label = {} # type: Dict[unicode, unicode]
|
||||
self.start = 256
|
||||
|
||||
def dump(self, filename):
|
||||
"""Dump the grammar tables to a pickle file."""
|
||||
with open(filename, "wb") as f:
|
||||
pickle.dump(self.__dict__, f, 2)
|
||||
|
||||
def load(self, filename):
|
||||
"""Load the grammar tables from a pickle file."""
|
||||
f = open(filename, "rb")
|
||||
d = pickle.load(f)
|
||||
f.close()
|
||||
self.__dict__.update(d)
|
||||
|
||||
def report(self):
|
||||
"""Dump the grammar tables to standard output, for debugging."""
|
||||
from pprint import pprint
|
||||
print("s2n")
|
||||
pprint(self.symbol2number)
|
||||
print("n2s")
|
||||
pprint(self.number2symbol)
|
||||
print("states")
|
||||
pprint(self.states)
|
||||
print("dfas")
|
||||
pprint(self.dfas)
|
||||
print("labels")
|
||||
pprint(self.labels)
|
||||
print("start", self.start)
|
||||
|
||||
|
||||
# Map from operator to number (since tokenize doesn't do this)
|
||||
|
||||
opmap_raw = """
|
||||
( LPAR
|
||||
) RPAR
|
||||
[ LSQB
|
||||
] RSQB
|
||||
: COLON
|
||||
, COMMA
|
||||
; SEMI
|
||||
+ PLUS
|
||||
- MINUS
|
||||
* STAR
|
||||
/ SLASH
|
||||
| VBAR
|
||||
& AMPER
|
||||
< LESS
|
||||
> GREATER
|
||||
= EQUAL
|
||||
. DOT
|
||||
% PERCENT
|
||||
` BACKQUOTE
|
||||
{ LBRACE
|
||||
} RBRACE
|
||||
@ AT
|
||||
@= ATEQUAL
|
||||
== EQEQUAL
|
||||
!= NOTEQUAL
|
||||
<> NOTEQUAL
|
||||
<= LESSEQUAL
|
||||
>= GREATEREQUAL
|
||||
~ TILDE
|
||||
^ CIRCUMFLEX
|
||||
<< LEFTSHIFT
|
||||
>> RIGHTSHIFT
|
||||
** DOUBLESTAR
|
||||
+= PLUSEQUAL
|
||||
-= MINEQUAL
|
||||
*= STAREQUAL
|
||||
/= SLASHEQUAL
|
||||
%= PERCENTEQUAL
|
||||
&= AMPEREQUAL
|
||||
|= VBAREQUAL
|
||||
^= CIRCUMFLEXEQUAL
|
||||
<<= LEFTSHIFTEQUAL
|
||||
>>= RIGHTSHIFTEQUAL
|
||||
**= DOUBLESTAREQUAL
|
||||
// DOUBLESLASH
|
||||
//= DOUBLESLASHEQUAL
|
||||
-> RARROW
|
||||
... ELLIPSIS
|
||||
"""
|
||||
|
||||
opmap = {}
|
||||
for line in opmap_raw.splitlines():
|
||||
if line:
|
||||
op, name = line.split()
|
||||
opmap[op] = getattr(token, name)
|
||||
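The docstring of the Grammar class above describes the DFA encoding rather tersely. A toy, self-contained illustration of that layout (not the module's data): each state is a list of (label, next_state) arcs, and a final state carries the special arc (0, itself).

# Toy DFA that accepts one or more occurrences of label 1.
toy_dfa = [
    [(1, 1)],          # state 0: on label 1 go to state 1
    [(1, 1), (0, 1)],  # state 1: loop on label 1; (0, 1) marks it as final
]

def accepts(dfa, labels):
    state = 0
    for label in labels:
        for arc_label, nxt in dfa[state]:
            if arc_label == label:
                state = nxt
                break
        else:
            return False
    return (0, state) in dfa[state]

assert accepts(toy_dfa, [1, 1, 1]) and not accepts(toy_dfa, [])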
@@ -1,100 +0,0 @@
|
||||
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
|
||||
# Licensed to PSF under a Contributor Agreement.
|
||||
|
||||
# Extended to handle raw and unicode literals by Georg Brandl.
|
||||
|
||||
"""Safely evaluate Python string literals without using eval()."""
|
||||
from __future__ import print_function
|
||||
|
||||
import re
|
||||
|
||||
from six import text_type
|
||||
|
||||
|
||||
simple_escapes = {"a": "\a",
|
||||
"b": "\b",
|
||||
"f": "\f",
|
||||
"n": "\n",
|
||||
"r": "\r",
|
||||
"t": "\t",
|
||||
"v": "\v",
|
||||
"'": "'",
|
||||
'"': '"',
|
||||
"\\": "\\"}
|
||||
|
||||
def convert_hex(x, n):
|
||||
if len(x) < n+1:
|
||||
raise ValueError("invalid hex string escape ('\\%s')" % x)
|
||||
try:
|
||||
return int(x[1:], 16)
|
||||
except ValueError:
|
||||
raise ValueError("invalid hex string escape ('\\%s')" % x)
|
||||
|
||||
def escape(m):
|
||||
all, tail = m.group(0, 1)
|
||||
assert all.startswith("\\")
|
||||
esc = simple_escapes.get(tail)
|
||||
if esc is not None:
|
||||
return esc
|
||||
elif tail.startswith("x"):
|
||||
return chr(convert_hex(tail, 2))
|
||||
elif tail.startswith('u'):
|
||||
return unichr(convert_hex(tail, 4))
|
||||
elif tail.startswith('U'):
|
||||
return unichr(convert_hex(tail, 8))
|
||||
elif tail.startswith('N'):
|
||||
import unicodedata
|
||||
try:
|
||||
return unicodedata.lookup(tail[1:-1])
|
||||
except KeyError:
|
||||
raise ValueError("undefined character name %r" % tail[1:-1])
|
||||
else:
|
||||
try:
|
||||
return chr(int(tail, 8))
|
||||
except ValueError:
|
||||
raise ValueError("invalid octal string escape ('\\%s')" % tail)
|
||||
|
||||
def escaperaw(m):
|
||||
all, tail = m.group(0, 1)
|
||||
if tail.startswith('u'):
|
||||
return unichr(convert_hex(tail, 4))
|
||||
elif tail.startswith('U'):
|
||||
return unichr(convert_hex(tail, 8))
|
||||
else:
|
||||
return all
|
||||
|
||||
escape_re = re.compile(r"\\(\'|\"|\\|[abfnrtv]|x.{0,2}|[0-7]{1,3})")
|
||||
uni_escape_re = re.compile(r"\\(\'|\"|\\|[abfnrtv]|x.{0,2}|[0-7]{1,3}|"
|
||||
r"u[0-9a-fA-F]{0,4}|U[0-9a-fA-F]{0,8}|N\{.+?\})")
|
||||
|
||||
def evalString(s, encoding=None):
|
||||
regex = escape_re
|
||||
repl = escape
|
||||
if encoding and not isinstance(s, text_type):
|
||||
s = s.decode(encoding)
|
||||
if s.startswith('u') or s.startswith('U'):
|
||||
regex = uni_escape_re
|
||||
s = s[1:]
|
||||
if s.startswith('r') or s.startswith('R'):
|
||||
repl = escaperaw
|
||||
s = s[1:]
|
||||
assert s.startswith("'") or s.startswith('"'), repr(s[:1])
|
||||
q = s[0]
|
||||
if s[:3] == q*3:
|
||||
q = q*3
|
||||
assert s.endswith(q), repr(s[-len(q):])
|
||||
assert len(s) >= 2*len(q)
|
||||
s = s[len(q):-len(q)]
|
||||
return regex.sub(repl, s)
|
||||
|
||||
def test():
|
||||
for i in range(256):
|
||||
c = chr(i)
|
||||
s = repr(c)
|
||||
e = evalString(s)
|
||||
if e != c:
|
||||
print(i, c, s, e)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
test()
|
||||
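evalString() above strips the prefix characters and quotes and then substitutes escape sequences with a regex instead of calling eval(). A tiny self-contained sketch of that substitution step (heavily simplified, handling only a few escapes; it is not the module's code):

import re

simple = {"n": "\n", "t": "\t", "\\": "\\", "'": "'", '"': '"'}
escape_re = re.compile(r"\\(.)")

def decode_body(body):
    # Replace each backslash escape with its decoded character; leave
    # unknown escapes untouched.
    return escape_re.sub(lambda m: simple.get(m.group(1), m.group(0)), body)

assert decode_body(r"line one\nline two") == "line one\nline two"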
File diff suppressed because it is too large
@@ -1,206 +0,0 @@
|
||||
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
|
||||
# Licensed to PSF under a Contributor Agreement.
|
||||
|
||||
"""Parser engine for the grammar tables generated by pgen.
|
||||
|
||||
The grammar table must be loaded first.
|
||||
|
||||
See Parser/parser.c in the Python distribution for additional info on
|
||||
how this parsing engine works.
|
||||
|
||||
"""
|
||||
|
||||
# Local imports
|
||||
from sphinx.pycode.pgen2 import token
|
||||
|
||||
if False:
|
||||
# For type annotation
|
||||
from typing import Any, List, Set, Tuple # NOQA
|
||||
|
||||
class ParseError(Exception):
|
||||
"""Exception to signal the parser is stuck."""
|
||||
|
||||
def __init__(self, msg, type, value, context):
|
||||
Exception.__init__(self, "%s: type=%r, value=%r, context=%r" %
|
||||
(msg, type, value, context))
|
||||
self.msg = msg
|
||||
self.type = type
|
||||
self.value = value
|
||||
self.context = context
|
||||
|
||||
class Parser(object):
|
||||
"""Parser engine.
|
||||
|
||||
The proper usage sequence is:
|
||||
|
||||
p = Parser(grammar, [converter]) # create instance
|
||||
p.setup([start]) # prepare for parsing
|
||||
<for each input token>:
|
||||
if p.addtoken(...): # parse a token; may raise ParseError
|
||||
break
|
||||
root = p.rootnode # root of abstract syntax tree
|
||||
|
||||
A Parser instance may be reused by calling setup() repeatedly.
|
||||
|
||||
A Parser instance contains state pertaining to the current token
|
||||
sequence, and should not be used concurrently by different threads
|
||||
to parse separate token sequences.
|
||||
|
||||
See driver.py for how to get input tokens by tokenizing a file or
|
||||
string.
|
||||
|
||||
Parsing is complete when addtoken() returns True; the root of the
|
||||
abstract syntax tree can then be retrieved from the rootnode
|
||||
instance variable. When a syntax error occurs, addtoken() raises
|
||||
the ParseError exception. There is no error recovery; the parser
|
||||
cannot be used after a syntax error was reported (but it can be
|
||||
reinitialized by calling setup()).
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, grammar, convert=None):
|
||||
"""Constructor.
|
||||
|
||||
The grammar argument is a grammar.Grammar instance; see the
|
||||
grammar module for more information.
|
||||
|
||||
The parser is not ready yet for parsing; you must call the
|
||||
setup() method to get it started.
|
||||
|
||||
The optional convert argument is a function mapping concrete
|
||||
syntax tree nodes to abstract syntax tree nodes. If not
|
||||
given, no conversion is done and the syntax tree produced is
|
||||
the concrete syntax tree. If given, it must be a function of
|
||||
two arguments, the first being the grammar (a grammar.Grammar
|
||||
instance), and the second being the concrete syntax tree node
|
||||
to be converted. The syntax tree is converted from the bottom
|
||||
up.
|
||||
|
||||
A concrete syntax tree node is a (type, value, context, nodes)
|
||||
tuple, where type is the node type (a token or symbol number),
|
||||
value is None for symbols and a string for tokens, context is
|
||||
None or an opaque value used for error reporting (typically a
|
||||
(lineno, offset) pair), and nodes is a list of children for
|
||||
symbols, and None for tokens.
|
||||
|
||||
An abstract syntax tree node may be anything; this is entirely
|
||||
up to the converter function.
|
||||
|
||||
"""
|
||||
self.grammar = grammar
|
||||
self.convert = convert or (lambda grammar, node: node)
|
||||
|
||||
def setup(self, start=None):
|
||||
"""Prepare for parsing.
|
||||
|
||||
This *must* be called before starting to parse.
|
||||
|
||||
The optional argument is an alternative start symbol; it
|
||||
defaults to the grammar's start symbol.
|
||||
|
||||
You can use a Parser instance to parse any number of programs;
|
||||
each time you call setup() the parser is reset to an initial
|
||||
state determined by the (implicit or explicit) start symbol.
|
||||
|
||||
"""
|
||||
if start is None:
|
||||
start = self.grammar.start
|
||||
# Each stack entry is a tuple: (dfa, state, node).
|
||||
# A node is a tuple: (type, value, context, children),
|
||||
# where children is a list of nodes or None, and context may be None.
|
||||
newnode = (start, None, None, []) # type: Tuple[unicode, unicode, unicode, List]
|
||||
stackentry = (self.grammar.dfas[start], 0, newnode)
|
||||
self.stack = [stackentry]
|
||||
self.rootnode = None # type: Any
|
||||
self.used_names = set() # type: Set[unicode]
|
||||
# Aliased to self.rootnode.used_names in pop()
|
||||
|
||||
def addtoken(self, type, value, context):
|
||||
"""Add a token; return True iff this is the end of the program."""
|
||||
# Map from token to label
|
||||
ilabel = self.classify(type, value, context)
|
||||
# Loop until the token is shifted; may raise exceptions
|
||||
while True:
|
||||
dfa, state, node = self.stack[-1]
|
||||
states, first = dfa
|
||||
arcs = states[state]
|
||||
# Look for a state with this label
|
||||
for i, newstate in arcs:
|
||||
t, v = self.grammar.labels[i]
|
||||
if ilabel == i:
|
||||
# Look it up in the list of labels
|
||||
assert t < 256
|
||||
# Shift a token; we're done with it
|
||||
self.shift(type, value, newstate, context)
|
||||
# Pop while we are in an accept-only state
|
||||
state = newstate
|
||||
while states[state] == [(0, state)]:
|
||||
self.pop()
|
||||
if not self.stack:
|
||||
# Done parsing!
|
||||
return True
|
||||
dfa, state, node = self.stack[-1]
|
||||
states, first = dfa
|
||||
# Done with this token
|
||||
return False
|
||||
elif t >= 256:
|
||||
# See if it's a symbol and if we're in its first set
|
||||
itsdfa = self.grammar.dfas[t]
|
||||
itsstates, itsfirst = itsdfa
|
||||
if ilabel in itsfirst:
|
||||
# Push a symbol
|
||||
self.push(t, self.grammar.dfas[t], newstate, context)
|
||||
break # To continue the outer while loop
|
||||
else:
|
||||
if (0, state) in arcs:
|
||||
# An accepting state, pop it and try something else
|
||||
self.pop()
|
||||
if not self.stack:
|
||||
# Done parsing, but another token is input
|
||||
raise ParseError("too much input",
|
||||
type, value, context)
|
||||
else:
|
||||
# No success finding a transition
|
||||
raise ParseError("bad input", type, value, context)
|
||||
|
||||
def classify(self, type, value, context):
|
||||
"""Turn a token into a label. (Internal)"""
|
||||
if type == token.NAME:
|
||||
# Keep a listing of all used names
|
||||
self.used_names.add(value)
|
||||
# Check for reserved words
|
||||
ilabel = self.grammar.keywords.get(value)
|
||||
if ilabel is not None:
|
||||
return ilabel
|
||||
ilabel = self.grammar.tokens.get(type)
|
||||
if ilabel is None:
|
||||
raise ParseError("bad token", type, value, context)
|
||||
return ilabel
|
||||
|
||||
def shift(self, type, value, newstate, context):
|
||||
"""Shift a token. (Internal)"""
|
||||
dfa, state, node = self.stack[-1]
|
||||
newnode = (type, value, context, None) # type: Tuple[unicode, unicode, unicode, List]
|
||||
newnode = self.convert(self.grammar, newnode)
|
||||
if newnode is not None:
|
||||
node[-1].append(newnode)
|
||||
self.stack[-1] = (dfa, newstate, node)
|
||||
|
||||
def push(self, type, newdfa, newstate, context):
|
||||
"""Push a nonterminal. (Internal)"""
|
||||
dfa, state, node = self.stack[-1]
|
||||
newnode = (type, None, context, []) # type: Tuple[unicode, unicode, unicode, List]
|
||||
self.stack[-1] = (dfa, newstate, node)
|
||||
self.stack.append((newdfa, 0, newnode))
|
||||
|
||||
def pop(self):
|
||||
"""Pop a nonterminal. (Internal)"""
|
||||
popdfa, popstate, popnode = self.stack.pop()
|
||||
newnode = self.convert(self.grammar, popnode)
|
||||
if newnode is not None:
|
||||
if self.stack:
|
||||
dfa, state, node = self.stack[-1]
|
||||
node[-1].append(newnode)
|
||||
else:
|
||||
self.rootnode = newnode
|
||||
self.rootnode.used_names = self.used_names
|
||||
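The convert callback described in the Parser constructor docstring lets callers rewrite concrete-syntax nodes as they are built. A self-contained sketch of the most common conversion, dropping wrapper nodes that have exactly one child; the node layout follows the docstring, (type, value, context, children):

def collapse(node):
    # Collapse a concrete-syntax node with a single child so the tree stays compact.
    type_, value, context, children = node
    if children and len(children) == 1:
        return children[0]
    return node

leaf = (1, "x", (1, 0), None)        # a token node
wrapped = (260, None, None, [leaf])  # a symbol node with one child
assert collapse(wrapped) is leaf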
@@ -1,165 +0,0 @@
|
||||
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
|
||||
# Licensed to PSF under a Contributor Agreement.
|
||||
|
||||
# Adapted from parse.py to be compiled with Cython by Georg Brandl.
|
||||
|
||||
"""Parser engine for the grammar tables generated by pgen.
|
||||
|
||||
The grammar table must be loaded first.
|
||||
|
||||
See Parser/parser.c in the Python distribution for additional info on
|
||||
how this parsing engine works.
|
||||
|
||||
"""
|
||||
|
||||
from sphinx.pycode.nodes import Node, Leaf
|
||||
|
||||
DEF NAME = 1
|
||||
|
||||
class ParseError(Exception):
|
||||
"""Exception to signal the parser is stuck."""
|
||||
|
||||
def __init__(self, msg, type, value, context):
|
||||
Exception.__init__(self, "%s: type=%r, value=%r, context=%r" %
|
||||
(msg, type, value, context))
|
||||
self.msg = msg
|
||||
self.type = type
|
||||
self.value = value
|
||||
self.context = context
|
||||
|
||||
|
||||
cdef class Parser:
|
||||
cdef public object grammar
|
||||
cdef public object rootnode
|
||||
cdef public list stack
|
||||
cdef public set used_names
|
||||
cdef int _grammar_start
|
||||
cdef list _grammar_labels
|
||||
cdef dict _grammar_dfas
|
||||
cdef dict _grammar_keywords
|
||||
cdef dict _grammar_tokens
|
||||
cdef dict _grammar_number2symbol
|
||||
|
||||
def __init__(self, grammar, convert=None):
|
||||
self.grammar = grammar
|
||||
#self.convert = convert or noconvert
|
||||
|
||||
self._grammar_dfas = grammar.dfas
|
||||
self._grammar_labels = grammar.labels
|
||||
self._grammar_keywords = grammar.keywords
|
||||
self._grammar_tokens = grammar.tokens
|
||||
self._grammar_number2symbol = grammar.number2symbol
|
||||
self._grammar_start = grammar.start
|
||||
|
||||
def setup(self, start=None):
|
||||
if start is None:
|
||||
start = self._grammar_start
|
||||
# Each stack entry is a tuple: (dfa, state, node).
|
||||
# A node is a tuple: (type, value, context, children),
|
||||
# where children is a list of nodes or None, and context may be None.
|
||||
newnode = (start, None, None, [])
|
||||
stackentry = (self._grammar_dfas[start], 0, newnode)
|
||||
self.stack = [stackentry]
|
||||
self.rootnode = None
|
||||
self.used_names = set() # Aliased to self.rootnode.used_names in pop()
|
||||
|
||||
def addtoken(self, int type, value, context):
|
||||
"""Add a token; return True iff this is the end of the program."""
|
||||
cdef int ilabel, i, t, state, newstate
|
||||
# Map from token to label
|
||||
ilabel = self.classify(type, value, context)
|
||||
# Loop until the token is shifted; may raise exceptions
|
||||
while True:
|
||||
dfa, state, node = self.stack[-1]
|
||||
states, first = dfa
|
||||
arcs = states[state]
|
||||
# Look for a state with this label
|
||||
for i, newstate in arcs:
|
||||
t, v = self._grammar_labels[i]
|
||||
if ilabel == i:
|
||||
# Look it up in the list of labels
|
||||
## assert t < 256
|
||||
# Shift a token; we're done with it
|
||||
self.shift(type, value, newstate, context)
|
||||
# Pop while we are in an accept-only state
|
||||
state = newstate
|
||||
while states[state] == [(0, state)]:
|
||||
self.pop()
|
||||
if not self.stack:
|
||||
# Done parsing!
|
||||
return True
|
||||
dfa, state, node = self.stack[-1]
|
||||
states, first = dfa
|
||||
# Done with this token
|
||||
return False
|
||||
elif t >= 256:
|
||||
# See if it's a symbol and if we're in its first set
|
||||
itsdfa = self._grammar_dfas[t]
|
||||
itsstates, itsfirst = itsdfa
|
||||
if ilabel in itsfirst:
|
||||
# Push a symbol
|
||||
self.push(t, itsdfa, newstate, context)
|
||||
break # To continue the outer while loop
|
||||
else:
|
||||
if (0, state) in arcs:
|
||||
# An accepting state, pop it and try something else
|
||||
self.pop()
|
||||
if not self.stack:
|
||||
# Done parsing, but another token is input
|
||||
raise ParseError("too much input",
|
||||
type, value, context)
|
||||
else:
|
||||
# No success finding a transition
|
||||
raise ParseError("bad input", type, value, context)
|
||||
|
||||
cdef int classify(self, int type, value, context):
|
||||
"""Turn a token into a label. (Internal)"""
|
||||
if type == NAME:
|
||||
# Keep a listing of all used names
|
||||
self.used_names.add(value)
|
||||
# Check for reserved words
|
||||
if value in self._grammar_keywords:
|
||||
return self._grammar_keywords[value]
|
||||
if type not in self._grammar_tokens:
|
||||
raise ParseError("bad token", type, value, context)
|
||||
return self._grammar_tokens[type]
|
||||
|
||||
cdef void shift(self, type, value, newstate, context):
|
||||
"""Shift a token. (Internal)"""
|
||||
cdef tuple node
|
||||
dfa, state, node = self.stack[-1]
|
||||
newnode = (type, value, context, None)
|
||||
newnode = self.convert(newnode)
|
||||
if newnode is not None:
|
||||
node[-1].append(newnode)
|
||||
self.stack[-1] = (dfa, newstate, node)
|
||||
|
||||
cdef void push(self, type, newdfa, newstate, context):
|
||||
"""Push a nonterminal. (Internal)"""
|
||||
dfa, state, node = self.stack[-1]
|
||||
newnode = (type, None, context, [])
|
||||
self.stack[-1] = (dfa, newstate, node)
|
||||
self.stack.append((newdfa, 0, newnode))
|
||||
|
||||
cdef void pop(self):
|
||||
"""Pop a nonterminal. (Internal)"""
|
||||
popdfa, popstate, popnode = self.stack.pop()
|
||||
newnode = self.convert(popnode)
|
||||
if newnode is not None:
|
||||
if self.stack:
|
||||
dfa, state, node = self.stack[-1]
|
||||
node[-1].append(newnode)
|
||||
else:
|
||||
self.rootnode = newnode
|
||||
self.rootnode.used_names = self.used_names
|
||||
|
||||
cdef convert(self, tuple raw_node):
|
||||
type, value, context, children = raw_node
|
||||
if children or type in self._grammar_number2symbol:
|
||||
# If there's exactly one child, return that child instead of
|
||||
# creating a new node.
|
||||
if len(children) == 1:
|
||||
return children[0]
|
||||
return Node(type, children, context=context)
|
||||
else:
|
||||
return Leaf(type, value, context=context)
|
||||
@@ -1,403 +0,0 @@
|
||||
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
|
||||
# Licensed to PSF under a Contributor Agreement.
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
from six import iteritems
|
||||
from collections import OrderedDict
|
||||
|
||||
# Pgen imports
|
||||
from sphinx.pycode.pgen2 import grammar, token, tokenize
|
||||
|
||||
if False:
|
||||
# For type annotation
|
||||
from typing import Any, Dict, List, Tuple # NOQA
|
||||
|
||||
|
||||
class PgenGrammar(grammar.Grammar):
|
||||
pass
|
||||
|
||||
class ParserGenerator(object):
|
||||
|
||||
def __init__(self, filename, stream=None):
|
||||
close_stream = None
|
||||
if stream is None:
|
||||
stream = open(filename)
|
||||
close_stream = stream.close
|
||||
self.filename = filename
|
||||
self.stream = stream
|
||||
self.generator = tokenize.generate_tokens(stream.readline)
|
||||
self.gettoken() # Initialize lookahead
|
||||
self.dfas, self.startsymbol = self.parse()
|
||||
if close_stream is not None:
|
||||
close_stream()
|
||||
self.first = {} # type: Dict[unicode, List[unicode]]
|
||||
# map from symbol name to set of tokens
|
||||
self.addfirstsets()
|
||||
|
||||
def make_grammar(self):
|
||||
c = PgenGrammar()
|
||||
names = list(self.dfas.keys())
|
||||
names.sort()
|
||||
names.remove(self.startsymbol)
|
||||
names.insert(0, self.startsymbol)
|
||||
for name in names:
|
||||
i = 256 + len(c.symbol2number)
|
||||
c.symbol2number[name] = i
|
||||
c.number2symbol[i] = name
|
||||
for name in names:
|
||||
dfa = self.dfas[name]
|
||||
states = [] # type: List[List[Tuple[int, int]]]
|
||||
for state in dfa:
|
||||
arcs = []
|
||||
for label, next in iteritems(state.arcs):
|
||||
arcs.append((self.make_label(c, label), dfa.index(next)))
|
||||
if state.isfinal:
|
||||
arcs.append((0, dfa.index(state)))
|
||||
states.append(arcs)
|
||||
c.states.append(states)
|
||||
c.dfas[c.symbol2number[name]] = (states, self.make_first(c, name))
|
||||
c.start = c.symbol2number[self.startsymbol]
|
||||
return c
|
||||
|
||||
def make_first(self, c, name):
|
||||
rawfirst = self.first[name]
|
||||
first = {}
|
||||
for label in sorted(rawfirst):
|
||||
ilabel = self.make_label(c, label)
|
||||
##assert ilabel not in first # XXX failed on <> ... !=
|
||||
first[ilabel] = 1
|
||||
return first
|
||||
|
||||
def make_label(self, c, label):
|
||||
# XXX Maybe this should be a method on a subclass of converter?
|
||||
ilabel = len(c.labels)
|
||||
if label[0].isalpha():
|
||||
# Either a symbol name or a named token
|
||||
if label in c.symbol2number:
|
||||
# A symbol name (a non-terminal)
|
||||
if label in c.symbol2label:
|
||||
return c.symbol2label[label]
|
||||
else:
|
||||
c.labels.append((c.symbol2number[label], None))
|
||||
c.symbol2label[label] = ilabel
|
||||
return ilabel
|
||||
else:
|
||||
# A named token (NAME, NUMBER, STRING)
|
||||
itoken = getattr(token, label, None)
|
||||
assert isinstance(itoken, int), label
|
||||
assert itoken in token.tok_name, label
|
||||
if itoken in c.tokens:
|
||||
return c.tokens[itoken]
|
||||
else:
|
||||
c.labels.append((itoken, None))
|
||||
c.tokens[itoken] = ilabel
|
||||
return ilabel
|
||||
else:
|
||||
# Either a keyword or an operator
|
||||
assert label[0] in ('"', "'"), label
|
||||
value = eval(label)
|
||||
if value[0].isalpha():
|
||||
# A keyword
|
||||
if value in c.keywords:
|
||||
return c.keywords[value]
|
||||
else:
|
||||
c.labels.append((token.NAME, value))
|
||||
c.keywords[value] = ilabel
|
||||
return ilabel
|
||||
else:
|
||||
# An operator (any non-numeric token)
|
||||
itoken = grammar.opmap[value] # Fails if unknown token
|
||||
if itoken in c.tokens:
|
||||
return c.tokens[itoken]
|
||||
else:
|
||||
c.labels.append((itoken, None))
|
||||
c.tokens[itoken] = ilabel
|
||||
return ilabel
|
||||
|
||||
def addfirstsets(self):
|
||||
names = list(self.dfas.keys())
|
||||
names.sort()
|
||||
for name in names:
|
||||
if name not in self.first:
|
||||
self.calcfirst(name)
|
||||
#print name, self.first[name].keys()
|
||||
|
||||
def calcfirst(self, name):
|
||||
dfa = self.dfas[name]
|
||||
self.first[name] = None # dummy to detect left recursion
|
||||
state = dfa[0]
|
||||
totalset = {} # type: Dict[unicode, int]
|
||||
overlapcheck = {}
|
||||
for label, next in iteritems(state.arcs):
|
||||
if label in self.dfas:
|
||||
if label in self.first:
|
||||
fset = self.first[label]
|
||||
if fset is None:
|
||||
raise ValueError("recursion for rule %r" % name)
|
||||
else:
|
||||
self.calcfirst(label)
|
||||
fset = self.first[label]
|
||||
totalset.update(fset)
|
||||
overlapcheck[label] = fset
|
||||
else:
|
||||
totalset[label] = 1
|
||||
overlapcheck[label] = {label: 1}
|
||||
inverse = {} # type: Dict[unicode, unicode]
|
||||
for label, itsfirst in sorted(overlapcheck.items()):
|
||||
for symbol in sorted(itsfirst):
|
||||
if symbol in inverse:
|
||||
raise ValueError("rule %s is ambiguous; %s is in the"
|
||||
" first sets of %s as well as %s" %
|
||||
(name, symbol, label, inverse[symbol]))
|
||||
inverse[symbol] = label
|
||||
self.first[name] = totalset
|
||||
|
||||
def parse(self):
|
||||
dfas = {}
|
||||
startsymbol = None
|
||||
# MSTART: (NEWLINE | RULE)* ENDMARKER
|
||||
while self.type != token.ENDMARKER:
|
||||
while self.type == token.NEWLINE:
|
||||
self.gettoken()
|
||||
# RULE: NAME ':' RHS NEWLINE
|
||||
name = self.expect(token.NAME)
|
||||
self.expect(token.OP, ":")
|
||||
a, z = self.parse_rhs()
|
||||
self.expect(token.NEWLINE)
|
||||
#self.dump_nfa(name, a, z)
|
||||
dfa = self.make_dfa(a, z)
|
||||
#self.dump_dfa(name, dfa)
|
||||
#oldlen = len(dfa)
|
||||
self.simplify_dfa(dfa)
|
||||
#newlen = len(dfa)
|
||||
dfas[name] = dfa
|
||||
#print name, oldlen, newlen
|
||||
if startsymbol is None:
|
||||
startsymbol = name
|
||||
return dfas, startsymbol
|
||||
|
||||
def make_dfa(self, start, finish):
|
||||
# To turn an NFA into a DFA, we define the states of the DFA
|
||||
# to correspond to *sets* of states of the NFA. Then do some
|
||||
# state reduction. Let's represent sets as dicts with 1 for
|
||||
# values.
|
||||
assert isinstance(start, NFAState)
|
||||
assert isinstance(finish, NFAState)
|
||||
def closure(state):
|
||||
base = {} # type: Dict
|
||||
addclosure(state, base)
|
||||
return base
|
||||
def addclosure(state, base):
|
||||
assert isinstance(state, NFAState)
|
||||
if state in base:
|
||||
return
|
||||
base[state] = 1
|
||||
for label, next in state.arcs:
|
||||
if label is None:
|
||||
addclosure(next, base)
|
||||
states = [DFAState(closure(start), finish)]
|
||||
for state in states: # NB states grows while we're iterating
|
||||
arcs = {} # type: Dict[unicode, Dict]
|
||||
for nfastate in state.nfaset:
|
||||
for label, next in nfastate.arcs:
|
||||
if label is not None:
|
||||
addclosure(next, arcs.setdefault(label, {}))
|
||||
for label, nfaset in iteritems(arcs):
|
||||
for st in states:
|
||||
if st.nfaset == nfaset:
|
||||
break
|
||||
else:
|
||||
st = DFAState(nfaset, finish)
|
||||
states.append(st)
|
||||
state.addarc(st, label)
|
||||
return states # List of DFAState instances; first one is start
|
||||
|
||||
def dump_nfa(self, name, start, finish):
|
||||
print("Dump of NFA for", name)
|
||||
todo = [start]
|
||||
for i, state in enumerate(todo):
|
||||
print(" State", i, state is finish and "(final)" or "")
|
||||
for label, next in state.arcs:
|
||||
if next in todo:
|
||||
j = todo.index(next)
|
||||
else:
|
||||
j = len(todo)
|
||||
todo.append(next)
|
||||
if label is None:
|
||||
print(" -> %d" % j)
|
||||
else:
|
||||
print(" %s -> %d" % (label, j))
|
||||
|
||||
def dump_dfa(self, name, dfa):
|
||||
print("Dump of DFA for", name)
|
||||
for i, state in enumerate(dfa):
|
||||
print(" State", i, state.isfinal and "(final)" or "")
|
||||
for label, next in iteritems(state.arcs):
|
||||
print(" %s -> %d" % (label, dfa.index(next)))
|
||||
|
||||
def simplify_dfa(self, dfa):
|
||||
# This is not theoretically optimal, but works well enough.
|
||||
# Algorithm: repeatedly look for two states that have the same
|
||||
# set of arcs (same labels pointing to the same nodes) and
|
||||
# unify them, until things stop changing.
|
||||
|
||||
# dfa is a list of DFAState instances
|
||||
changes = True
|
||||
while changes:
|
||||
changes = False
|
||||
for i, state_i in enumerate(dfa):
|
||||
for j in range(i+1, len(dfa)):
|
||||
state_j = dfa[j]
|
||||
if state_i == state_j:
|
||||
#print " unify", i, j
|
||||
del dfa[j]
|
||||
for state in dfa:
|
||||
state.unifystate(state_j, state_i)
|
||||
changes = True
|
||||
break
|
||||
|
||||
def parse_rhs(self):
|
||||
# RHS: ALT ('|' ALT)*
|
||||
a, z = self.parse_alt()
|
||||
if self.value != "|":
|
||||
return a, z
|
||||
else:
|
||||
aa = NFAState()
|
||||
zz = NFAState()
|
||||
aa.addarc(a)
|
||||
z.addarc(zz)
|
||||
while self.value == "|":
|
||||
self.gettoken()
|
||||
a, z = self.parse_alt()
|
||||
aa.addarc(a)
|
||||
z.addarc(zz)
|
||||
return aa, zz
|
||||
|
||||
def parse_alt(self):
|
||||
# ALT: ITEM+
|
||||
a, b = self.parse_item()
|
||||
while (self.value in ("(", "[") or
|
||||
self.type in (token.NAME, token.STRING)):
|
||||
c, d = self.parse_item()
|
||||
b.addarc(c)
|
||||
b = d
|
||||
return a, b
|
||||
|
||||
def parse_item(self):
|
||||
# ITEM: '[' RHS ']' | ATOM ['+' | '*']
|
||||
if self.value == "[":
|
||||
self.gettoken()
|
||||
a, z = self.parse_rhs()
|
||||
self.expect(token.OP, "]")
|
||||
a.addarc(z)
|
||||
return a, z
|
||||
else:
|
||||
a, z = self.parse_atom()
|
||||
value = self.value
|
||||
if value not in ("+", "*"):
|
||||
return a, z
|
||||
self.gettoken()
|
||||
z.addarc(a)
|
||||
if value == "+":
|
||||
return a, z
|
||||
else:
|
||||
return a, a
|
||||
|
||||
def parse_atom(self):
|
||||
# ATOM: '(' RHS ')' | NAME | STRING
|
||||
if self.value == "(":
|
||||
self.gettoken()
|
||||
a, z = self.parse_rhs()
|
||||
self.expect(token.OP, ")")
|
||||
return a, z
|
||||
elif self.type in (token.NAME, token.STRING):
|
||||
a = NFAState()
|
||||
z = NFAState()
|
||||
a.addarc(z, self.value)
|
||||
self.gettoken()
|
||||
return a, z
|
||||
else:
|
||||
self.raise_error("expected (...) or NAME or STRING, got %s/%s",
|
||||
self.type, self.value)
|
||||
|
||||
def expect(self, type, value=None):
|
||||
if self.type != type or (value is not None and self.value != value):
|
||||
self.raise_error("expected %s/%s, got %s/%s",
|
||||
type, value, self.type, self.value)
|
||||
value = self.value
|
||||
self.gettoken()
|
||||
return value
|
||||
|
||||
def gettoken(self):
|
||||
tup = next(self.generator)
|
||||
while tup[0] in (tokenize.COMMENT, tokenize.NL):
|
||||
tup = next(self.generator)
|
||||
self.type, self.value, self.begin, self.end, self.line = tup
|
||||
#print token.tok_name[self.type], repr(self.value)
|
||||
|
||||
def raise_error(self, msg, *args):
|
||||
if args:
|
||||
try:
|
||||
msg = msg % args
|
||||
except:
|
||||
msg = " ".join([msg] + [str(x) for x in args])
|
||||
raise SyntaxError(msg, (self.filename, self.end[0],
|
||||
self.end[1], self.line))
|
||||
|
||||
class NFAState(object):
|
||||
|
||||
def __init__(self):
|
||||
self.arcs = [] # type: List[Tuple[unicode, Any]]
|
||||
# list of (label, NFAState) pairs
|
||||
|
||||
def addarc(self, next, label=None):
|
||||
assert label is None or isinstance(label, str)
|
||||
assert isinstance(next, NFAState)
|
||||
self.arcs.append((label, next))
|
||||
|
||||
def __hash__(self):
|
||||
return hash(tuple(x[0] for x in self.arcs))
|
||||
|
||||
class DFAState(object):
|
||||
|
||||
def __init__(self, nfaset, final):
|
||||
assert isinstance(nfaset, dict)
|
||||
assert isinstance(next(iter(nfaset)), NFAState)
|
||||
assert isinstance(final, NFAState)
|
||||
self.nfaset = nfaset
|
||||
self.isfinal = final in nfaset
|
||||
self.arcs = OrderedDict() # type: OrderedDict
|
||||
# map from label to DFAState
|
||||
|
||||
def __hash__(self):
|
||||
return hash(tuple(self.arcs))
|
||||
|
||||
def addarc(self, next, label):
|
||||
assert isinstance(label, str)
|
||||
assert label not in self.arcs
|
||||
assert isinstance(next, DFAState)
|
||||
self.arcs[label] = next
|
||||
|
||||
def unifystate(self, old, new):
|
||||
for label, next in iteritems(self.arcs):
|
||||
if next is old:
|
||||
self.arcs[label] = new
|
||||
|
||||
def __eq__(self, other):
|
||||
# Equality test -- ignore the nfaset instance variable
|
||||
assert isinstance(other, DFAState)
|
||||
if self.isfinal != other.isfinal:
|
||||
return False
|
||||
# Can't just return self.arcs == other.arcs, because that
|
||||
# would invoke this method recursively, with cycles...
|
||||
if len(self.arcs) != len(other.arcs):
|
||||
return False
|
||||
for label, next in iteritems(self.arcs):
|
||||
if next is not other.arcs.get(label):
|
||||
return False
|
||||
return True
|
||||
|
||||
def generate_grammar(filename="Grammar.txt"):
|
||||
p = ParserGenerator(filename)
|
||||
return p.make_grammar()
|
||||
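make_dfa() above is a textbook subset construction: each NFA state set is expanded through its epsilon arcs (label None) and one DFA state is created per distinct closure. A minimal stand-alone sketch of the closure step over a toy NFA, using ints for states and a dict of arc lists rather than the module's NFAState objects:

toy_nfa = {0: [(None, 1), ("a", 2)], 1: [("b", 2)], 2: []}

def closure(state, arcs):
    # Collect every state reachable from `state` through epsilon (None) arcs only.
    seen = {state}
    stack = [state]
    while stack:
        for label, nxt in arcs[stack.pop()]:
            if label is None and nxt not in seen:
                seen.add(nxt)
                stack.append(nxt)
    return seen

assert closure(0, toy_nfa) == {0, 1}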
@@ -1,86 +0,0 @@
|
||||
#! /usr/bin/env python
|
||||
|
||||
"""Token constants (from "token.h")."""
|
||||
|
||||
# Taken from Python (r53757) and modified to include some tokens
|
||||
# originally monkeypatched in by pgen2.tokenize
|
||||
|
||||
#--start constants--
|
||||
ENDMARKER = 0
|
||||
NAME = 1
|
||||
NUMBER = 2
|
||||
STRING = 3
|
||||
NEWLINE = 4
|
||||
INDENT = 5
|
||||
DEDENT = 6
|
||||
LPAR = 7
|
||||
RPAR = 8
|
||||
LSQB = 9
|
||||
RSQB = 10
|
||||
COLON = 11
|
||||
COMMA = 12
|
||||
SEMI = 13
|
||||
PLUS = 14
|
||||
MINUS = 15
|
||||
STAR = 16
|
||||
SLASH = 17
|
||||
VBAR = 18
|
||||
AMPER = 19
|
||||
LESS = 20
|
||||
GREATER = 21
|
||||
EQUAL = 22
|
||||
DOT = 23
|
||||
PERCENT = 24
|
||||
BACKQUOTE = 25
|
||||
LBRACE = 26
|
||||
RBRACE = 27
|
||||
EQEQUAL = 28
|
||||
NOTEQUAL = 29
|
||||
LESSEQUAL = 30
|
||||
GREATEREQUAL = 31
|
||||
TILDE = 32
|
||||
CIRCUMFLEX = 33
|
||||
LEFTSHIFT = 34
|
||||
RIGHTSHIFT = 35
|
||||
DOUBLESTAR = 36
|
||||
PLUSEQUAL = 37
|
||||
MINEQUAL = 38
|
||||
STAREQUAL = 39
|
||||
SLASHEQUAL = 40
|
||||
PERCENTEQUAL = 41
|
||||
AMPEREQUAL = 42
|
||||
VBAREQUAL = 43
|
||||
CIRCUMFLEXEQUAL = 44
|
||||
LEFTSHIFTEQUAL = 45
|
||||
RIGHTSHIFTEQUAL = 46
|
||||
DOUBLESTAREQUAL = 47
|
||||
DOUBLESLASH = 48
|
||||
DOUBLESLASHEQUAL = 49
|
||||
AT = 50
|
||||
ATEQUAL = 51
|
||||
RARROW = 52
|
||||
ELLIPSIS = 53
|
||||
OP = 54
|
||||
AWAIT = 55
|
||||
ASYNC = 56
|
||||
COMMENT = 57
|
||||
NL = 58
|
||||
ERRORTOKEN = 59
|
||||
N_TOKENS = 60
|
||||
NT_OFFSET = 256
|
||||
#--end constants--
|
||||
|
||||
tok_name = {}
|
||||
for _name, _value in list(globals().items()):
|
||||
if type(_value) is type(0):
|
||||
tok_name[_value] = _name
|
||||
|
||||
|
||||
def ISTERMINAL(x):
|
||||
return x < NT_OFFSET
|
||||
|
||||
def ISNONTERMINAL(x):
|
||||
return x >= NT_OFFSET
|
||||
|
||||
def ISEOF(x):
|
||||
return x == ENDMARKER
|
||||
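These constants follow the same convention as the standard library's token module, which also provides the three predicates, so the NT_OFFSET convention can be checked directly (a quick illustrative check; the numeric values in the stdlib may differ from the vendored copy above):

import token

assert token.ISTERMINAL(token.NAME)      # token codes below NT_OFFSET are terminals
assert token.ISNONTERMINAL(300)          # symbol numbers start at NT_OFFSET (256)
assert token.ISEOF(token.ENDMARKER)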
@@ -1,441 +0,0 @@
|
||||
# Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006 Python Software Foundation.
|
||||
# All rights reserved.
|
||||
|
||||
"""Tokenization help for Python programs.
|
||||
|
||||
generate_tokens(readline) is a generator that breaks a stream of
|
||||
text into Python tokens. It accepts a readline-like method which is called
|
||||
repeatedly to get the next line of input (or "" for EOF). It generates
|
||||
5-tuples with these members:
|
||||
|
||||
the token type (see token.py)
|
||||
the token (a string)
|
||||
the starting (row, column) indices of the token (a 2-tuple of ints)
|
||||
the ending (row, column) indices of the token (a 2-tuple of ints)
|
||||
the original line (string)
|
||||
|
||||
It is designed to match the working of the Python tokenizer exactly, except
|
||||
that it produces COMMENT tokens for comments and gives type OP for all
|
||||
operators.
|
||||
|
||||
Older entry points
|
||||
tokenize_loop(readline, tokeneater)
|
||||
tokenize(readline, tokeneater=printtoken)
|
||||
are the same, except instead of generating tokens, tokeneater is a callback
|
||||
function to which the 5 fields described above are passed as 5 arguments,
|
||||
each time a new token is found.
|
||||
"""
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
__author__ = 'Ka-Ping Yee <ping@lfw.org>'
|
||||
__credits__ = \
|
||||
'GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro'
|
||||
|
||||
import string, re
|
||||
from six import PY3
|
||||
from sphinx.pycode.pgen2.token import *
|
||||
from sphinx.pycode.pgen2 import token
|
||||
|
||||
if False:
|
||||
# For type annotation
|
||||
from typing import List # NOQA
|
||||
|
||||
__all__ = [x for x in dir(token) if x[0] != '_'] + ["tokenize",
|
||||
"generate_tokens", "untokenize"]
|
||||
del token
|
||||
|
||||
def group(*choices): return '(' + '|'.join(choices) + ')'
|
||||
def any(*choices): return group(*choices) + '*'
|
||||
def maybe(*choices): return group(*choices) + '?'
|
||||
|
||||
Whitespace = r'[ \f\t]*'
|
||||
Comment = r'#[^\r\n]*'
|
||||
Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
|
||||
Name = r'[a-zA-Z_]\w*'
|
||||
|
||||
Binnumber = r'0[bB][01]*'
|
||||
Hexnumber = r'0[xX][\da-fA-F]*[lL]?'
|
||||
Octnumber = r'0[oO]?[0-7]*[lL]?'
|
||||
Decnumber = r'[1-9]\d*[lL]?'
|
||||
Intnumber = group(Binnumber, Hexnumber, Octnumber, Decnumber)
|
||||
Exponent = r'[eE][-+]?\d+'
|
||||
Pointfloat = group(r'\d+\.\d*', r'\.\d+') + maybe(Exponent)
|
||||
Expfloat = r'\d+' + Exponent
|
||||
Floatnumber = group(Pointfloat, Expfloat)
|
||||
Imagnumber = group(r'\d+[jJ]', Floatnumber + r'[jJ]')
|
||||
Number = group(Imagnumber, Floatnumber, Intnumber)
|
||||
|
||||
# Tail end of ' string.
|
||||
Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
|
||||
# Tail end of " string.
|
||||
Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
|
||||
# Tail end of ''' string.
|
||||
Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
|
||||
# Tail end of """ string.
|
||||
Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
|
||||
Triple = group("[ubUB]?[rR]?'''", '[ubUB]?[rR]?"""')
|
||||
# Single-line ' or " string.
|
||||
String = group(r"[uU]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
|
||||
r'[uU]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
|
||||
|
||||
# Because of leftmost-then-longest match semantics, be sure to put the
|
||||
# longest operators first (e.g., if = came before ==, == would get
|
||||
# recognized as two instances of =).
|
||||
Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"<>", r"!=",
|
||||
r"//=?", r"->",
|
||||
r"[+\-*/%&|^=<>]=?",
|
||||
r"~")
|
||||
|
||||
Bracket = '[][(){}]'
|
||||
Special = group(r'\r?\n', r'[:;.,`@]')
|
||||
if PY3:
|
||||
Ellipsis_ = r'\.{3}'
|
||||
Special = group(Ellipsis_, Special)
|
||||
Funny = group(Operator, Bracket, Special)
|
||||
|
||||
PlainToken = group(Number, Funny, String, Name)
|
||||
Token = Ignore + PlainToken
|
||||
|
||||
# First (or only) line of ' or " string.
|
||||
ContStr = group(r"[uUbB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
|
||||
group("'", r'\\\r?\n'),
|
||||
r'[uUbB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
|
||||
group('"', r'\\\r?\n'))
|
||||
PseudoExtras = group(r'\\\r?\n', Comment, Triple)
|
||||
PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
|
||||
|
||||
tokenprog, pseudoprog, single3prog, double3prog = [
|
||||
re.compile(x) for x in (Token, PseudoToken, Single3, Double3)
|
||||
]
|
||||
endprogs = {"'": re.compile(Single), '"': re.compile(Double),
|
||||
"'''": single3prog, '"""': double3prog,
|
||||
"r'''": single3prog, 'r"""': double3prog,
|
||||
"u'''": single3prog, 'u"""': double3prog,
|
||||
"b'''": single3prog, 'b"""': double3prog,
|
||||
"ur'''": single3prog, 'ur"""': double3prog,
|
||||
"br'''": single3prog, 'br"""': double3prog,
|
||||
"R'''": single3prog, 'R"""': double3prog,
|
||||
"U'''": single3prog, 'U"""': double3prog,
|
||||
"B'''": single3prog, 'B"""': double3prog,
|
||||
"uR'''": single3prog, 'uR"""': double3prog,
|
||||
"Ur'''": single3prog, 'Ur"""': double3prog,
|
||||
"UR'''": single3prog, 'UR"""': double3prog,
|
||||
"bR'''": single3prog, 'bR"""': double3prog,
|
||||
"Br'''": single3prog, 'Br"""': double3prog,
|
||||
"BR'''": single3prog, 'BR"""': double3prog,
|
||||
'r': None, 'R': None,
|
||||
'u': None, 'U': None,
|
||||
'b': None, 'B': None}
|
||||
|
||||
triple_quoted = {}
|
||||
for t in ("'''", '"""',
|
||||
"r'''", 'r"""', "R'''", 'R"""',
|
||||
"u'''", 'u"""', "U'''", 'U"""',
|
||||
"b'''", 'b"""', "B'''", 'B"""',
|
||||
"ur'''", 'ur"""', "Ur'''", 'Ur"""',
|
||||
"uR'''", 'uR"""', "UR'''", 'UR"""',
|
||||
"br'''", 'br"""', "Br'''", 'Br"""',
|
||||
"bR'''", 'bR"""', "BR'''", 'BR"""',):
|
||||
triple_quoted[t] = t
|
||||
single_quoted = {}
|
||||
for t in ("'", '"',
|
||||
"r'", 'r"', "R'", 'R"',
|
||||
"u'", 'u"', "U'", 'U"',
|
||||
"b'", 'b"', "B'", 'B"',
|
||||
"ur'", 'ur"', "Ur'", 'Ur"',
|
||||
"uR'", 'uR"', "UR'", 'UR"',
|
||||
"br'", 'br"', "Br'", 'Br"',
|
||||
"bR'", 'bR"', "BR'", 'BR"', ):
|
||||
single_quoted[t] = t
|
||||
|
||||
tabsize = 8
|
||||
|
||||
class TokenError(Exception): pass
|
||||
|
||||
class StopTokenizing(Exception): pass
|
||||
|
||||
def printtoken(type, token, scell, ecell, line): # for testing
|
||||
srow, scol = scell
|
||||
erow, ecol = ecell
|
||||
print("%d,%d-%d,%d:\t%s\t%s" %
|
||||
(srow, scol, erow, ecol, tok_name[type], repr(token)))
|
||||
|
||||
def tokenize(readline, tokeneater=printtoken):
|
||||
"""
|
||||
The tokenize() function accepts two parameters: one representing the
|
||||
input stream, and one providing an output mechanism for tokenize().
|
||||
|
||||
The first parameter, readline, must be a callable object which provides
|
||||
the same interface as the readline() method of built-in file objects.
|
||||
Each call to the function should return one line of input as a string.
|
||||
|
||||
The second parameter, tokeneater, must also be a callable object. It is
|
||||
called once for each token, with five arguments, corresponding to the
|
||||
tuples generated by generate_tokens().
|
||||
"""
|
||||
try:
|
||||
tokenize_loop(readline, tokeneater)
|
||||
except StopTokenizing:
|
||||
pass
|
||||
|
||||
# backwards compatible interface
|
||||
def tokenize_loop(readline, tokeneater):
|
||||
for token_info in generate_tokens(readline):
|
||||
tokeneater(*token_info)
|
||||
|
||||
class Untokenizer:
|
||||
|
||||
def __init__(self):
|
||||
self.tokens = [] # type: List[unicode]
|
||||
self.prev_row = 1
|
||||
self.prev_col = 0
|
||||
|
||||
def add_whitespace(self, start):
|
||||
row, col = start
|
||||
assert row <= self.prev_row
|
||||
col_offset = col - self.prev_col
|
||||
if col_offset:
|
||||
self.tokens.append(" " * col_offset)
|
||||
|
||||
def untokenize(self, iterable):
|
||||
for t in iterable:
|
||||
if len(t) == 2:
|
||||
self.compat(t, iterable)
|
||||
break
|
||||
tok_type, token, start, end, line = t
|
||||
self.add_whitespace(start)
|
||||
self.tokens.append(token)
|
||||
self.prev_row, self.prev_col = end
|
||||
if tok_type in (NEWLINE, NL):
|
||||
self.prev_row += 1
|
||||
self.prev_col = 0
|
||||
return "".join(self.tokens)
|
||||
|
||||
def compat(self, token, iterable):
|
||||
startline = False
|
||||
indents = []
|
||||
toks_append = self.tokens.append
|
||||
toknum, tokval = token
|
||||
if toknum in (NAME, NUMBER):
|
||||
tokval += ' '
|
||||
if toknum in (NEWLINE, NL):
|
||||
startline = True
|
||||
for tok in iterable:
|
||||
toknum, tokval = tok[:2]
|
||||
|
||||
if toknum in (NAME, NUMBER):
|
||||
tokval += ' '
|
||||
|
||||
if toknum == INDENT:
|
||||
indents.append(tokval)
|
||||
continue
|
||||
elif toknum == DEDENT:
|
||||
indents.pop()
|
||||
continue
|
||||
elif toknum in (NEWLINE, NL):
|
||||
startline = True
|
||||
elif startline and indents:
|
||||
toks_append(indents[-1])
|
||||
startline = False
|
||||
toks_append(tokval)
|
||||
|
||||
def untokenize(iterable):
|
||||
"""Transform tokens back into Python source code.
|
||||
|
||||
Each element returned by the iterable must be a token sequence
|
||||
with at least two elements, a token number and token value. If
|
||||
only two tokens are passed, the resulting output is poor.
|
||||
|
||||
Round-trip invariant for full input:
|
||||
Untokenized source will match input source exactly
|
||||
|
||||
Round-trip invariant for limited input:
|
||||
# Output text will tokenize back to the input
|
||||
t1 = [tok[:2] for tok in generate_tokens(f.readline)]
|
||||
newcode = untokenize(t1)
|
||||
readline = iter(newcode.splitlines(1)).next
|
||||
t2 = [tok[:2] for tok in generate_tokens(readline)]
|
||||
assert t1 == t2
|
||||
"""
|
||||
ut = Untokenizer()
|
||||
return ut.untokenize(iterable)
|
||||
|
||||
def generate_tokens(readline):
|
||||
"""
|
||||
The generate_tokens() generator requires one argument, readline, which
|
||||
must be a callable object which provides the same interface as the
|
||||
readline() method of built-in file objects. Each call to the function
|
||||
should return one line of input as a string. Alternately, readline
|
||||
can be a callable function terminating with StopIteration:
|
||||
readline = open(myfile).next # Example of alternate readline
|
||||
|
||||
The generator produces 5-tuples with these members: the token type; the
|
||||
token string; a 2-tuple (srow, scol) of ints specifying the row and
|
||||
column where the token begins in the source; a 2-tuple (erow, ecol) of
|
||||
ints specifying the row and column where the token ends in the source;
|
||||
and the line on which the token was found. The line passed is the
|
||||
logical line; continuation lines are included.
|
||||
"""
|
||||
lnum = parenlev = continued = 0
|
||||
namechars, numchars = string.ascii_letters + '_', '0123456789'
|
||||
contstr, needcont = '', 0
|
||||
contline = None
|
||||
indents = [0]
|
||||
|
||||
while 1: # loop over lines in stream
|
||||
try:
|
||||
line = readline()
|
||||
except StopIteration:
|
||||
line = ''
|
||||
# if we are not at the end of the file make sure the
|
||||
# line ends with a newline because the parser depends
|
||||
# on that.
|
||||
if line:
|
||||
line = line.rstrip() + '\n'
|
||||
lnum = lnum + 1
|
||||
pos, max = 0, len(line)
|
||||
|
||||
if contstr: # continued string
|
||||
if not line:
|
||||
raise TokenError("EOF in multi-line string", strstart) # type: ignore
|
||||
endmatch = endprog.match(line) # type: ignore
|
||||
if endmatch:
|
||||
pos = end = endmatch.end(0)
|
||||
yield (STRING, contstr + line[:end],
|
||||
strstart, (lnum, end), contline + line) # type: ignore
|
||||
contstr, needcont = '', 0
|
||||
contline = None
|
||||
elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
|
||||
yield (ERRORTOKEN, contstr + line,
|
||||
strstart, (lnum, len(line)), contline) # type: ignore
|
||||
contstr = ''
|
||||
contline = None
|
||||
continue
|
||||
else:
|
||||
contstr = contstr + line
|
||||
contline = contline + line
|
||||
continue
|
||||
|
||||
elif parenlev == 0 and not continued: # new statement
|
||||
if not line: break
|
||||
column = 0
|
||||
while pos < max: # measure leading whitespace
|
||||
if line[pos] == ' ': column = column + 1
|
||||
elif line[pos] == '\t': column = (column/tabsize + 1)*tabsize
|
||||
elif line[pos] == '\f': column = 0
|
||||
else: break
|
||||
pos = pos + 1
|
||||
if pos == max: break
|
||||
|
||||
if line[pos] in '#\r\n': # skip comments or blank lines
|
||||
if line[pos] == '#':
|
||||
comment_token = line[pos:].rstrip('\r\n')
|
||||
nl_pos = pos + len(comment_token)
|
||||
yield (COMMENT, comment_token,
|
||||
(lnum, pos), (lnum, pos + len(comment_token)), line)
|
||||
yield (NL, line[nl_pos:],
|
||||
(lnum, nl_pos), (lnum, len(line)), line)
|
||||
else:
|
||||
yield ((NL, COMMENT)[line[pos] == '#'], line[pos:],
|
||||
(lnum, pos), (lnum, len(line)), line)
|
||||
continue
|
||||
|
||||
if column > indents[-1]: # count indents or dedents
|
||||
indents.append(column)
|
||||
yield (INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
|
||||
while column < indents[-1]:
|
||||
if column not in indents:
|
||||
raise IndentationError(
|
||||
"unindent does not match any outer indentation level",
|
||||
("<tokenize>", lnum, pos, line))
|
||||
indents = indents[:-1]
|
||||
yield (DEDENT, '', (lnum, pos), (lnum, pos), line)
|
||||
|
||||
else: # continued statement
|
||||
if not line:
|
||||
raise TokenError("EOF in multi-line statement", (lnum, 0))
|
||||
continued = 0
|
||||
|
||||
while pos < max:
|
||||
pseudomatch = pseudoprog.match(line, pos)
|
||||
if pseudomatch: # scan for tokens
|
||||
start, end = pseudomatch.span(1)
|
||||
spos, epos, pos = (lnum, start), (lnum, end), end
|
||||
token, initial = line[start:end], line[start]
|
||||
|
||||
if end < max:
|
||||
next_pseudomatch = pseudoprog.match(line, end)
|
||||
if next_pseudomatch:
|
||||
n_start, n_end = next_pseudomatch.span(1)
|
||||
n_token = line[n_start:n_end]
|
||||
else:
|
||||
n_token = None
|
||||
else:
|
||||
n_token = None
|
||||
|
||||
if initial in numchars or (
|
||||
initial == '.' and token not in ('.', '...')
|
||||
): # ordinary number
|
||||
yield (NUMBER, token, spos, epos, line)
|
||||
elif initial in '\r\n':
|
||||
newline = NEWLINE
|
||||
if parenlev > 0:
|
||||
newline = NL
|
||||
yield (newline, token, spos, epos, line)
|
||||
elif initial == '#':
|
||||
assert not token.endswith("\n")
|
||||
yield (COMMENT, token, spos, epos, line)
|
||||
elif token in triple_quoted:
|
||||
endprog = endprogs[token]
|
||||
endmatch = endprog.match(line, pos)
|
||||
if endmatch: # all on one line
|
||||
pos = endmatch.end(0)
|
||||
token = line[start:pos]
|
||||
yield (STRING, token, spos, (lnum, pos), line)
|
||||
else:
|
||||
strstart = (lnum, start) # multiple lines
|
||||
contstr = line[start:]
|
||||
contline = line
|
||||
break
|
||||
elif initial in single_quoted or \
|
||||
token[:2] in single_quoted or \
|
||||
token[:3] in single_quoted:
|
||||
if token[-1] == '\n': # continued string
|
||||
strstart = (lnum, start)
|
||||
endprog = (endprogs[initial] or endprogs[token[1]] or
|
||||
endprogs[token[2]])
|
||||
contstr, needcont = line[start:], 1
|
||||
contline = line
|
||||
break
|
||||
else: # ordinary string
|
||||
yield (STRING, token, spos, epos, line)
|
||||
elif token == 'await' and n_token:
|
||||
yield (AWAIT, token, spos, epos, line)
|
||||
elif token == 'async' and n_token in ('def', 'for', 'with'):
|
||||
yield (ASYNC, token, spos, epos, line)
|
||||
elif initial in namechars: # ordinary name
|
||||
yield (NAME, token, spos, epos, line)
|
||||
elif token in ('...',): # ordinary name
|
||||
yield (NAME, token, spos, epos, line)
|
||||
elif initial == '\\': # continued stmt
|
||||
# This yield is new; needed for better idempotency:
|
||||
yield (NL, token, spos, (lnum, pos), line)
|
||||
continued = 1
|
||||
else:
|
||||
if initial in '([{': parenlev = parenlev + 1
|
||||
elif initial in ')]}': parenlev = parenlev - 1
|
||||
yield (OP, token, spos, epos, line)
|
||||
else:
|
||||
yield (ERRORTOKEN, line[pos],
|
||||
(lnum, pos), (lnum, pos+1), line)
|
||||
pos = pos + 1
|
||||
|
||||
for _ in indents[1:]: # pop remaining indent levels
|
||||
yield (DEDENT, '', (lnum, 0), (lnum, 0), '')
|
||||
yield (ENDMARKER, '', (lnum, 0), (lnum, 0), '')
|
||||
|
||||
if __name__ == '__main__': # testing
|
||||
import sys
|
||||
if len(sys.argv) > 1: tokenize(open(sys.argv[1]).readline)
|
||||
else: tokenize(sys.stdin.readline)
|
||||
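The 5-tuple interface described in the module docstring matches the standard library's tokenize.generate_tokens(), so the shape of the output can be previewed with the stdlib alone (a small illustrative snippet, not part of this diff):

import io
import tokenize

src = "x = 1  # answer\n"
for tok in tokenize.generate_tokens(io.StringIO(src).readline):
    # Each token carries its type, text, start and end (row, col), and source line.
    print(tokenize.tok_name[tok.type], repr(tok.string), tok.start, tok.end)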
@@ -9,8 +9,41 @@
|
||||
:license: BSD, see LICENSE for details.
|
||||
"""
|
||||
|
||||
import os
|
||||
from six import PY2
|
||||
|
||||
import sphinx
|
||||
from sphinx.pycode import ModuleAnalyzer
|
||||
|
||||
SPHINX_MODULE_PATH = os.path.splitext(sphinx.__file__)[0] + '.py'
|
||||
|
||||
|
||||
def test_ModuleAnalyzer_for_string():
|
||||
analyzer = ModuleAnalyzer.for_string('print("Hello world")', 'module_name')
|
||||
assert analyzer.modname == 'module_name'
|
||||
assert analyzer.srcname == '<string>'
|
||||
if PY2:
|
||||
assert analyzer.encoding == 'ascii'
|
||||
else:
|
||||
assert analyzer.encoding is None
|
||||
|
||||
|
||||
def test_ModuleAnalyzer_for_file():
|
||||
analyzer = ModuleAnalyzer.for_string(SPHINX_MODULE_PATH, 'sphinx')
|
||||
assert analyzer.modname == 'sphinx'
|
||||
assert analyzer.srcname == '<string>'
|
||||
if PY2:
|
||||
assert analyzer.encoding == 'ascii'
|
||||
else:
|
||||
assert analyzer.encoding is None
|
||||
|
||||
|
||||
def test_ModuleAnalyzer_for_module():
|
||||
analyzer = ModuleAnalyzer.for_module('sphinx')
|
||||
assert analyzer.modname == 'sphinx'
|
||||
assert analyzer.srcname == SPHINX_MODULE_PATH
|
||||
assert analyzer.encoding == 'utf-8'
|
||||
|
||||
|
||||
def test_ModuleAnalyzer_find_tags():
|
||||
code = ('class Foo(object):\n' # line: 1
|
||||
@@ -30,20 +63,30 @@ def test_ModuleAnalyzer_find_tags():
|
||||
' """function baz"""\n'
|
||||
' pass\n'
|
||||
'\n'
|
||||
'@decorator\n'
|
||||
'@decorator1\n'
|
||||
'@decorator2\n'
|
||||
'def quux():\n'
|
||||
' pass\n')
|
||||
' pass\n' # line: 21
|
||||
'\n'
|
||||
'class Corge(object):\n'
|
||||
' @decorator1\n'
|
||||
' @decorator2\n'
|
||||
' def grault(self):\n'
|
||||
' pass\n')
|
||||
analyzer = ModuleAnalyzer.for_string(code, 'module')
|
||||
tags = analyzer.find_tags()
|
||||
assert set(tags.keys()) == {'Foo', 'Foo.__init__', 'Foo.bar',
|
||||
'Foo.Baz', 'Foo.Baz.__init__', 'qux', 'quux'}
|
||||
assert tags['Foo'] == ('class', 1, 13) # type, start, end
|
||||
assert tags['Foo.__init__'] == ('def', 3, 5)
|
||||
assert tags['Foo.bar'] == ('def', 6, 9)
|
||||
assert tags['Foo.Baz'] == ('class', 10, 13)
|
||||
assert tags['Foo.Baz.__init__'] == ('def', 11, 13)
|
||||
assert tags['qux'] == ('def', 14, 17)
|
||||
assert tags['quux'] == ('def', 18, 21) # decorator
|
||||
'Foo.Baz', 'Foo.Baz.__init__', 'qux', 'quux',
|
||||
'Corge', 'Corge.grault'}
|
||||
assert tags['Foo'] == ('class', 1, 12) # type, start, end
|
||||
assert tags['Foo.__init__'] == ('def', 3, 4)
|
||||
assert tags['Foo.bar'] == ('def', 6, 8)
|
||||
assert tags['Foo.Baz'] == ('class', 10, 12)
|
||||
assert tags['Foo.Baz.__init__'] == ('def', 11, 12)
|
||||
assert tags['qux'] == ('def', 14, 16)
|
||||
assert tags['quux'] == ('def', 18, 21)
|
||||
assert tags['Corge'] == ('class', 23, 27)
|
||||
assert tags['Corge.grault'] == ('def', 24, 27)
|
||||
|
||||
|
||||
def test_ModuleAnalyzer_find_attr_docs():
|
||||
@@ -72,13 +115,17 @@ def test_ModuleAnalyzer_find_attr_docs():
|
||||
'\n'
|
||||
'def baz():\n'
|
||||
' """function baz"""\n'
|
||||
' pass\n')
|
||||
' pass\n'
|
||||
'\n'
|
||||
'class Qux: attr1 = 1; attr2 = 2')
|
||||
analyzer = ModuleAnalyzer.for_string(code, 'module')
|
||||
docs = analyzer.find_attr_docs()
|
||||
assert set(docs) == {('Foo', 'attr1'),
|
||||
('Foo', 'attr3'),
|
||||
('Foo', 'attr4'),
|
||||
('Foo', 'attr5'),
|
||||
('Foo', 'attr6'),
|
||||
('Foo', 'attr7'),
|
||||
('Foo', 'attr8'),
|
||||
('Foo', 'attr9')}
|
||||
assert docs[('Foo', 'attr1')] == ['comment before attr1', '']
|
||||
@@ -86,5 +133,23 @@ def test_ModuleAnalyzer_find_attr_docs():
|
||||
assert docs[('Foo', 'attr4')] == ['long attribute comment', '']
|
||||
assert docs[('Foo', 'attr4')] == ['long attribute comment', '']
|
||||
assert docs[('Foo', 'attr5')] == ['attribute comment for attr5', '']
|
||||
assert docs[('Foo', 'attr6')] == ['this comment is ignored', '']
|
||||
assert docs[('Foo', 'attr7')] == ['this comment is ignored', '']
|
||||
assert docs[('Foo', 'attr8')] == ['attribute comment for attr8', '']
|
||||
assert docs[('Foo', 'attr9')] == ['string after attr9', '']
|
||||
assert analyzer.tagorder == {'Foo': 0,
|
||||
'Foo.__init__': 8,
|
||||
'Foo.attr1': 1,
|
||||
'Foo.attr2': 2,
|
||||
'Foo.attr3': 3,
|
||||
'Foo.attr4': 4,
|
||||
'Foo.attr5': 5,
|
||||
'Foo.attr6': 6,
|
||||
'Foo.attr7': 7,
|
||||
'Foo.attr8': 10,
|
||||
'Foo.attr9': 12,
|
||||
'Foo.bar': 13,
|
||||
'baz': 14,
|
||||
'Qux': 15,
|
||||
'Qux.attr1': 16,
|
||||
'Qux.attr2': 17}
|
||||
|
||||
261
tests/test_pycode_parser.py
Normal file
@@ -0,0 +1,261 @@
# -*- coding: utf-8 -*-
"""
test_pycode_parser
~~~~~~~~~~~~~~~~~~

Test pycode.parser.

:copyright: Copyright 2007-2016 by the Sphinx team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""

from sphinx.pycode.parser import Parser


def test_comment_picker_basic():
source = ('a = 1 + 1 #: assignment\n'
'b = 1 +\\\n 1 #: assignment including a CR\n'
'c = (1 +\n 1) #: tuple \n'
'd = {1, \n 1} #: set\n'
'e = [1, \n 1] #: list #: additional comment\n'
'f = "abc"\n'
'#: string; comment on next line (ignored)\n'
'g = 1.0\n'
'"""float; string on next line"""\n')
parser = Parser(source)
parser.parse()
assert parser.comments == {('', 'a'): 'assignment',
('', 'b'): 'assignment including a CR',
('', 'c'): 'tuple ',
('', 'd'): ' set',
('', 'e'): 'list #: additional comment',
('', 'g'): 'float; string on next line'}

def test_comment_picker_location():
# multiple "before" comments
source = ('#: comment before assignment1\n'
'#:\n'
'#: comment before assignment2\n'
'a = 1 + 1\n')
parser = Parser(source)
parser.parse()
assert parser.comments == {('', 'a'): ('comment before assignment1\n'
'\n'
'comment before assignment2')}

# before and after comments
source = ('#: comment before assignment\n'
'a = 1 + 1 #: comment after assignment\n')
parser = Parser(source)
parser.parse()
assert parser.comments == {('', 'a'): 'comment after assignment'}

# after comment and next line string
source = ('a = 1 + 1\n #: comment after assignment\n'
'"""string on next line"""\n')
parser = Parser(source)
parser.parse()
assert parser.comments == {('', 'a'): 'string on next line'}

# before comment and next line string
source = ('#: comment before assignment\n'
'a = 1 + 1\n'
'"""string on next line"""\n')
parser = Parser(source)
parser.parse()
assert parser.comments == {('', 'a'): 'string on next line'}

# before comment, after comment and next line string
source = ('#: comment before assignment\n'
'a = 1 + 1 #: comment after assignment\n'
'"""string on next line"""\n')
parser = Parser(source)
parser.parse()
assert parser.comments == {('', 'a'): 'string on next line'}

# inside __init__ method
source = ('class Foo(object):\n'
' def __init__(self):\n'
' #: comment before assignment\n'
' self.attr1 = None\n'
' self.attr2 = None #: comment after assignment\n'
'\n'
' #: comment for attr3(1)\n'
' self.attr3 = None #: comment for attr3(2)\n'
' """comment for attr3(3)"""\n')
parser = Parser(source)
parser.parse()
assert parser.comments == {('Foo', 'attr1'): 'comment before assignment',
('Foo', 'attr2'): 'comment after assignment',
('Foo', 'attr3'): 'comment for attr3(3)'}


def test_complex_assignment():
source = ('a = 1 + 1; b = a #: compound statement\n'
'c, d = (1, 1) #: unpack assignment\n'
'e = True #: first assignment\n'
'e = False #: second assignment\n'
'f = g = None #: multiple assignment at once\n')
parser = Parser(source)
parser.parse()
assert parser.comments == {('', 'b'): 'compound statement',
('', 'c'): 'unpack assignment',
('', 'd'): 'unpack assignment',
('', 'e'): 'second assignment',
('', 'f'): 'multiple assignment at once',
('', 'g'): 'multiple assignment at once'}
assert parser.definitions == {}


def test_obj_assignment():
source = ('obj = SomeObject() #: some object\n'
'obj.attr = 1 #: attr1\n'
'obj.attr.attr = 1 #: attr2\n')
parser = Parser(source)
parser.parse()
assert parser.comments == {('', 'obj'): 'some object'}
assert parser.definitions == {}


def test_container_assignment():
source = ('l = [] #: list\n'
'l[1] = True #: list assignment\n'
'l[0:0] = [] #: list assignment\n'
'l[_from:_to] = [] #: list assignment\n'
'd = {} #: dict\n'
'd["doc"] = 1 #: dict assignment\n')
parser = Parser(source)
parser.parse()
assert parser.comments == {('', 'l'): 'list',
('', 'd'): 'dict'}
assert parser.definitions == {}


def test_function():
source = ('def some_function():\n'
' """docstring"""\n'
' a = 1 + 1 #: comment1\n'
'\n'
' b = a #: comment2\n')
parser = Parser(source)
parser.parse()
assert parser.comments == {}
assert parser.definitions == {'some_function': ('def', 1, 5)}
assert parser.deforders == {'some_function': 0}


def test_nested_function():
source = ('def some_function():\n'
' a = 1 + 1 #: comment1\n'
'\n'
' def inner_function():\n'
' b = 1 + 1 #: comment2\n')
parser = Parser(source)
parser.parse()
assert parser.comments == {}
assert parser.definitions == {'some_function': ('def', 1, 5)}
assert parser.deforders == {'some_function': 0}


def test_class():
source = ('class Foo(object):\n'
' attr1 = None #: comment1\n'
' attr2 = None #: comment2\n'
'\n'
' def __init__(self):\n'
' self.a = 1 + 1 #: comment3\n'
' self.attr2 = 1 + 1 #: overrided\n'
' b = 1 + 1 #: comment5\n'
'\n'
' def some_method(self):\n'
' c = 1 + 1 #: comment6\n')
parser = Parser(source)
parser.parse()
assert parser.comments == {('Foo', 'attr1'): 'comment1',
('Foo', 'a'): 'comment3',
('Foo', 'attr2'): 'overrided'}
assert parser.definitions == {'Foo': ('class', 1, 11),
'Foo.__init__': ('def', 5, 8),
'Foo.some_method': ('def', 10, 11)}
assert parser.deforders == {'Foo': 0,
'Foo.attr1': 1,
'Foo.__init__': 3,
'Foo.a': 4,
'Foo.attr2': 5,
'Foo.some_method': 6}


def test_class_uses_non_self():
source = ('class Foo(object):\n'
' def __init__(this):\n'
' this.a = 1 + 1 #: comment\n')
parser = Parser(source)
parser.parse()
assert parser.comments == {('Foo', 'a'): 'comment'}
assert parser.definitions == {'Foo': ('class', 1, 3),
'Foo.__init__': ('def', 2, 3)}
assert parser.deforders == {'Foo': 0,
'Foo.__init__': 1,
'Foo.a': 2}


def test_nested_class():
source = ('class Foo(object):\n'
' attr1 = None #: comment1\n'
'\n'
' class Bar(object):\n'
' attr2 = None #: comment2\n')
parser = Parser(source)
parser.parse()
assert parser.comments == {('Foo', 'attr1'): 'comment1',
('Foo.Bar', 'attr2'): 'comment2'}
assert parser.definitions == {'Foo': ('class', 1, 5),
'Foo.Bar': ('class', 4, 5)}
assert parser.deforders == {'Foo': 0,
'Foo.attr1': 1,
'Foo.Bar': 2,
'Foo.Bar.attr2': 3}


def test_comment_picker_multiline_string():
source = ('class Foo(object):\n'
' a = None\n'
' """multiline\n'
' docstring\n'
' """\n'
' b = None\n'
' """\n'
' docstring\n'
' starts with::\n'
'\n'
' empty line"""\n')
parser = Parser(source)
parser.parse()
assert parser.comments == {('Foo', 'a'): 'multiline\ndocstring',
('Foo', 'b'): 'docstring\nstarts with::\n\n empty line'}


def test_decorators():
source = ('@deco\n'
'def func1(): pass\n'
'\n'
'@deco(param1, param2)\n'
'def func2(): pass\n'
'\n'
'@deco1\n'
'@deco2\n'
'def func3(): pass\n'
'\n'
'@deco\n'
'class Foo():\n'
' @deco1\n'
' @deco2\n'
' def method(self): pass\n')
parser = Parser(source)
parser.parse()
assert parser.definitions == {'func1': ('def', 1, 2),
'func2': ('def', 4, 5),
'func3': ('def', 7, 9),
'Foo': ('class', 11, 15),
'Foo.method': ('def', 13, 15)}

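The new test module above drives sphinx.pycode.parser.Parser end to end. A short usage sketch, built only from the names and calls that appear in those tests:

from sphinx.pycode.parser import Parser

source = ('#: comment before assignment\n'
          'a = 1 + 1\n'
          '\n'
          'def func():\n'
          '    pass\n')
parser = Parser(source)
parser.parse()
print(parser.comments)     # {(namespace, name): comment}, here {('', 'a'): 'comment before assignment'}
print(parser.definitions)  # {dotted name: (kind, start line, end line)} for classes and functions
print(parser.deforders)    # {dotted name: order of appearance in the source}
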
tests/test_util_docstrings.py (new file, 65 lines)
@@ -0,0 +1,65 @@
# -*- coding: utf-8 -*-
"""
test_util_docstrings
~~~~~~~~~~~~~~~~~~~~

Test sphinx.util.docstrings.

:copyright: Copyright 2007-2016 by the Sphinx team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""

from sphinx.util.docstrings import prepare_docstring, prepare_commentdoc


def test_prepare_docstring():
docstring = """multiline docstring

Lorem ipsum dolor sit amet, consectetur adipiscing elit,
sed do eiusmod tempor incididunt ut labore et dolore magna
aliqua::

Ut enim ad minim veniam, quis nostrud exercitation
ullamco laboris nisi ut aliquip ex ea commodo consequat.
"""

assert (prepare_docstring(docstring) ==
["multiline docstring",
"",
"Lorem ipsum dolor sit amet, consectetur adipiscing elit,",
"sed do eiusmod tempor incididunt ut labore et dolore magna",
"aliqua::",
"",
" Ut enim ad minim veniam, quis nostrud exercitation",
" ullamco laboris nisi ut aliquip ex ea commodo consequat.",
""])
assert (prepare_docstring(docstring, 5) ==
["multiline docstring",
"",
"Lorem ipsum dolor sit amet, consectetur adipiscing elit,",
"sed do eiusmod tempor incididunt ut labore et dolore magna",
"aliqua::",
"",
"Ut enim ad minim veniam, quis nostrud exercitation",
" ullamco laboris nisi ut aliquip ex ea commodo consequat.",
""])

docstring = """

multiline docstring with leading empty lines
"""
assert (prepare_docstring(docstring) ==
["multiline docstring with leading empty lines",
""])

docstring = "single line docstring"
assert (prepare_docstring(docstring) ==
["single line docstring",
""])


def test_prepare_commentdoc():
assert prepare_commentdoc("hello world") == []
assert prepare_commentdoc("#: hello world") == ["hello world", ""]
assert prepare_commentdoc("#:  hello world") == [" hello world", ""]
assert prepare_commentdoc("#: hello\n#: world\n") == ["hello", "world", ""]

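These docstring helpers are small pure functions; a usage sketch that only restates results already asserted in the tests above:

from sphinx.util.docstrings import prepare_docstring, prepare_commentdoc

print(prepare_docstring("single line docstring"))
# -> ["single line docstring", ""]  (docstring split into lines, terminated by an empty string)

print(prepare_commentdoc("#: hello\n#: world\n"))
# -> ["hello", "world", ""]  (only '#:'-prefixed lines are kept)

print(prepare_commentdoc("hello world"))
# -> []  (no '#:' prefix, so nothing is extracted)
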
@@ -20,7 +20,6 @@ Release checklist
* Check diff by `git diff`
* `git commit -am 'Bump to x.y.z final'`
* `make clean`
* `python setup.py compile_grammar`
* `python setup.py release bdist_wheel sdist upload --identity=[your key]`
* open https://pypi.python.org/pypi/Sphinx and check there are no obvious errors
* `git tag x.y.z` with version number