mirror of
https://github.com/sphinx-doc/sphinx.git
synced 2025-02-25 18:55:22 -06:00
493 lines
17 KiB
Python
493 lines
17 KiB
Python
"""
|
|
sphinx.pycode.parser
|
|
~~~~~~~~~~~~~~~~~~~~
|
|
|
|
Utilities for parsing and analyzing Python code.
|
|
|
|
:copyright: Copyright 2007-2019 by the Sphinx team, see AUTHORS.
|
|
:license: BSD, see LICENSE for details.
|
|
"""
|
|
import ast
|
|
import inspect
|
|
import itertools
|
|
import re
|
|
import sys
|
|
import tokenize
|
|
from token import NAME, NEWLINE, INDENT, DEDENT, NUMBER, OP, STRING
|
|
from tokenize import COMMENT, NL
|
|
from typing import Any, Dict, List, Tuple
|
|
|
|
|
|
comment_re = re.compile('^\\s*#: ?(.*)\r?\n?$')
|
|
indent_re = re.compile('^\\s*$')
|
|
emptyline_re = re.compile('^\\s*(#.*)?$')
|
|
|
|
|
|
# AST statement classes that are treated as variable assignments.
# ``ast.AnnAssign`` is only included on Python 3.6+ (it is not available on
# Python 3.5).  Both branches produce a real tuple -- note the trailing comma
# in the fallback -- so the constant has the same type on every interpreter
# and can be iterated or concatenated as well as passed to ``isinstance()``.
if sys.version_info >= (3, 6):
    ASSIGN_NODES = (ast.Assign, ast.AnnAssign)
else:
    ASSIGN_NODES = (ast.Assign,)
|
|
|
|
|
|
def filter_whitespace(code: str) -> str:
    """Return *code* with every form feed (FF) character turned into a space."""
    # Splitting on FF and re-joining with a blank swaps each FF for one space.
    return ' '.join(code.split('\f'))
|
|
|
|
|
|
def get_assign_targets(node: ast.AST) -> List[ast.expr]:
    """Return the assignment targets of an Assign or AnnAssign node as a list.

    An ``ast.Assign`` node already stores a list of targets; any other node
    is expected to expose a single ``target`` attribute, which is wrapped in
    a one-element list.
    """
    if not isinstance(node, ast.Assign):
        return [node.target]  # type: ignore
    return node.targets
|
|
|
|
|
|
def get_lvar_names(node: ast.AST, self: ast.arg = None) -> List[str]:
    """Collect the variable names bound by an assignment target.

    :param node: the target expression (or, when recursing on an attribute,
                 the attribute name as a plain ``str``)
    :param self: the first argument of the enclosing function, if any; when
                 given, only names/attributes based on that argument count
    :returns: list of variable names
    :raises TypeError: if the target does not create a new variable::

        ary[0] = 'foo'
        dic["bar"] = 'baz'
        # => TypeError

    :raises NotImplementedError: for node types this function does not know
    """
    if self:
        self_id = self.arg

    # Dispatch on the class *name* so that plain ``str`` values can be handled
    # alongside AST nodes.
    kind = node.__class__.__name__

    if kind in ('Index', 'Num', 'Slice', 'Str', 'Subscript'):
        raise TypeError('%r does not create new variable' % node)

    if kind == 'Name':
        if self is not None and node.id != self_id:  # type: ignore
            raise TypeError('The assignment %r is not instance variable' % node)
        return [node.id]  # type: ignore

    if kind in ('Tuple', 'List'):
        names = []
        for element in node.elts:  # type: ignore
            try:
                names.extend(get_lvar_names(element, self))
            except TypeError:
                # this element does not define a variable; skip only it
                pass
        return names

    if kind == 'Attribute':
        owner = node.value  # type: ignore
        if owner.__class__.__name__ == 'Name' and self and owner.id == self_id:
            # instance variable
            return ["%s" % get_lvar_names(node.attr, self)[0]]  # type: ignore
        raise TypeError('The assignment %r is not instance variable' % node)

    if kind == 'str':
        return [node]  # type: ignore

    if kind == 'Starred':
        return get_lvar_names(node.value, self)  # type: ignore

    raise NotImplementedError('Unexpected node name %r' % kind)
|
|
|
|
|
|
def dedent_docstring(s: str) -> str:
    """Strip the shared leading indentation from a docstring.

    The text is attached to a throwaway function so that ``inspect.getdoc``
    can do the clean-up; leading and trailing CR/LF characters are removed
    from the result.
    """
    def carrier() -> None:
        # Exists only to hold ``__doc__`` for `inspect.getdoc`.
        pass

    carrier.__doc__ = s
    return inspect.getdoc(carrier).strip("\r\n")
|
|
|
|
|
|
class Token:
    """Convenience wrapper around the 5-tuples produced by the tokenize module.

    Equality is overloaded so that a token can be compared directly against a
    token kind (``int``), a token string (``str``) or a ``[kind, value]``
    pair.  Because ``__eq__`` is defined without ``__hash__``, instances are
    not hashable.
    """

    def __init__(self, kind: int, value: Any, start: Tuple[int, int], end: Tuple[int, int],
                 source: str) -> None:
        # kind:   numeric token type
        # value:  token text
        # start:  (row, column) where the token begins
        # end:    (row, column) where the token ends
        # source: the source line the token was found on
        self.kind, self.value = kind, value
        self.start, self.end = start, end
        self.source = source

    def __eq__(self, other: Any) -> bool:
        """Compare against a kind, a value, a (kind, value) pair or ``None``.

        :raises ValueError: if *other* is of any other type
        """
        if isinstance(other, int):
            return self.kind == other
        if isinstance(other, str):
            return self.value == other
        if isinstance(other, (list, tuple)):
            return [self.kind, self.value] == list(other)
        if other is None:
            return False
        raise ValueError('Unknown value: %r' % other)

    def match(self, *conditions) -> bool:
        """Return True if the token equals at least one of *conditions*."""
        for candidate in conditions:
            if self == candidate:
                return True
        return False

    def __repr__(self) -> str:
        kind_name = tokenize.tok_name[self.kind]
        return '<Token kind=%r value=%r>' % (kind_name, self.value.strip())
|
|
|
|
|
|
class TokenProcessor:
    """Base class that walks a list of source lines token by token."""

    def __init__(self, buffers: List[str]) -> None:
        self.buffers = buffers
        line_iter = iter(buffers)
        # generate_tokens() expects a readline-like callable; feed it the
        # buffered lines one at a time.
        self.tokens = tokenize.generate_tokens(lambda: next(line_iter))
        self.current = None   # type: Token
        self.previous = None  # type: Token

    def get_line(self, lineno: int) -> str:
        """Return the source line with the given 1-based line number."""
        return self.buffers[lineno - 1]

    def fetch_token(self) -> Token:
        """Advance to the next token and return it.

        The token that was current becomes ``self.previous``.  Returns
        ``None`` (and sets ``self.current`` to ``None``) once the token
        stream is exhausted.
        """
        self.previous = self.current
        raw = next(self.tokens, None)
        self.current = None if raw is None else Token(*raw)
        return self.current

    def fetch_until(self, condition: Any) -> List[Token]:
        """Consume tokens up to and including the one equal to *condition*.

        .. note:: An opening ``(``, ``{`` or ``[`` is consumed together with
                  everything up to its closing counterpart, so a match for
                  *condition* inside brackets does not end the scan.

        :returns: every token consumed, in order
        """
        bracket_pairs = (('(', ')'), ('{', '}'), ('[', ']'))
        collected = []
        while self.fetch_token():
            collected.append(self.current)
            if self.current == condition:
                break
            for opener, closer in bracket_pairs:
                if self.current == [OP, opener]:
                    collected += self.fetch_until([OP, closer])
                    # self.current has moved on; do not test further openers
                    break

        return collected
|
|
|
|
|
|
class AfterCommentParser(TokenProcessor):
    """Token-level parser that finds the comment trailing an assignment.

    It is fed source lines that begin with an assignment statement; after
    :meth:`parse` has run, ``self.comment`` holds the comment token text that
    follows the statement, or ``None`` if there is none.
    """

    def __init__(self, lines: List[str]) -> None:
        super().__init__(lines)
        self.comment = None  # type: str

    def fetch_rvalue(self) -> List[Token]:
        """Consume and return the tokens of the assignment's right-hand side.

        Bracketed groups and indented blocks are swallowed whole.  Scanning
        stops after a ``;`` or after the first token that is not an operator,
        name, number or string.
        """
        rvalue = []
        while self.fetch_token():
            token = self.current
            rvalue.append(token)
            if token == [OP, '(']:
                rvalue.extend(self.fetch_until([OP, ')']))
            elif token == [OP, '{']:
                rvalue.extend(self.fetch_until([OP, '}']))
            elif token == [OP, '[']:
                rvalue.extend(self.fetch_until([OP, ']']))
            elif token == INDENT:
                rvalue.extend(self.fetch_until(DEDENT))
            elif token == [OP, ';'] or token.kind not in (OP, NAME, NUMBER, STRING):
                break

        return rvalue

    def parse(self) -> None:
        """Scan the statement and store a trailing comment in ``self.comment``."""
        # skip lvalue (or whole of AnnAssign)
        while True:
            token = self.fetch_token()
            if token.match([OP, '='], NEWLINE, COMMENT):
                break
            assert self.current

        # skip rvalue (if exists)
        if self.current == [OP, '=']:
            self.fetch_rvalue()

        # whatever token scanning stopped on decides whether a comment exists
        if self.current == COMMENT:
            self.comment = self.current.value
|
|
|
|
|
|
class VariableCommentPicker(ast.NodeVisitor):
    """AST visitor that collects variable comments and definition order.

    After visiting a module tree:

    * ``comments`` maps ``(context, name)`` to the comment text found for
      that variable (``#:`` comments before or after the assignment, or a
      string literal directly following it);
    * ``deforders`` maps a dotted name to a counter value reflecting the
      order in which definitions were encountered.
    """

    def __init__(self, buffers: List[str], encoding: str) -> None:
        self.counter = itertools.count()
        self.buffers = buffers
        self.encoding = encoding
        self.context = []               # type: List[str]
        self.current_classes = []       # type: List[str]
        self.current_function = None    # type: ast.FunctionDef
        self.comments = {}              # type: Dict[Tuple[str, str], str]
        self.previous = None            # type: ast.AST
        self.deforders = {}             # type: Dict[str, int]
        super().__init__()

    def _definition_context(self) -> List[str]:
        """Return the name prefix new definitions are recorded under.

        Outside of functions this is the current context.  Inside a function
        only the ``__init__`` method of a class is recorded, using the class
        as prefix.  Returns ``None`` when nothing should be recorded.
        """
        if not self.current_function:
            return self.context
        if self.current_classes and self.context[-1] == "__init__":
            # store variable comments inside __init__ method of classes
            return self.context[:-1]
        return None

    def add_entry(self, name: str) -> None:
        """Record the definition order of *name* in the current scope."""
        scope = self._definition_context()
        if scope is not None:  # an empty list (module level) is a valid scope
            self.deforders[".".join(scope + [name])] = next(self.counter)

    def add_variable_comment(self, name: str, comment: str) -> None:
        """Store *comment* for variable *name* in the current scope."""
        scope = self._definition_context()
        if scope is not None:
            self.comments[(".".join(scope), name)] = comment

    def get_self(self) -> ast.arg:
        """Return the first argument node of the current function.

        Returns ``None`` when not inside a function or when the function
        takes no positional arguments.
        """
        if self.current_function and self.current_function.args.args:
            return self.current_function.args.args[0]
        else:
            return None

    def get_line(self, lineno: int) -> str:
        """Return the source line with the given 1-based line number."""
        return self.buffers[lineno - 1]

    def visit(self, node: ast.AST) -> None:
        """Visit *node*, then remember it as ``self.previous``."""
        super().visit(node)
        self.previous = node

    def _add_imported_names(self, node: ast.AST) -> None:
        """Record each name bound by an import statement (alias if given)."""
        for alias in node.names:  # type: ignore
            self.add_entry(alias.asname or alias.name)

    def visit_Import(self, node: ast.Import) -> None:
        """Handles Import node and record it to definition orders."""
        self._add_imported_names(node)

    def visit_ImportFrom(self, node: ast.ImportFrom) -> None:
        """Handles ImportFrom node and record it to definition orders."""
        self._add_imported_names(node)

    def _find_comment(self, node: ast.AST, current_line: str) -> str:
        """Return the ``#:`` comment attached to an assignment, or ``None``.

        A comment after the statement wins; otherwise the run of ``#:``
        comment lines directly above the statement is used, provided the
        statement is preceded only by whitespace on its own line.
        """
        # check comments after assignment
        parser = AfterCommentParser([current_line[node.col_offset:]] +  # type: ignore
                                    self.buffers[node.lineno:])  # type: ignore
        parser.parse()
        if parser.comment and comment_re.match(parser.comment):
            return comment_re.sub('\\1', parser.comment)

        # check comments before assignment
        if indent_re.match(current_line[:node.col_offset]):  # type: ignore
            comment_lines = []
            # walk upwards from the line just above the assignment
            for lineno in range(node.lineno - 1, 0, -1):  # type: ignore
                before_line = self.get_line(lineno)
                if not comment_re.match(before_line):
                    break
                comment_lines.append(comment_re.sub('\\1', before_line))

            if comment_lines:
                return dedent_docstring('\n'.join(reversed(comment_lines)))

        return None

    def visit_Assign(self, node: ast.Assign) -> None:
        """Handles Assign node and pick up a variable comment."""
        try:
            self_arg = self.get_self()
            varnames = []  # type: List[str]
            for target in get_assign_targets(node):
                varnames.extend(get_lvar_names(target, self=self_arg))
            current_line = self.get_line(node.lineno)
        except TypeError:
            return  # this assignment is not new definition!

        comment = self._find_comment(node, current_line)
        for varname in varnames:
            # an empty comment string is still a comment; only None means
            # "not commented" (record deforders only)
            if comment is not None:
                self.add_variable_comment(varname, comment)
            self.add_entry(varname)

    def visit_AnnAssign(self, node: ast.AST) -> None:  # Note: ast.AnnAssign not found in py35
        """Handles AnnAssign node and pick up a variable comment."""
        self.visit_Assign(node)  # type: ignore

    @staticmethod
    def _string_literal(node: ast.AST) -> str:
        """Return the text of a ``str`` literal node, or ``None``.

        The check goes by class name instead of ``ast.Str``: that class is
        deprecated since Python 3.8 (string literals are ``ast.Constant``
        nodes there) and no longer exists on Python 3.12+, where referring
        to it raises ``AttributeError``.
        """
        node_name = node.__class__.__name__
        if node_name == 'Constant':
            value = node.value  # type: ignore
        elif node_name == 'Str':
            value = node.s  # type: ignore
        else:
            return None
        # bytes and other constants are not documentation strings
        return value if isinstance(value, str) else None

    def visit_Expr(self, node: ast.Expr) -> None:
        """Handles Expr node and pick up a comment if string.

        A bare string literal that directly follows an assignment is taken
        as the documentation of the first target of that assignment.
        """
        if not isinstance(self.previous, ASSIGN_NODES):
            return
        docstring = self._string_literal(node.value)
        if docstring is None:
            return

        try:
            targets = get_assign_targets(self.previous)
            varnames = get_lvar_names(targets[0], self.get_self())
        except TypeError:
            return  # this assignment is not new definition!

        comment = dedent_docstring(docstring)
        for varname in varnames:
            self.add_variable_comment(varname, comment)
            self.add_entry(varname)

    def visit_Try(self, node: ast.Try) -> None:
        """Handles Try node and processes body and else-clause.

        .. note:: pycode parser ignores objects definition in except-clause.
        """
        for subnode in node.body:
            self.visit(subnode)
        for subnode in node.orelse:
            self.visit(subnode)

    def visit_ClassDef(self, node: ast.ClassDef) -> None:
        """Handles ClassDef node and set context."""
        self.current_classes.append(node.name)
        self.add_entry(node.name)
        self.context.append(node.name)
        # reset so that a string at the top of the class body is not paired
        # with an assignment that precedes the class
        self.previous = node
        for child in node.body:
            self.visit(child)
        self.context.pop()
        self.current_classes.pop()

    def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
        """Handles FunctionDef node and set context.

        Functions nested inside another function are skipped entirely.
        """
        if self.current_function is not None:
            return

        self.add_entry(node.name)  # should be called before setting self.current_function
        self.context.append(node.name)
        self.current_function = node
        for child in node.body:
            self.visit(child)
        self.context.pop()
        self.current_function = None

    def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None:
        """Handles AsyncFunctionDef node and set context."""
        self.visit_FunctionDef(node)  # type: ignore
|
|
|
|
|
|
class DefinitionFinder(TokenProcessor):
    """Token-level parser that locates functions, classes and methods.

    After :meth:`parse`, ``definitions`` maps each dotted name to a
    ``(type, start_line, end_line)`` tuple, where *type* is ``'class'`` or
    ``'def'``.
    """

    def __init__(self, lines: List[str]) -> None:
        super().__init__(lines)
        self.decorator = None    # type: Token
        self.context = []        # type: List[str]
        self.indents = []        # type: List
        self.definitions = {}    # type: Dict[str, Tuple[str, int, int]]

    def add_definition(self, name: str, entry: Tuple[str, int, int]) -> None:
        """Register the location *entry* under *name*.

        A function defined directly inside another function's block is not
        registered.
        """
        inside_function = bool(self.indents) and self.indents[-1][0] == 'def'
        if inside_function and entry[0] == 'def':
            # ignore definition of inner function
            return
        self.definitions[name] = entry

    def parse(self) -> None:
        """Walk the whole token stream and fill ``self.definitions``."""
        token = self.fetch_token()
        while token is not None:
            if token == COMMENT:
                pass
            elif token == [OP, '@'] and (self.previous is None or
                                         self.previous.match(NEWLINE, NL, INDENT, DEDENT)):
                # an ``@`` opening a logical line starts a decorator; only
                # the first one of a stack is remembered
                if self.decorator is None:
                    self.decorator = token
            elif token.match([NAME, 'class'], [NAME, 'def']):
                # the keyword text doubles as the definition type
                self.parse_definition(token.value)
            elif token == INDENT:
                self.indents.append(('other', None, None))
            elif token == DEDENT:
                self.finalize_block()
            token = self.fetch_token()

    def parse_definition(self, typ: str) -> None:
        """Handle a ``class``/``def`` statement whose keyword was just read.

        :param typ: ``'class'`` or ``'def'``
        """
        name_token = self.fetch_token()
        self.context.append(name_token.value)
        qualname = '.'.join(self.context)

        # a decorated definition starts at its first decorator
        first_line = name_token.start[0]
        if self.decorator:
            first_line = self.decorator.start[0]
            self.decorator = None

        self.fetch_until([OP, ':'])
        if not self.fetch_token().match(COMMENT, NEWLINE):
            # one-liner
            self.add_definition(qualname, (typ, first_line, name_token.end[0]))
            self.context.pop()
            return

        # a block follows; it is registered by finalize_block() at its DEDENT
        self.fetch_until(INDENT)
        self.indents.append((typ, qualname, first_line))

    def finalize_block(self) -> None:
        """Close the innermost indented block; register it if it is a definition."""
        typ, funcname, start_pos = self.indents.pop()
        if typ == 'other':
            return

        # step back over blank and comment-only lines trailing the block
        end_pos = self.current.end[0] - 1
        while emptyline_re.match(self.get_line(end_pos)):
            end_pos -= 1

        self.add_definition(funcname, (typ, start_pos, end_pos))
        self.context.pop()
|
|
|
|
|
|
class Parser:
    """Front end that analyses a piece of Python source code.

    It combines ``VariableCommentPicker`` (variable comments and definition
    order) with ``DefinitionFinder`` (line ranges of definitions) and exposes
    their results as ``comments``, ``deforders`` and ``definitions``.
    """

    def __init__(self, code: str, encoding: str = 'utf-8') -> None:
        self.code = filter_whitespace(code)
        self.encoding = encoding
        self.comments = {}       # type: Dict[Tuple[str, str], str]
        self.deforders = {}      # type: Dict[str, int]
        self.definitions = {}    # type: Dict[str, Tuple[str, int, int]]

    def parse(self) -> None:
        """Run both analyses over the source code."""
        self.parse_comments()
        self.parse_definition()

    def parse_comments(self) -> None:
        """Collect variable comments and definition order from the code."""
        tree = ast.parse(self.code.encode())
        source_lines = self.code.splitlines(True)  # keep line endings
        picker = VariableCommentPicker(source_lines, self.encoding)
        picker.visit(tree)
        self.comments, self.deforders = picker.comments, picker.deforders

    def parse_definition(self) -> None:
        """Collect the line ranges of classes and functions from the code."""
        source_lines = self.code.splitlines(True)  # keep line endings
        finder = DefinitionFinder(source_lines)
        finder.parse()
        self.definitions = finder.definitions
|