mirror of
https://github.com/sphinx-doc/sphinx.git
synced 2025-02-25 18:55:22 -06:00
Some speedups in pytree.
Add Cython parse.py replacement, yielding a 2x speedup in parsing.
This commit is contained in:
parent
eb66f15544
commit
eb7d654cb0
156
sphinx/pycode/pgen2/parse.pyx
Normal file
156
sphinx/pycode/pgen2/parse.pyx
Normal file
@ -0,0 +1,156 @@
|
||||
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
|
||||
# Licensed to PSF under a Contributor Agreement.
|
||||
|
||||
"""Parser engine for the grammar tables generated by pgen.
|
||||
|
||||
The grammar table must be loaded first.
|
||||
|
||||
See Parser/parser.c in the Python distribution for additional info on
|
||||
how this parsing engine works.
|
||||
|
||||
"""
|
||||
|
||||
from sphinx.pycode.pytree import Node, Leaf
|
||||
|
||||
DEF NAME = 1
|
||||
|
||||
class ParseError(Exception):
|
||||
"""Exception to signal the parser is stuck."""
|
||||
|
||||
def __init__(self, msg, type, value, context):
|
||||
Exception.__init__(self, "%s: type=%r, value=%r, context=%r" %
|
||||
(msg, type, value, context))
|
||||
self.msg = msg
|
||||
self.type = type
|
||||
self.value = value
|
||||
self.context = context
|
||||
|
||||
|
||||
cdef class Parser:
|
||||
cdef public grammar, stack, rootnode, used_names
|
||||
cdef _grammar_dfas, _grammar_labels, _grammar_keywords, _grammar_tokens
|
||||
cdef _grammar_number2symbol
|
||||
|
||||
def __init__(self, grammar, convert=None):
|
||||
self.grammar = grammar
|
||||
#self.convert = convert or noconvert
|
||||
|
||||
self._grammar_dfas = grammar.dfas
|
||||
self._grammar_labels = grammar.labels
|
||||
self._grammar_keywords = grammar.keywords
|
||||
self._grammar_tokens = grammar.tokens
|
||||
self._grammar_number2symbol = grammar.number2symbol
|
||||
|
||||
def setup(self, start=None):
|
||||
if start is None:
|
||||
start = self.grammar.start
|
||||
# Each stack entry is a tuple: (dfa, state, node).
|
||||
# A node is a tuple: (type, value, context, children),
|
||||
# where children is a list of nodes or None, and context may be None.
|
||||
newnode = (start, None, None, [])
|
||||
stackentry = (self._grammar_dfas[start], 0, newnode)
|
||||
self.stack = [stackentry]
|
||||
self.rootnode = None
|
||||
self.used_names = set() # Aliased to self.rootnode.used_names in pop()
|
||||
|
||||
def addtoken(self, type, value, context):
|
||||
"""Add a token; return True iff this is the end of the program."""
|
||||
cdef int ilabel, i, t, state, newstate
|
||||
# Map from token to label
|
||||
ilabel = self.classify(type, value, context)
|
||||
# Loop until the token is shifted; may raise exceptions
|
||||
while True:
|
||||
dfa, state, node = self.stack[-1]
|
||||
states, first = dfa
|
||||
arcs = states[state]
|
||||
# Look for a state with this label
|
||||
for i, newstate in arcs:
|
||||
t, v = self._grammar_labels[i]
|
||||
if ilabel == i:
|
||||
# Look it up in the list of labels
|
||||
## assert t < 256
|
||||
# Shift a token; we're done with it
|
||||
self.shift(type, value, newstate, context)
|
||||
# Pop while we are in an accept-only state
|
||||
state = newstate
|
||||
while states[state] == [(0, state)]:
|
||||
self.pop()
|
||||
if not self.stack:
|
||||
# Done parsing!
|
||||
return True
|
||||
dfa, state, node = self.stack[-1]
|
||||
states, first = dfa
|
||||
# Done with this token
|
||||
return False
|
||||
elif t >= 256:
|
||||
# See if it's a symbol and if we're in its first set
|
||||
itsdfa = self._grammar_dfas[t]
|
||||
itsstates, itsfirst = itsdfa
|
||||
if ilabel in itsfirst:
|
||||
# Push a symbol
|
||||
self.push(t, itsdfa, newstate, context)
|
||||
break # To continue the outer while loop
|
||||
else:
|
||||
if (0, state) in arcs:
|
||||
# An accepting state, pop it and try something else
|
||||
self.pop()
|
||||
if not self.stack:
|
||||
# Done parsing, but another token is input
|
||||
raise ParseError("too much input",
|
||||
type, value, context)
|
||||
else:
|
||||
# No success finding a transition
|
||||
raise ParseError("bad input", type, value, context)
|
||||
|
||||
cdef int classify(self, type, value, context):
|
||||
"""Turn a token into a label. (Internal)"""
|
||||
if type == NAME:
|
||||
# Keep a listing of all used names
|
||||
self.used_names.add(value)
|
||||
# Check for reserved words
|
||||
ilabel = self._grammar_keywords.get(value)
|
||||
if ilabel is not None:
|
||||
return ilabel
|
||||
ilabel = self._grammar_tokens.get(type)
|
||||
if ilabel is None:
|
||||
raise ParseError("bad token", type, value, context)
|
||||
return ilabel
|
||||
|
||||
cdef void shift(self, type, value, newstate, context):
|
||||
"""Shift a token. (Internal)"""
|
||||
dfa, state, node = self.stack[-1]
|
||||
newnode = (type, value, context, None)
|
||||
newnode = self.convert(newnode)
|
||||
if newnode is not None:
|
||||
node[-1].append(newnode)
|
||||
self.stack[-1] = (dfa, newstate, node)
|
||||
|
||||
cdef void push(self, type, newdfa, newstate, context):
|
||||
"""Push a nonterminal. (Internal)"""
|
||||
dfa, state, node = self.stack[-1]
|
||||
newnode = (type, None, context, [])
|
||||
self.stack[-1] = (dfa, newstate, node)
|
||||
self.stack.append((newdfa, 0, newnode))
|
||||
|
||||
cdef void pop(self):
|
||||
"""Pop a nonterminal. (Internal)"""
|
||||
popdfa, popstate, popnode = self.stack.pop()
|
||||
newnode = self.convert(popnode)
|
||||
if newnode is not None:
|
||||
if self.stack:
|
||||
dfa, state, node = self.stack[-1]
|
||||
node[-1].append(newnode)
|
||||
else:
|
||||
self.rootnode = newnode
|
||||
self.rootnode.used_names = self.used_names
|
||||
|
||||
cdef convert(self, raw_node):
|
||||
type, value, context, children = raw_node
|
||||
if children or type in self._grammar_number2symbol:
|
||||
# If there's exactly one child, return that child instead of
|
||||
# creating a new node.
|
||||
if len(children) == 1:
|
||||
return children[0]
|
||||
return Node(type, children, context=context)
|
||||
else:
|
||||
return Leaf(type, value, context=context)
|
@ -29,11 +29,6 @@ class Base(object):
|
||||
children = () # Tuple of subnodes
|
||||
was_changed = False
|
||||
|
||||
def __new__(cls, *args, **kwds):
|
||||
"""Constructor that prevents Base from being instantiated."""
|
||||
assert cls is not Base, "Cannot instantiate Base"
|
||||
return object.__new__(cls)
|
||||
|
||||
def __eq__(self, other):
|
||||
"""Compares two nodes for equality.
|
||||
|
||||
@ -132,7 +127,7 @@ class Node(Base):
|
||||
|
||||
"""Concrete implementation for interior nodes."""
|
||||
|
||||
def __init__(self, type, children, context=None, prefix=None):
|
||||
def __init__(self, type, children, context=None):
|
||||
"""Initializer.
|
||||
|
||||
Takes a type constant (a symbol number >= 256), a sequence of
|
||||
@ -140,14 +135,14 @@ class Node(Base):
|
||||
|
||||
As a side effect, the parent pointers of the children are updated.
|
||||
"""
|
||||
assert type >= 256, type
|
||||
# assert type >= 256, type
|
||||
self.type = type
|
||||
self.children = list(children)
|
||||
for ch in self.children:
|
||||
assert ch.parent is None, repr(ch)
|
||||
# assert ch.parent is None, repr(ch)
|
||||
ch.parent = self
|
||||
if prefix is not None:
|
||||
self.set_prefix(prefix)
|
||||
# if prefix is not None:
|
||||
# self.set_prefix(prefix)
|
||||
|
||||
def __repr__(self):
|
||||
return "%s(%s, %r)" % (self.__class__.__name__,
|
||||
@ -206,19 +201,19 @@ class Leaf(Base):
|
||||
lineno = 0 # Line where this token starts in the input
|
||||
column = 0 # Column where this token tarts in the input
|
||||
|
||||
def __init__(self, type, value, context=None, prefix=None):
|
||||
def __init__(self, type, value, context=None):
|
||||
"""Initializer.
|
||||
|
||||
Takes a type constant (a token number < 256), a string value,
|
||||
and an optional context keyword argument.
|
||||
"""
|
||||
assert 0 <= type < 256, type
|
||||
# assert 0 <= type < 256, type
|
||||
if context is not None:
|
||||
self.prefix, (self.lineno, self.column) = context
|
||||
self.type = type
|
||||
self.value = value
|
||||
if prefix is not None:
|
||||
self.prefix = prefix
|
||||
# if prefix is not None:
|
||||
# self.prefix = prefix
|
||||
|
||||
def __repr__(self):
|
||||
return "%s(%r, %r, %r)" % (self.__class__.__name__,
|
||||
|
Loading…
Reference in New Issue
Block a user