merge in pycode branch

Georg Brandl 2009-01-04 20:55:01 +01:00
commit dc3679ddb5
32 changed files with 5997 additions and 161 deletions


@ -1,7 +1,9 @@
.*\.pyc
.*\.egg
.*\.so
build/
dist/
sphinx/pycode/Grammar.*pickle
Sphinx.egg-info/
doc/_build/
TAGS

LICENSE

@ -1,4 +1,4 @@
Copyright (c) 2007-2009 by the respective authors (see AUTHORS file).
Copyright (c) 2007-2009 by the Sphinx team (see AUTHORS file).
All rights reserved.
License for Sphinx
@ -31,6 +31,59 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Licenses for incorporated software
==================================
The pgen2 package, included in this distribution under the name
sphinx.pycode.pgen2, is available in the Python 2.6 distribution under
the PSF license agreement for Python:
----------------------------------------------------------------------
1. This LICENSE AGREEMENT is between the Python Software Foundation
("PSF"), and the Individual or Organization ("Licensee") accessing
and otherwise using Python 2.6 software in source or binary form
and its associated documentation.
2. Subject to the terms and conditions of this License Agreement, PSF
hereby grants Licensee a nonexclusive, royalty-free, world-wide
license to reproduce, analyze, test, perform and/or display
publicly, prepare derivative works, distribute, and otherwise use
Python 2.6 alone or in any derivative version, provided, however,
that PSF's License Agreement and PSF's notice of copyright, i.e.,
"Copyright © 2001-2008 Python Software Foundation; All Rights
Reserved" are retained in Python 2.6 alone or in any derivative
version prepared by Licensee.
3. In the event Licensee prepares a derivative work that is based on
or incorporates Python 2.6 or any part thereof, and wants to make
the derivative work available to others as provided herein, then
Licensee hereby agrees to include in any such work a brief summary
of the changes made to Python 2.6.
4. PSF is making Python 2.6 available to Licensee on an "AS IS" basis.
PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY
WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND DISCLAIMS ANY
REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY
PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 2.6 WILL NOT INFRINGE
ANY THIRD PARTY RIGHTS.
5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
2.6 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS
AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON
2.6, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY
THEREOF.
6. This License Agreement will automatically terminate upon a material
breach of its terms and conditions.
7. Nothing in this License Agreement shall be deemed to create any
relationship of agency, partnership, or joint venture between PSF
and Licensee. This License Agreement does not grant permission to
use PSF trademarks or trade name in a trademark sense to endorse or
promote products or services of Licensee, or any third party.
8. By copying, installing or otherwise using Python 2.6, Licensee
agrees to be bound by the terms and conditions of this License
Agreement.
----------------------------------------------------------------------
The smartypants module, included as sphinx.util.smartypants,
is available under the following license:


@ -36,7 +36,7 @@ master_doc = 'contents'
# General substitutions.
project = 'Sphinx'
copyright = '2008, Georg Brandl'
copyright = '2007-2009, Georg Brandl'
# The default replacements for |version| and |release|, also used in various
# other places throughout the built documents.


@ -113,8 +113,35 @@ Includes
.. literalinclude:: example.py
:encoding: latin-1
The directive also supports including only parts of the file. If it is a
Python module, you can select a class, function or method to include using
the ``pyobject`` option::
.. literalinclude:: example.py
:pyobject: Timer.start
This would only include the code lines belonging to the ``start()`` method in
the ``Timer`` class within the file.
Alternatively, you can specify exactly which lines to include by giving a
``lines`` option::
.. literalinclude:: example.py
:lines: 1,3,5-10,20-
This includes lines 1, 3, 5 to 10, and line 20 to the last line.
Another way to control which part of the file is included is to use the
``start-after`` and ``end-before`` options (or only one of them). If
``start-after`` is given as a string option, only lines that follow the first
line containing that string are included. If ``end-before`` is given as a
string option, only lines that precede the first line containing that string
are included.
.. versionadded:: 0.4.3
The ``encoding`` option.
.. versionadded:: 0.6
The ``pyobject``, ``lines``, ``start-after`` and ``end-before`` options.
.. rubric:: Footnotes


@ -5,7 +5,7 @@
Builder superclass for all builders.
:copyright: 2007-2008 by Georg Brandl, Sebastian Wiesner, Horst Gutmann.
:copyright: Copyright 2007-2009 by the Sphinx team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""


@ -37,8 +37,9 @@ except ImportError:
except ImportError:
json = None
#: the filename for the inventory of objects
INVENTORY_FILENAME = 'objects.inv'
#: the filename for the "last build" file (for serializing builders)
LAST_BUILD_FILENAME = 'last_build'


@ -15,6 +15,7 @@ from docutils import nodes
from docutils.parsers.rst import directives
from sphinx import addnodes
from sphinx.util import parselinenos
# ------ highlight directive --------------------------------------------------------
@ -68,32 +69,78 @@ def literalinclude_directive(name, arguments, options, content, lineno,
lineno - state_machine.input_offset - 1)))
fn = path.normpath(path.join(source_dir, rel_fn))
if 'pyobject' in options and 'lines' in options:
return [state.document.reporter.warning(
'Cannot use both "pyobject" and "lines" options', line=lineno)]
encoding = options.get('encoding', env.config.source_encoding)
try:
f = codecs.open(fn, 'r', encoding)
text = f.read()
lines = f.readlines()
f.close()
except (IOError, OSError):
retnode = state.document.reporter.warning(
'Include file %r not found or reading it failed' % arguments[0], line=lineno)
return [state.document.reporter.warning(
'Include file %r not found or reading it failed' % arguments[0],
line=lineno)]
except UnicodeError:
retnode = state.document.reporter.warning(
return [state.document.reporter.warning(
'Encoding %r used for reading included file %r seems to '
'be wrong, try giving an :encoding: option' %
(encoding, arguments[0]))
else:
retnode = nodes.literal_block(text, text, source=fn)
retnode.line = 1
if options.get('language', ''):
retnode['language'] = options['language']
if 'linenos' in options:
retnode['linenos'] = True
state.document.settings.env.note_dependency(rel_fn)
(encoding, arguments[0]))]
objectname = options.get('pyobject')
if objectname is not None:
from sphinx.pycode import ModuleAnalyzer
analyzer = ModuleAnalyzer.for_file(fn, '')
tags = analyzer.find_tags()
if objectname not in tags:
return [state.document.reporter.warning(
'Object named %r not found in include file %r' %
(objectname, arguments[0]), line=lineno)]
else:
lines = lines[tags[objectname][1] - 1 : tags[objectname][2] - 1]
linespec = options.get('lines')
if linespec is not None:
try:
linelist = parselinenos(linespec, len(lines))
except ValueError, err:
return [state.document.reporter.warning(str(err), line=lineno)]
lines = [lines[i] for i in linelist]
startafter = options.get('start-after')
endbefore = options.get('end-before')
if startafter is not None or endbefore is not None:
use = not startafter
res = []
for line in lines:
if not use and startafter in line:
use = True
elif use and endbefore is not None and endbefore in line:
use = False
break
elif use:
res.append(line)
lines = res
text = ''.join(lines)
retnode = nodes.literal_block(text, text, source=fn)
retnode.line = 1
if options.get('language', ''):
retnode['language'] = options['language']
if 'linenos' in options:
retnode['linenos'] = True
state.document.settings.env.note_dependency(rel_fn)
return [retnode]
literalinclude_directive.options = {'linenos': directives.flag,
'language': directives.unchanged,
'encoding': directives.encoding}
'language': directives.unchanged_required,
'encoding': directives.encoding,
'pyobject': directives.unchanged_required,
'lines': directives.unchanged_required,
'start-after': directives.unchanged_required,
'end-before': directives.unchanged_required,
}
literalinclude_directive.content = 0
literalinclude_directive.arguments = (1, 0, 0)
directives.register_directive('literalinclude', literalinclude_directive)
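
Taken together, the new options lean on the pycode package added in this merge: ``pyobject`` slices the file with ModuleAnalyzer.find_tags(), ``lines`` goes through parselinenos(), and ``start-after``/``end-before`` filter on plain substring matches. Below is a minimal hedged sketch of the same slicing outside the directive machinery; example.py and Timer.start are made-up names for illustration.

from sphinx.pycode import ModuleAnalyzer

analyzer = ModuleAnalyzer.for_file('example.py', '')
tags = analyzer.find_tags()            # {'Timer.start': ('def', startline, endline), ...}
lines = open('example.py').readlines()

# same 1-based slicing the directive performs for :pyobject:
dtype, start, end = tags['Timer.start']
snippet = lines[start - 1 : end - 1]

# substring filtering as done for :start-after: / :end-before:
def between(lines, startafter=None, endbefore=None):
    use, res = not startafter, []
    for line in lines:
        if not use and startafter in line:
            use = True
        elif use and endbefore is not None and endbefore in line:
            break
        elif use:
            res.append(line)
    return res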


@ -5,11 +5,7 @@
Global creation environment.
<<<<<<< local
:copyright: 2007-2009 by Georg Brandl.
=======
:copyright: Copyright 2007-2009 by the Sphinx team, see AUTHORS.
>>>>>>> other
:license: BSD, see LICENSE for details.
"""


@ -21,7 +21,9 @@ from docutils import nodes
from docutils.parsers.rst import directives
from docutils.statemachine import ViewList
from sphinx.util import rpartition, nested_parse_with_titles
from sphinx.util import rpartition, nested_parse_with_titles, force_decode
from sphinx.pycode import ModuleAnalyzer, PycodeError
from sphinx.util.docstrings import prepare_docstring
clstypes = (type, ClassType)
try:
@ -29,8 +31,6 @@ try:
except NameError:
base_exception = Exception
_charset_re = re.compile(r'coding[:=]\s*([-\w.]+)')
_module_charsets = {}
py_ext_sig_re = re.compile(
r'''^ ([\w.]+::)? # explicit module name
@ -171,56 +171,6 @@ def isdescriptor(x):
return False
def prepare_docstring(s):
"""
Convert a docstring into lines of parseable reST. Return it as a list of
lines usable for inserting into a docutils ViewList (used as argument
of nested_parse().) An empty line is added to act as a separator between
this docstring and following content.
"""
lines = s.expandtabs().splitlines()
# Find minimum indentation of any non-blank lines after first line.
margin = sys.maxint
for line in lines[1:]:
content = len(line.lstrip())
if content:
indent = len(line) - content
margin = min(margin, indent)
# Remove indentation.
if lines:
lines[0] = lines[0].lstrip()
if margin < sys.maxint:
for i in range(1, len(lines)): lines[i] = lines[i][margin:]
# Remove any leading blank lines.
while lines and not lines[0]:
lines.pop(0)
# make sure there is an empty line at the end
if lines and lines[-1]:
lines.append('')
return lines
def get_module_charset(module):
"""Return the charset of the given module (cached in _module_charsets)."""
if module in _module_charsets:
return _module_charsets[module]
try:
filename = __import__(module, None, None, ['foo']).__file__
except (ImportError, AttributeError):
return None
if filename[-4:].lower() in ('.pyc', '.pyo'):
filename = filename[:-1]
for line in [linecache.getline(filename, x) for x in (1, 2)]:
match = _charset_re.search(line)
if match is not None:
charset = match.group(1)
break
else:
charset = 'ascii'
_module_charsets[module] = charset
return charset
class RstGenerator(object):
def __init__(self, options, document, lineno):
self.options = options
@ -234,15 +184,19 @@ class RstGenerator(object):
def warn(self, msg):
self.warnings.append(self.reporter.warning(msg, line=self.lineno))
def get_doc(self, what, name, obj):
"""Format and yield lines of the docstring(s) for the object."""
def get_doc(self, what, obj, encoding=None):
"""Decode and return lines of the docstring(s) for the object."""
docstrings = []
# add the regular docstring if present
if getattr(obj, '__doc__', None):
docstrings.append(obj.__doc__)
# skip some lines in module docstrings if configured
# skip some lines in module docstrings if configured (deprecated!)
if what == 'module' and self.env.config.automodule_skip_lines and docstrings:
docstrings[0] = '\n'.join(docstrings[0].splitlines()
[self.env.config.automodule_skip_lines:])
# for classes, what the "docstring" is can be controlled via an option
if what in ('class', 'exception'):
content = self.env.config.autoclass_content
@ -258,24 +212,13 @@ class RstGenerator(object):
docstrings.append(initdocstring)
# the default is only the class docstring
# decode the docstrings using the module's source encoding
charset = None
module = getattr(obj, '__module__', None)
if module is not None:
charset = get_module_charset(module)
# make sure we have Unicode docstrings, then sanitize and split into lines
return [prepare_docstring(force_decode(docstring, encoding))
for docstring in docstrings]
for docstring in docstrings:
if isinstance(docstring, str):
if charset:
docstring = docstring.decode(charset)
else:
try:
# try decoding with utf-8, should only work for real UTF-8
docstring = docstring.decode('utf-8')
except UnicodeError:
# last resort -- can't fail
docstring = docstring.decode('latin1')
docstringlines = prepare_docstring(docstring)
def process_doc(self, docstrings, what, name, obj):
"""Let the user process the docstrings."""
for docstringlines in docstrings:
if self.env.app:
# let extensions preprocess docstrings
self.env.app.emit('autodoc-process-docstring',
@ -313,7 +256,7 @@ class RstGenerator(object):
'for automodule %s' % name)
return (path or '') + base, [], None, None
elif what in ('exception', 'function', 'class'):
elif what in ('exception', 'function', 'class', 'data'):
if mod is None:
if path:
mod = path.rstrip('.')
@ -424,14 +367,8 @@ class RstGenerator(object):
# now, import the module and get object to document
try:
todoc = module = __import__(mod, None, None, ['foo'])
if hasattr(module, '__file__') and module.__file__:
modfile = module.__file__
if modfile[-4:].lower() in ('.pyc', '.pyo'):
modfile = modfile[:-1]
self.filename_set.add(modfile)
else:
modfile = None # e.g. for builtin and C modules
__import__(mod)
todoc = module = sys.modules[mod]
for part in objpath:
todoc = getattr(todoc, part)
except (ImportError, AttributeError), err:
@ -440,12 +377,26 @@ class RstGenerator(object):
(what, str(fullname), err))
return
# try to also get a source code analyzer for attribute docs
try:
analyzer = ModuleAnalyzer.for_module(mod)
except PycodeError, err:
# no source file -- e.g. for builtin and C modules
analyzer = None
else:
self.filename_set.add(analyzer.srcname)
# check __module__ of object if wanted (for members not given explicitly)
if check_module:
if hasattr(todoc, '__module__'):
if todoc.__module__ != mod:
return
# make sure that the result starts with an empty line. This is
# necessary for some situations where another directive preprocesses
# reST and no starting newline is present
self.result.append(u'', '')
# format the object's signature, if any
try:
sig = self.format_signature(what, fullname, todoc, args, retann)
@ -454,11 +405,6 @@ class RstGenerator(object):
(fullname, err))
sig = ''
# make sure that the result starts with an empty line. This is
# necessary for some situations where another directive preprocesses
# reST and no starting newline is present
self.result.append(u'', '')
# now, create the directive header
if what == 'method':
directive = get_method_type(todoc)
@ -484,13 +430,14 @@ class RstGenerator(object):
self.result.append(indent + u' :noindex:', '<autodoc>')
self.result.append(u'', '<autodoc>')
# add inheritance info, if wanted
if self.options.show_inheritance and what in ('class', 'exception'):
if len(todoc.__bases__):
bases = [b.__module__ == '__builtin__' and
u':class:`%s`' % b.__name__ or
u':class:`%s.%s`' % (b.__module__, b.__name__)
for b in todoc.__bases__]
self.result.append(indent + u' Bases: %s' % ', '.join(bases),
self.result.append(indent + _(u' Bases: %s') % ', '.join(bases),
'<autodoc>')
self.result.append(u'', '<autodoc>')
@ -498,17 +445,31 @@ class RstGenerator(object):
if what != 'module':
indent += u' '
if modfile:
sourcename = '%s:docstring of %s' % (modfile, fullname)
# add content from attribute documentation
if analyzer:
sourcename = '%s:docstring of %s' % (analyzer.srcname, fullname)
attr_docs = analyzer.find_attr_docs()
if what in ('data', 'attribute'):
key = ('.'.join(objpath[:-1]), objpath[-1])
if key in attr_docs:
no_docstring = True
docstrings = [attr_docs[key]]
for i, line in enumerate(self.process_doc(docstrings, what,
fullname, todoc)):
self.result.append(indent + line, sourcename, i)
else:
sourcename = 'docstring of %s' % fullname
attr_docs = {}
# add content from docstrings
if not no_docstring:
for i, line in enumerate(self.get_doc(what, fullname, todoc)):
encoding = analyzer and analyzer.encoding
docstrings = self.get_doc(what, todoc, encoding)
for i, line in enumerate(self.process_doc(docstrings, what,
fullname, todoc)):
self.result.append(indent + line, sourcename, i)
# add source content, if present
# add additional content (e.g. from document), if present
if add_content:
for line, src in zip(add_content.data, add_content.items):
self.result.append(indent + line, src[0], src[1])
@ -523,10 +484,10 @@ class RstGenerator(object):
if objpath:
self.env.autodoc_current_class = objpath[0]
# add members, if possible
_all = members == ['__all__']
# look for members to include
want_all_members = members == ['__all__']
members_check_module = False
if _all:
if want_all_members:
# unqualified :members: given
if what == 'module':
if hasattr(todoc, '__all__'):
@ -555,14 +516,28 @@ class RstGenerator(object):
else:
all_members = [(mname, getattr(todoc, mname)) for mname in members]
# search for members in source code too
namespace = '.'.join(objpath) # will be empty for modules
for (membername, member) in all_members:
if _all and membername.startswith('_'):
# if isattr is True, the member is documented as an attribute
isattr = False
# if content is not None, no extra content from docstrings will be added
content = None
if want_all_members and membername.startswith('_'):
# ignore members whose name starts with _ by default
skip = True
else:
# ignore undocumented members if :undoc-members: is not given
doc = getattr(member, '__doc__', None)
skip = not self.options.undoc_members and not doc
if (namespace, membername) in attr_docs:
# keep documented attributes
skip = False
isattr = True
else:
# ignore undocumented members if :undoc-members: is not given
doc = getattr(member, '__doc__', None)
skip = not self.options.undoc_members and not doc
# give the user a chance to decide whether this member should be skipped
if self.env.app:
# let extensions preprocess docstrings
@ -573,10 +548,12 @@ class RstGenerator(object):
if skip:
continue
content = None
# determine member type
if what == 'module':
if isinstance(member, (FunctionType, BuiltinFunctionType)):
memberwhat = 'function'
elif isattr:
memberwhat = 'attribute'
elif isinstance(member, clstypes):
if member.__name__ != membername:
# assume it's aliased
@ -588,10 +565,13 @@ class RstGenerator(object):
else:
memberwhat = 'class'
else:
# XXX: todo -- attribute docs
continue
else:
if isinstance(member, clstypes):
if inspect.isroutine(member):
memberwhat = 'method'
elif isattr:
memberwhat = 'attribute'
elif isinstance(member, clstypes):
if member.__name__ != membername:
# assume it's aliased
memberwhat = 'attribute'
@ -599,13 +579,11 @@ class RstGenerator(object):
source='')
else:
memberwhat = 'class'
elif inspect.isroutine(member):
memberwhat = 'method'
elif isdescriptor(member):
memberwhat = 'attribute'
else:
# XXX: todo -- attribute docs
continue
# give explicitly separated module name, so that members of inner classes
# can be documented
full_membername = mod + '::' + '.'.join(objpath + [membername])
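
For attribute documentation, autodoc now consults the mapping produced by ModuleAnalyzer.find_attr_docs(), keyed by (namespace, attribute name) tuples, which is what the ('.'.join(objpath[:-1]), objpath[-1]) lookup above expects. A small hedged sketch with an invented module source:

from sphinx.pycode import ModuleAnalyzer

# invented module source, documented with the '#:' comment convention
source = '''\
#: how many times to retry a failing request
RETRIES = 3

class Config(object):
    #: path of the configuration file
    filename = 'defaults.cfg'
'''

attr_docs = ModuleAnalyzer.for_string(source, 'example_mod').find_attr_docs()
# keys: ('', 'RETRIES') and ('Config', 'filename'); values: lists of comment lines
print attr_docs[('Config', 'filename')]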


@ -5,7 +5,7 @@
Glue code for the jinja2 templating engine.
:copyright: 2008 by Sebastian Wiesner.
:copyright: Copyright 2007-2009 by the Sphinx team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""

sphinx/pycode/Grammar.txt Normal file

@ -0,0 +1,155 @@
# Grammar for Python
# Note: Changing the grammar specified in this file will most likely
# require corresponding changes in the parser module
# (../Modules/parsermodule.c). If you can't make the changes to
# that module yourself, please co-ordinate the required changes
# with someone who can; ask around on python-dev for help. Fred
# Drake <fdrake@acm.org> will probably be listening there.
# NOTE WELL: You should also follow all the steps listed in PEP 306,
# "How to Change Python's Grammar"
# Commands for Kees Blom's railroad program
#diagram:token NAME
#diagram:token NUMBER
#diagram:token STRING
#diagram:token NEWLINE
#diagram:token ENDMARKER
#diagram:token INDENT
#diagram:output\input python.bla
#diagram:token DEDENT
#diagram:output\textwidth 20.04cm\oddsidemargin 0.0cm\evensidemargin 0.0cm
#diagram:rules
# Start symbols for the grammar:
# file_input is a module or sequence of commands read from an input file;
# single_input is a single interactive statement;
# eval_input is the input for the eval() and input() functions.
# NB: compound_stmt in single_input is followed by extra NEWLINE!
file_input: (NEWLINE | stmt)* ENDMARKER
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
eval_input: testlist NEWLINE* ENDMARKER
decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
decorators: decorator+
decorated: decorators (classdef | funcdef)
funcdef: 'def' NAME parameters ['->' test] ':' suite
parameters: '(' [typedargslist] ')'
typedargslist: ((tfpdef ['=' test] ',')*
('*' [tname] (',' tname ['=' test])* [',' '**' tname] | '**' tname)
| tfpdef ['=' test] (',' tfpdef ['=' test])* [','])
tname: NAME [':' test]
tfpdef: tname | '(' tfplist ')'
tfplist: tfpdef (',' tfpdef)* [',']
varargslist: ((vfpdef ['=' test] ',')*
('*' [vname] (',' vname ['=' test])* [',' '**' vname] | '**' vname)
| vfpdef ['=' test] (',' vfpdef ['=' test])* [','])
vname: NAME
vfpdef: vname | '(' vfplist ')'
vfplist: vfpdef (',' vfpdef)* [',']
stmt: simple_stmt | compound_stmt
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
small_stmt: (expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt |
import_stmt | global_stmt | exec_stmt | assert_stmt)
expr_stmt: testlist (augassign (yield_expr|testlist) |
('=' (yield_expr|testlist))*)
augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |
'<<=' | '>>=' | '**=' | '//=')
# For normal assignments, additional restrictions enforced by the interpreter
print_stmt: 'print' ( [ test (',' test)* [','] ] |
'>>' test [ (',' test)+ [','] ] )
del_stmt: 'del' exprlist
pass_stmt: 'pass'
flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
break_stmt: 'break'
continue_stmt: 'continue'
return_stmt: 'return' [testlist]
yield_stmt: yield_expr
raise_stmt: 'raise' [test ['from' test | ',' test [',' test]]]
import_stmt: import_name | import_from
import_name: 'import' dotted_as_names
import_from: ('from' ('.'* dotted_name | '.'+)
'import' ('*' | '(' import_as_names ')' | import_as_names))
import_as_name: NAME ['as' NAME]
dotted_as_name: dotted_name ['as' NAME]
import_as_names: import_as_name (',' import_as_name)* [',']
dotted_as_names: dotted_as_name (',' dotted_as_name)*
dotted_name: NAME ('.' NAME)*
global_stmt: ('global' | 'nonlocal') NAME (',' NAME)*
exec_stmt: 'exec' expr ['in' test [',' test]]
assert_stmt: 'assert' test [',' test]
compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated
if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
while_stmt: 'while' test ':' suite ['else' ':' suite]
for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
try_stmt: ('try' ':' suite
((except_clause ':' suite)+
['else' ':' suite]
['finally' ':' suite] |
'finally' ':' suite))
with_stmt: 'with' test [ with_var ] ':' suite
with_var: 'as' expr
# NB compile.c makes sure that the default except clause is last
except_clause: 'except' [test [(',' | 'as') test]]
suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
# Backward compatibility cruft to support:
# [ x for x in lambda: True, lambda: False if x() ]
# even while also allowing:
# lambda x: 5 if x else 2
# (But not a mix of the two)
testlist_safe: old_test [(',' old_test)+ [',']]
old_test: or_test | old_lambdef
old_lambdef: 'lambda' [varargslist] ':' old_test
test: or_test ['if' or_test 'else' test] | lambdef
or_test: and_test ('or' and_test)*
and_test: not_test ('and' not_test)*
not_test: 'not' not_test | comparison
comparison: expr (comp_op expr)*
comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
expr: xor_expr ('|' xor_expr)*
xor_expr: and_expr ('^' and_expr)*
and_expr: shift_expr ('&' shift_expr)*
shift_expr: arith_expr (('<<'|'>>') arith_expr)*
arith_expr: term (('+'|'-') term)*
term: factor (('*'|'/'|'%'|'//') factor)*
factor: ('+'|'-'|'~') factor | power
power: atom trailer* ['**' factor]
atom: ('(' [yield_expr|testlist_gexp] ')' |
'[' [listmaker] ']' |
'{' [dictsetmaker] '}' |
'`' testlist1 '`' |
NAME | NUMBER | STRING+ | '.' '.' '.')
listmaker: test ( comp_for | (',' test)* [','] )
testlist_gexp: test ( comp_for | (',' test)* [','] )
lambdef: 'lambda' [varargslist] ':' test
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
subscriptlist: subscript (',' subscript)* [',']
subscript: test | [test] ':' [test] [sliceop]
sliceop: ':' [test]
exprlist: expr (',' expr)* [',']
testlist: test (',' test)* [',']
dictsetmaker: ( (test ':' test (comp_for | (',' test ':' test)* [','])) |
(test (comp_for | (',' test)* [','])) )
classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
arglist: (argument ',')* (argument [',']
|'*' test (',' argument)* [',' '**' test]
|'**' test)
argument: test [comp_for] | test '=' test # Really [keyword '='] test
comp_iter: comp_for | comp_if
comp_for: 'for' exprlist 'in' testlist_safe [comp_iter]
comp_if: 'if' old_test [comp_iter]
testlist1: test (',' test)*
# not used in grammar, but may appear in "node" passed from Parser to Compiler
encoding_decl: NAME
yield_expr: 'yield' [testlist]
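
This grammar file ships as sphinx/pycode/Grammar.txt and is compiled into parser tables when sphinx.pycode is imported (see __init__.py below). A hedged sketch of loading it directly; the relative path is an assumption about the working directory:

from sphinx.pycode.pgen2 import driver

grammar = driver.load_grammar('sphinx/pycode/Grammar.txt')
print grammar.symbol2number['funcdef']   # nonterminal numbers are >= 256
print grammar.start                      # number of the grammar's start symbol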

sphinx/pycode/__init__.py Normal file

@ -0,0 +1,303 @@
# -*- coding: utf-8 -*-
"""
sphinx.pycode
~~~~~~~~~~~~~
Utilities for parsing and analyzing Python code.
:copyright: Copyright 2007-2009 by the Sphinx team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
import re
import sys
from os import path
from cStringIO import StringIO
from sphinx.pycode import nodes
from sphinx.pycode.pgen2 import driver, token, tokenize, parse, literals
from sphinx.util.docstrings import prepare_docstring, prepare_commentdoc
# load the Python grammar
_grammarfile = path.join(path.dirname(__file__), 'Grammar.txt')
pygrammar = driver.load_grammar(_grammarfile)
pydriver = driver.Driver(pygrammar, convert=nodes.convert)
# an object with attributes corresponding to token and symbol names
class sym: pass
for k, v in pygrammar.symbol2number.iteritems():
setattr(sym, k, v)
for k, v in token.tok_name.iteritems():
setattr(sym, v, k)
# a dict mapping terminal and nonterminal numbers to their names
number2name = pygrammar.number2symbol.copy()
number2name.update(token.tok_name)
# a regex to recognize coding cookies
_coding_re = re.compile(r'coding[:=]\s*([-\w.]+)')
_eq = nodes.Leaf(token.EQUAL, '=')
class AttrDocVisitor(nodes.NodeVisitor):
"""
Visitor that collects docstrings for attribute assignments on toplevel and
in classes.
The docstrings can either be in special '#:' comments before the assignment
or in a docstring after it.
"""
def init(self, scope, encoding):
self.scope = scope
self.encoding = encoding
self.namespace = []
self.collected = {}
def visit_classdef(self, node):
self.namespace.append(node[1].value)
self.generic_visit(node)
self.namespace.pop()
def visit_expr_stmt(self, node):
"""Visit an assignment which may have a special comment before it."""
if _eq not in node.children:
# not an assignment (we don't care for augmented assignments)
return
pnode = node[0]
prefix = pnode.get_prefix()
# if the assignment is the first statement on a new indentation
# level, its preceding whitespace and comments are not assigned
# to that token, but the first INDENT or DEDENT token
while not prefix:
pnode = pnode.get_prev_leaf()
if not pnode or pnode.type not in (token.INDENT, token.DEDENT):
break
prefix = pnode.get_prefix()
prefix = prefix.decode(self.encoding)
docstring = prepare_commentdoc(prefix)
if docstring:
self.add_docstring(node, docstring)
def visit_simple_stmt(self, node):
"""Visit a docstring statement which may have an assignment before."""
if node[0].type != token.STRING:
# not a docstring; but still need to visit children
return self.generic_visit(node)
prev = node.get_prev_sibling()
if not prev:
return
if prev.type == sym.simple_stmt and \
prev[0].type == sym.expr_stmt and _eq in prev[0].children:
# need to "eval" the string because it's returned in its original form
docstring = literals.evalString(node[0].value, self.encoding)
docstring = prepare_docstring(docstring)
self.add_docstring(prev[0], docstring)
def visit_funcdef(self, node):
# don't descend into functions -- nothing interesting there
return
def add_docstring(self, node, docstring):
# add an item for each assignment target
for i in range(0, len(node) - 1, 2):
target = node[i]
if target.type != token.NAME:
# don't care about complex targets
continue
namespace = '.'.join(self.namespace)
if namespace.startswith(self.scope):
self.collected[namespace, target.value] = docstring
class PycodeError(Exception):
def __str__(self):
res = self.args[0]
if len(self.args) > 1:
res += ' (exception was: %r)' % self.args[1]
return res
class ModuleAnalyzer(object):
# cache for analyzer objects -- caches both by module and file name
cache = {}
@classmethod
def for_string(cls, string, modname, srcname='<string>'):
return cls(StringIO(string), modname, srcname)
@classmethod
def for_file(cls, filename, modname):
if ('file', filename) in cls.cache:
return cls.cache['file', filename]
try:
fileobj = open(filename, 'r')
except Exception, err:
raise PycodeError('error opening %r' % filename, err)
obj = cls(fileobj, modname, filename)
cls.cache['file', filename] = obj
return obj
@classmethod
def for_module(cls, modname):
if ('module', modname) in cls.cache:
entry = cls.cache['module', modname]
if isinstance(entry, PycodeError):
raise entry
return entry
try:
if modname not in sys.modules:
try:
__import__(modname)
except ImportError, err:
raise PycodeError('error importing %r' % modname, err)
mod = sys.modules[modname]
if hasattr(mod, '__loader__'):
try:
source = mod.__loader__.get_source(modname)
except Exception, err:
raise PycodeError('error getting source for %r' % modname, err)
obj = cls.for_string(source, modname)
cls.cache['module', modname] = obj
return obj
filename = getattr(mod, '__file__', None)
if filename is None:
raise PycodeError('no source found for module %r' % modname)
filename = path.normpath(filename)
lfilename = filename.lower()
if lfilename.endswith('.pyo') or lfilename.endswith('.pyc'):
filename = filename[:-1]
elif not lfilename.endswith('.py'):
raise PycodeError('source is not a .py file: %r' % filename)
if not path.isfile(filename):
raise PycodeError('source file is not present: %r' % filename)
obj = cls.for_file(filename, modname)
except PycodeError, err:
cls.cache['module', modname] = err
raise
cls.cache['module', modname] = obj
return obj
def __init__(self, source, modname, srcname):
# name of the module
self.modname = modname
# name of the source file
self.srcname = srcname
# file-like object yielding source lines
self.source = source
# will be filled by tokenize()
self.tokens = None
# will be filled by parse()
self.parsetree = None
# will be filled by find_attr_docs()
self.attr_docs = None
# will be filled by find_tags()
self.tags = None
def tokenize(self):
"""Generate tokens from the source."""
if self.tokens is not None:
return
self.tokens = list(tokenize.generate_tokens(self.source.readline))
self.source.close()
def parse(self):
"""Parse the generated source tokens."""
if self.parsetree is not None:
return
self.tokenize()
self.parsetree = pydriver.parse_tokens(self.tokens)
# find the source code encoding
encoding = sys.getdefaultencoding()
comments = self.parsetree.get_prefix()
for line in comments.splitlines()[:2]:
match = _coding_re.search(line)
if match is not None:
encoding = match.group(1)
break
self.encoding = encoding
def find_attr_docs(self, scope=''):
"""Find class and module-level attributes and their documentation."""
if self.attr_docs is not None:
return self.attr_docs
self.parse()
attr_visitor = AttrDocVisitor(number2name, scope, self.encoding)
attr_visitor.visit(self.parsetree)
self.attr_docs = attr_visitor.collected
return attr_visitor.collected
def find_tags(self):
"""Find class, function and method definitions and their location."""
if self.tags is not None:
return self.tags
self.tokenize()
result = {}
namespace = []
stack = []
indent = 0
defline = False
expect_indent = False
def tokeniter(ignore = (token.COMMENT, token.NL)):
for tokentup in self.tokens:
if tokentup[0] not in ignore:
yield tokentup
tokeniter = tokeniter()
for type, tok, spos, epos, line in tokeniter:
if expect_indent:
if type != token.INDENT:
# no suite -- one-line definition
assert stack
dtype, fullname, startline, _ = stack.pop()
endline = epos[0]
namespace.pop()
result[fullname] = (dtype, startline, endline)
expect_indent = False
if tok in ('def', 'class'):
name = tokeniter.next()[1]
namespace.append(name)
fullname = '.'.join(namespace)
stack.append((tok, fullname, spos[0], indent))
defline = True
elif type == token.INDENT:
expect_indent = False
indent += 1
elif type == token.DEDENT:
indent -= 1
# if the stacklevel is the same as it was before the last
# def/class block, this dedent closes that block
if stack and indent == stack[-1][3]:
dtype, fullname, startline, _ = stack.pop()
endline = spos[0]
namespace.pop()
result[fullname] = (dtype, startline, endline)
elif type == token.NEWLINE:
# if this line contained a definition, expect an INDENT to start the
# suite; if there is no such INDENT it's a one-line definition
if defline:
defline = False
expect_indent = True
self.tags = result
return result
if __name__ == '__main__':
import time, pprint
x0 = time.time()
#ma = ModuleAnalyzer.for_file(__file__.rstrip('c'), 'sphinx.builders.html')
ma = ModuleAnalyzer.for_file('sphinx/builders/html.py', 'sphinx.builders.html')
ma.tokenize()
x1 = time.time()
ma.parse()
x2 = time.time()
#for (ns, name), doc in ma.find_attr_docs().iteritems():
# print '>>', ns, name
# print '\n'.join(doc)
pprint.pprint(ma.find_tags())
x3 = time.time()
#print nodes.nice_repr(ma.parsetree, number2name)
print "tokenizing %.4f, parsing %.4f, finding %.4f" % (x1-x0, x2-x1, x3-x2)

sphinx/pycode/nodes.py Normal file

@ -0,0 +1,202 @@
# -*- coding: utf-8 -*-
"""
sphinx.pycode.nodes
~~~~~~~~~~~~~~~~~~~
Parse tree node implementations.
:copyright: Copyright 2007-2009 by the Sphinx team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
class BaseNode(object):
"""
Node superclass for both terminal and nonterminal nodes.
"""
def _eq(self, other):
raise NotImplementedError
def __eq__(self, other):
if self.__class__ is not other.__class__:
return NotImplemented
return self._eq(other)
def __ne__(self, other):
if self.__class__ is not other.__class__:
return NotImplemented
return not self._eq(other)
def get_prev_sibling(self):
"""Return previous child in parent's children, or None."""
if self.parent is None:
return None
for i, child in enumerate(self.parent.children):
if child is self:
if i == 0:
return None
return self.parent.children[i-1]
def get_next_sibling(self):
"""Return next child in parent's children, or None."""
if self.parent is None:
return None
for i, child in enumerate(self.parent.children):
if child is self:
try:
return self.parent.children[i+1]
except IndexError:
return None
def get_prev_leaf(self):
"""Return the leaf node that precedes this node in the parse tree."""
def last_child(node):
if isinstance(node, Leaf):
return node
elif not node.children:
return None
else:
return last_child(node.children[-1])
if self.parent is None:
return None
prev = self.get_prev_sibling()
if isinstance(prev, Leaf):
return prev
elif prev is not None:
return last_child(prev)
return self.parent.get_prev_leaf()
def get_next_leaf(self):
"""Return self if leaf, otherwise the leaf node that succeeds this
node in the parse tree.
"""
node = self
while not isinstance(node, Leaf):
assert node.children
node = node.children[0]
return node
def get_lineno(self):
"""Return the line number which generated the invocant node."""
return self.get_next_leaf().lineno
def get_prefix(self):
"""Return the prefix of the next leaf node."""
# only leaves carry a prefix
return self.get_next_leaf().prefix
class Node(BaseNode):
"""
Node implementation for nonterminals.
"""
def __init__(self, type, children, context=None):
# type of nonterminals is >= 256
# assert type >= 256, type
self.type = type
self.children = list(children)
for ch in self.children:
# assert ch.parent is None, repr(ch)
ch.parent = self
def __repr__(self):
return '%s(%s, %r)' % (self.__class__.__name__, self.type, self.children)
def __str__(self):
"""This reproduces the input source exactly."""
return ''.join(map(str, self.children))
def _eq(self, other):
return (self.type, self.children) == (other.type, other.children)
# support indexing the node directly instead of .children
def __getitem__(self, index):
return self.children[index]
def __iter__(self):
return iter(self.children)
def __len__(self):
return len(self.children)
class Leaf(BaseNode):
"""
Node implementation for leaf nodes (terminals).
"""
prefix = '' # Whitespace and comments preceding this token in the input
lineno = 0 # Line where this token starts in the input
column = 0 # Column where this token starts in the input
def __init__(self, type, value, context=None):
# type of terminals is below 256
# assert 0 <= type < 256, type
self.type = type
self.value = value
if context is not None:
self.prefix, (self.lineno, self.column) = context
def __repr__(self):
return '%s(%r, %r, %r)' % (self.__class__.__name__,
self.type, self.value, self.prefix)
def __str__(self):
"""This reproduces the input source exactly."""
return self.prefix + str(self.value)
def _eq(self, other):
"""Compares two nodes for equality."""
return (self.type, self.value) == (other.type, other.value)
def convert(grammar, raw_node):
"""Convert raw node to a Node or Leaf instance."""
type, value, context, children = raw_node
if children or type in grammar.number2symbol:
# If there's exactly one child, return that child instead of
# creating a new node.
if len(children) == 1:
return children[0]
return Node(type, children, context=context)
else:
return Leaf(type, value, context=context)
def nice_repr(node, number2name, prefix=False):
def _repr(node):
if isinstance(node, Leaf):
return "%s(%r)" % (number2name[node.type], node.value)
else:
return "%s(%s)" % (number2name[node.type],
', '.join(map(_repr, node.children)))
def _prepr(node):
if isinstance(node, Leaf):
return "%s(%r, %r)" % (number2name[node.type], node.prefix, node.value)
else:
return "%s(%s)" % (number2name[node.type],
', '.join(map(_prepr, node.children)))
return (prefix and _prepr or _repr)(node)
class NodeVisitor(object):
def __init__(self, number2name, *args):
self.number2name = number2name
self.init(*args)
def init(self, *args):
pass
def visit(self, node):
"""Visit a node."""
method = 'visit_' + self.number2name[node.type]
visitor = getattr(self, method, self.generic_visit)
return visitor(node)
def generic_visit(self, node):
"""Called if no explicit visitor function exists for a node."""
if isinstance(node, Node):
for child in node:
self.visit(child)
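
A hedged sketch of the node API above: build a tiny tree by hand and walk it with a NodeVisitor subclass. The 'pair' nonterminal and the visitor are invented purely for illustration:

from sphinx.pycode import nodes
from sphinx.pycode.pgen2 import token

number2name = dict(token.tok_name)
number2name[256] = 'pair'          # invented nonterminal number/name

tree = nodes.Node(256, [nodes.Leaf(token.NAME, 'spam', ('', (1, 0))),
                        nodes.Leaf(token.NAME, 'eggs', (' ', (1, 5)))])
print str(tree)                           # 'spam eggs' -- reproduces the input
print nodes.nice_repr(tree, number2name)  # pair(NAME('spam'), NAME('eggs'))

class NameCollector(nodes.NodeVisitor):
    def init(self):
        self.names = []
    def visit_NAME(self, node):
        self.names.append(node.value)

collector = NameCollector(number2name)
collector.visit(tree)
print collector.names                     # ['spam', 'eggs']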


@ -0,0 +1,4 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
"""The pgen2 package."""


@ -0,0 +1,145 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
# Modifications:
# Copyright 2006 Google, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
"""Parser driver.
This provides a high-level interface to parse a file into a syntax tree.
"""
__author__ = "Guido van Rossum <guido@python.org>"
__all__ = ["Driver", "load_grammar"]
# Python imports
import os
import logging
import sys
# Pgen imports
from sphinx.pycode.pgen2 import grammar, parse, token, tokenize, pgen
class Driver(object):
def __init__(self, grammar, convert=None, logger=None):
self.grammar = grammar
if logger is None:
logger = logging.getLogger()
self.logger = logger
self.convert = convert
def parse_tokens(self, tokens, debug=False):
"""Parse a series of tokens and return the syntax tree."""
# XXX Move the prefix computation into a wrapper around tokenize.
p = parse.Parser(self.grammar, self.convert)
p.setup()
lineno = 1
column = 0
type = value = start = end = line_text = None
prefix = ""
opmap = grammar.opmap
for type, value, start, end, line_text in tokens:
if start != (lineno, column):
assert (lineno, column) <= start, ((lineno, column), start)
s_lineno, s_column = start
if lineno < s_lineno:
prefix += "\n" * (s_lineno - lineno)
lineno = s_lineno
column = 0
if column < s_column:
prefix += line_text[column:s_column]
column = s_column
if type in (tokenize.COMMENT, tokenize.NL):
prefix += value
lineno, column = end
if value.endswith("\n"):
lineno += 1
column = 0
continue
if type == token.OP:
type = opmap[value]
# if debug:
# self.logger.debug("%s %r (prefix=%r)",
# token.tok_name[type], value, prefix)
if p.addtoken(type, value, (prefix, start)):
# if debug:
# self.logger.debug("Stop.")
break
prefix = ""
lineno, column = end
if value.endswith("\n"):
lineno += 1
column = 0
else:
# We never broke out -- EOF is too soon (how can this happen???)
raise parse.ParseError("incomplete input", type, value, line_text)
return p.rootnode
def parse_stream_raw(self, stream, debug=False):
"""Parse a stream and return the syntax tree."""
tokens = tokenize.generate_tokens(stream.readline)
return self.parse_tokens(tokens, debug)
def parse_stream(self, stream, debug=False):
"""Parse a stream and return the syntax tree."""
return self.parse_stream_raw(stream, debug)
def parse_file(self, filename, debug=False):
"""Parse a file and return the syntax tree."""
stream = open(filename)
try:
return self.parse_stream(stream, debug)
finally:
stream.close()
def parse_string(self, text, debug=False):
"""Parse a string and return the syntax tree."""
tokens = tokenize.generate_tokens(generate_lines(text).next)
return self.parse_tokens(tokens, debug)
def generate_lines(text):
"""Generator that behaves like readline without using StringIO."""
for line in text.splitlines(True):
yield line
while True:
yield ""
def load_grammar(gt="Grammar.txt", gp=None,
save=True, force=False, logger=None):
"""Load the grammar (maybe from a pickle)."""
if logger is None:
logger = logging.getLogger()
if gp is None:
head, tail = os.path.splitext(gt)
if tail == ".txt":
tail = ""
gp = head + tail + ".".join(map(str, sys.version_info)) + ".pickle"
if force or not _newer(gp, gt):
logger.info("Generating grammar tables from %s", gt)
g = pgen.generate_grammar(gt)
if save:
logger.info("Writing grammar tables to %s", gp)
try:
g.dump(gp)
except IOError, e:
logger.info("Writing failed:"+str(e))
else:
g = grammar.Grammar()
g.load(gp)
return g
def _newer(a, b):
"""Inquire whether file a was written since file b."""
if not os.path.exists(a):
return False
if not os.path.exists(b):
return True
return os.path.getmtime(a) >= os.path.getmtime(b)
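
A hedged sketch of using the Driver directly, with the Node/Leaf converter from sphinx.pycode.nodes; the Grammar.txt path is an assumption about the working directory:

from sphinx.pycode import nodes
from sphinx.pycode.pgen2 import driver, parse

d = driver.Driver(driver.load_grammar('sphinx/pycode/Grammar.txt'),
                  convert=nodes.convert)
tree = d.parse_string('x = 1\n')
print repr(str(tree))             # 'x = 1\n' -- Leaf prefixes preserve the source text

try:
    d.parse_string('def :\n')     # syntactically invalid input
except parse.ParseError, err:
    print 'parse failed:', err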


@ -0,0 +1,171 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
"""This module defines the data structures used to represent a grammar.
These are a bit arcane because they are derived from the data
structures used by Python's 'pgen' parser generator.
There's also a table here mapping operators to their names in the
token module; the Python tokenize module reports all operators as the
fallback token code OP, but the parser needs the actual token code.
"""
# Python imports
import pickle
# Local imports
from sphinx.pycode.pgen2 import token, tokenize
class Grammar(object):
"""Pgen parsing tables tables conversion class.
Once initialized, this class supplies the grammar tables for the
parsing engine implemented by parse.py. The parsing engine
accesses the instance variables directly. The class here does not
provide initialization of the tables; several subclasses exist to
do this (see the conv and pgen modules).
The load() method reads the tables from a pickle file, which is
much faster than the other ways offered by subclasses. The pickle
file is written by calling dump() (after loading the grammar
tables using a subclass). The report() method prints a readable
representation of the tables to stdout, for debugging.
The instance variables are as follows:
symbol2number -- a dict mapping symbol names to numbers. Symbol
numbers are always 256 or higher, to distinguish
them from token numbers, which are between 0 and
255 (inclusive).
number2symbol -- a dict mapping numbers to symbol names;
these two are each other's inverse.
states -- a list of DFAs, where each DFA is a list of
states, each state is a list of arcs, and each
arc is a (i, j) pair where i is a label and j is
a state number. The DFA number is the index into
this list. (This name is slightly confusing.)
Final states are represented by a special arc of
the form (0, j) where j is its own state number.
dfas -- a dict mapping symbol numbers to (DFA, first)
pairs, where DFA is an item from the states list
above, and first is a set of tokens that can
begin this grammar rule (represented by a dict
whose values are always 1).
labels -- a list of (x, y) pairs where x is either a token
number or a symbol number, and y is either None
or a string; the strings are keywords. The label
number is the index in this list; label numbers
are used to mark state transitions (arcs) in the
DFAs.
start -- the number of the grammar's start symbol.
keywords -- a dict mapping keyword strings to arc labels.
tokens -- a dict mapping token numbers to arc labels.
"""
def __init__(self):
self.symbol2number = {}
self.number2symbol = {}
self.states = []
self.dfas = {}
self.labels = [(0, "EMPTY")]
self.keywords = {}
self.tokens = {}
self.symbol2label = {}
self.start = 256
def dump(self, filename):
"""Dump the grammar tables to a pickle file."""
f = open(filename, "wb")
pickle.dump(self.__dict__, f, 2)
f.close()
def load(self, filename):
"""Load the grammar tables from a pickle file."""
f = open(filename, "rb")
d = pickle.load(f)
f.close()
self.__dict__.update(d)
def report(self):
"""Dump the grammar tables to standard output, for debugging."""
from pprint import pprint
print "s2n"
pprint(self.symbol2number)
print "n2s"
pprint(self.number2symbol)
print "states"
pprint(self.states)
print "dfas"
pprint(self.dfas)
print "labels"
pprint(self.labels)
print "start", self.start
# Map from operator to number (since tokenize doesn't do this)
opmap_raw = """
( LPAR
) RPAR
[ LSQB
] RSQB
: COLON
, COMMA
; SEMI
+ PLUS
- MINUS
* STAR
/ SLASH
| VBAR
& AMPER
< LESS
> GREATER
= EQUAL
. DOT
% PERCENT
` BACKQUOTE
{ LBRACE
} RBRACE
@ AT
== EQEQUAL
!= NOTEQUAL
<> NOTEQUAL
<= LESSEQUAL
>= GREATEREQUAL
~ TILDE
^ CIRCUMFLEX
<< LEFTSHIFT
>> RIGHTSHIFT
** DOUBLESTAR
+= PLUSEQUAL
-= MINEQUAL
*= STAREQUAL
/= SLASHEQUAL
%= PERCENTEQUAL
&= AMPEREQUAL
|= VBAREQUAL
^= CIRCUMFLEXEQUAL
<<= LEFTSHIFTEQUAL
>>= RIGHTSHIFTEQUAL
**= DOUBLESTAREQUAL
// DOUBLESLASH
//= DOUBLESLASHEQUAL
-> RARROW
"""
opmap = {}
for line in opmap_raw.splitlines():
if line:
op, name = line.split()
opmap[op] = getattr(token, name)
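
The opmap built here is what driver.parse_tokens uses to narrow the tokenizer's generic OP tokens to the specific token numbers the parser tables expect; a tiny hedged check:

from sphinx.pycode.pgen2 import grammar, token

assert grammar.opmap['=='] == token.EQEQUAL
assert grammar.opmap['**='] == token.DOUBLESTAREQUAL
print grammar.opmap['->']   # RARROW, for the '->' return annotation in Grammar.txt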


@ -0,0 +1,96 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
# Extended to handle raw and unicode literals by Georg Brandl.
"""Safely evaluate Python string literals without using eval()."""
import re
simple_escapes = {"a": "\a",
"b": "\b",
"f": "\f",
"n": "\n",
"r": "\r",
"t": "\t",
"v": "\v",
"'": "'",
'"': '"',
"\\": "\\"}
def convert_hex(x, n):
if len(x) < n+1:
raise ValueError("invalid hex string escape ('\\%s')" % x)
try:
return int(x[1:], 16)
except ValueError:
raise ValueError("invalid hex string escape ('\\%s')" % x)
def escape(m):
all, tail = m.group(0, 1)
assert all.startswith("\\")
esc = simple_escapes.get(tail)
if esc is not None:
return esc
elif tail.startswith("x"):
return chr(convert_hex(tail, 2))
elif tail.startswith('u'):
return unichr(convert_hex(tail, 4))
elif tail.startswith('U'):
return unichr(convert_hex(tail, 8))
elif tail.startswith('N'):
import unicodedata
try:
return unicodedata.lookup(tail[1:-1])
except KeyError:
raise ValueError("undefined character name %r" % tail[1:-1])
else:
try:
return chr(int(tail, 8))
except ValueError:
raise ValueError("invalid octal string escape ('\\%s')" % tail)
def escaperaw(m):
all, tail = m.group(0, 1)
if tail.startswith('u'):
return unichr(convert_hex(tail, 4))
elif tail.startswith('U'):
return unichr(convert_hex(tail, 8))
else:
return all
escape_re = re.compile(r"\\(\'|\"|\\|[abfnrtv]|x.{0,2}|[0-7]{1,3})")
uni_escape_re = re.compile(r"\\(\'|\"|\\|[abfnrtv]|x.{0,2}|[0-7]{1,3}|"
r"u[0-9a-fA-F]{0,4}|U[0-9a-fA-F]{0,8}|N\{.+?\})")
def evalString(s, encoding=None):
regex = escape_re
repl = escape
if encoding:
s = s.decode(encoding)
if s.startswith('u') or s.startswith('U'):
regex = uni_escape_re
s = s[1:]
if s.startswith('r') or s.startswith('R'):
repl = escaperaw
s = s[1:]
assert s.startswith("'") or s.startswith('"'), repr(s[:1])
q = s[0]
if s[:3] == q*3:
q = q*3
assert s.endswith(q), repr(s[-len(q):])
assert len(s) >= 2*len(q)
s = s[len(q):-len(q)]
return regex.sub(repl, s)
def test():
for i in range(256):
c = chr(i)
s = repr(c)
e = evalString(s)
if e != c:
print i, c, s, e
if __name__ == "__main__":
test()
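
Hedged examples for evalString(); the argument is the literal exactly as it appears in source code, quotes included:

from sphinx.pycode.pgen2.literals import evalString

print repr(evalString(r"'a\nb'"))        # simple escape   -> 'a\nb' with a real newline
print repr(evalString(r"'\x41\101'"))    # hex and octal   -> 'AA'
print repr(evalString(r"u'caf\u00e9'"))  # unicode literal -> u'caf\xe9'
print repr(evalString('"""triple"""'))   # triple quotes   -> 'triple'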

sphinx/pycode/pgen2/parse.c Normal file

File diff suppressed because it is too large.


@ -0,0 +1,201 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
"""Parser engine for the grammar tables generated by pgen.
The grammar table must be loaded first.
See Parser/parser.c in the Python distribution for additional info on
how this parsing engine works.
"""
# Local imports
from sphinx.pycode.pgen2 import token
class ParseError(Exception):
"""Exception to signal the parser is stuck."""
def __init__(self, msg, type, value, context):
Exception.__init__(self, "%s: type=%r, value=%r, context=%r" %
(msg, type, value, context))
self.msg = msg
self.type = type
self.value = value
self.context = context
class Parser(object):
"""Parser engine.
The proper usage sequence is:
p = Parser(grammar, [converter]) # create instance
p.setup([start]) # prepare for parsing
<for each input token>:
if p.addtoken(...): # parse a token; may raise ParseError
break
root = p.rootnode # root of abstract syntax tree
A Parser instance may be reused by calling setup() repeatedly.
A Parser instance contains state pertaining to the current token
sequence, and should not be used concurrently by different threads
to parse separate token sequences.
See driver.py for how to get input tokens by tokenizing a file or
string.
Parsing is complete when addtoken() returns True; the root of the
abstract syntax tree can then be retrieved from the rootnode
instance variable. When a syntax error occurs, addtoken() raises
the ParseError exception. There is no error recovery; the parser
cannot be used after a syntax error was reported (but it can be
reinitialized by calling setup()).
"""
def __init__(self, grammar, convert=None):
"""Constructor.
The grammar argument is a grammar.Grammar instance; see the
grammar module for more information.
The parser is not ready yet for parsing; you must call the
setup() method to get it started.
The optional convert argument is a function mapping concrete
syntax tree nodes to abstract syntax tree nodes. If not
given, no conversion is done and the syntax tree produced is
the concrete syntax tree. If given, it must be a function of
two arguments, the first being the grammar (a grammar.Grammar
instance), and the second being the concrete syntax tree node
to be converted. The syntax tree is converted from the bottom
up.
A concrete syntax tree node is a (type, value, context, nodes)
tuple, where type is the node type (a token or symbol number),
value is None for symbols and a string for tokens, context is
None or an opaque value used for error reporting (typically a
(lineno, offset) pair), and nodes is a list of children for
symbols, and None for tokens.
An abstract syntax tree node may be anything; this is entirely
up to the converter function.
"""
self.grammar = grammar
self.convert = convert or (lambda grammar, node: node)
def setup(self, start=None):
"""Prepare for parsing.
This *must* be called before starting to parse.
The optional argument is an alternative start symbol; it
defaults to the grammar's start symbol.
You can use a Parser instance to parse any number of programs;
each time you call setup() the parser is reset to an initial
state determined by the (implicit or explicit) start symbol.
"""
if start is None:
start = self.grammar.start
# Each stack entry is a tuple: (dfa, state, node).
# A node is a tuple: (type, value, context, children),
# where children is a list of nodes or None, and context may be None.
newnode = (start, None, None, [])
stackentry = (self.grammar.dfas[start], 0, newnode)
self.stack = [stackentry]
self.rootnode = None
self.used_names = set() # Aliased to self.rootnode.used_names in pop()
def addtoken(self, type, value, context):
"""Add a token; return True iff this is the end of the program."""
# Map from token to label
ilabel = self.classify(type, value, context)
# Loop until the token is shifted; may raise exceptions
while True:
dfa, state, node = self.stack[-1]
states, first = dfa
arcs = states[state]
# Look for a state with this label
for i, newstate in arcs:
t, v = self.grammar.labels[i]
if ilabel == i:
# Look it up in the list of labels
assert t < 256
# Shift a token; we're done with it
self.shift(type, value, newstate, context)
# Pop while we are in an accept-only state
state = newstate
while states[state] == [(0, state)]:
self.pop()
if not self.stack:
# Done parsing!
return True
dfa, state, node = self.stack[-1]
states, first = dfa
# Done with this token
return False
elif t >= 256:
# See if it's a symbol and if we're in its first set
itsdfa = self.grammar.dfas[t]
itsstates, itsfirst = itsdfa
if ilabel in itsfirst:
# Push a symbol
self.push(t, self.grammar.dfas[t], newstate, context)
break # To continue the outer while loop
else:
if (0, state) in arcs:
# An accepting state, pop it and try something else
self.pop()
if not self.stack:
# Done parsing, but another token is input
raise ParseError("too much input",
type, value, context)
else:
# No success finding a transition
raise ParseError("bad input", type, value, context)
def classify(self, type, value, context):
"""Turn a token into a label. (Internal)"""
if type == token.NAME:
# Keep a listing of all used names
self.used_names.add(value)
# Check for reserved words
ilabel = self.grammar.keywords.get(value)
if ilabel is not None:
return ilabel
ilabel = self.grammar.tokens.get(type)
if ilabel is None:
raise ParseError("bad token", type, value, context)
return ilabel
def shift(self, type, value, newstate, context):
"""Shift a token. (Internal)"""
dfa, state, node = self.stack[-1]
newnode = (type, value, context, None)
newnode = self.convert(self.grammar, newnode)
if newnode is not None:
node[-1].append(newnode)
self.stack[-1] = (dfa, newstate, node)
def push(self, type, newdfa, newstate, context):
"""Push a nonterminal. (Internal)"""
dfa, state, node = self.stack[-1]
newnode = (type, None, context, [])
self.stack[-1] = (dfa, newstate, node)
self.stack.append((newdfa, 0, newnode))
def pop(self):
"""Pop a nonterminal. (Internal)"""
popdfa, popstate, popnode = self.stack.pop()
newnode = self.convert(self.grammar, popnode)
if newnode is not None:
if self.stack:
dfa, state, node = self.stack[-1]
node[-1].append(newnode)
else:
self.rootnode = newnode
self.rootnode.used_names = self.used_names
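
The usage sequence from the class docstring, made concrete: tokenize a string, narrow OP tokens through grammar.opmap (as Driver.parse_tokens does), and feed addtoken() until it returns True. A hedged sketch; the Grammar.txt path and the input are assumptions, and the prefix handling that Driver normally performs is skipped:

from cStringIO import StringIO
from sphinx.pycode import nodes
from sphinx.pycode.pgen2 import driver, grammar, parse, token, tokenize

p = parse.Parser(driver.load_grammar('sphinx/pycode/Grammar.txt'),
                 convert=nodes.convert)
p.setup()                                  # default start symbol

source = StringIO('x = 1\n')
for type, value, start, end, line in tokenize.generate_tokens(source.readline):
    if type in (tokenize.COMMENT, tokenize.NL):
        continue                           # Driver folds these into token prefixes
    if type == token.OP:
        type = grammar.opmap[value]        # generic OP -> specific token number
    if p.addtoken(type, value, ('', start)):
        break                              # ENDMARKER shifted; parse is complete

# 'x=1\n' -- whitespace lives in the prefixes, which this sketch left empty
print repr(str(p.rootnode))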


@ -0,0 +1,158 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
# Adapted from parse.py to be compiled with Cython by Georg Brandl.
"""Parser engine for the grammar tables generated by pgen.
The grammar table must be loaded first.
See Parser/parser.c in the Python distribution for additional info on
how this parsing engine works.
"""
from sphinx.pycode.nodes import Node, Leaf
DEF NAME = 1
class ParseError(Exception):
"""Exception to signal the parser is stuck."""
def __init__(self, msg, type, value, context):
Exception.__init__(self, "%s: type=%r, value=%r, context=%r" %
(msg, type, value, context))
self.msg = msg
self.type = type
self.value = value
self.context = context
cdef class Parser:
cdef public grammar, stack, rootnode, used_names
cdef _grammar_dfas, _grammar_labels, _grammar_keywords, _grammar_tokens
cdef _grammar_number2symbol
def __init__(self, grammar, convert=None):
self.grammar = grammar
#self.convert = convert or noconvert
self._grammar_dfas = grammar.dfas
self._grammar_labels = grammar.labels
self._grammar_keywords = grammar.keywords
self._grammar_tokens = grammar.tokens
self._grammar_number2symbol = grammar.number2symbol
def setup(self, start=None):
if start is None:
start = self.grammar.start
# Each stack entry is a tuple: (dfa, state, node).
# A node is a tuple: (type, value, context, children),
# where children is a list of nodes or None, and context may be None.
newnode = (start, None, None, [])
stackentry = (self._grammar_dfas[start], 0, newnode)
self.stack = [stackentry]
self.rootnode = None
self.used_names = set() # Aliased to self.rootnode.used_names in pop()
def addtoken(self, type, value, context):
"""Add a token; return True iff this is the end of the program."""
cdef int ilabel, i, t, state, newstate
# Map from token to label
ilabel = self.classify(type, value, context)
# Loop until the token is shifted; may raise exceptions
while True:
dfa, state, node = self.stack[-1]
states, first = dfa
arcs = states[state]
# Look for a state with this label
for i, newstate in arcs:
t, v = self._grammar_labels[i]
if ilabel == i:
# Look it up in the list of labels
## assert t < 256
# Shift a token; we're done with it
self.shift(type, value, newstate, context)
# Pop while we are in an accept-only state
state = newstate
while states[state] == [(0, state)]:
self.pop()
if not self.stack:
# Done parsing!
return True
dfa, state, node = self.stack[-1]
states, first = dfa
# Done with this token
return False
elif t >= 256:
# See if it's a symbol and if we're in its first set
itsdfa = self._grammar_dfas[t]
itsstates, itsfirst = itsdfa
if ilabel in itsfirst:
# Push a symbol
self.push(t, itsdfa, newstate, context)
break # To continue the outer while loop
else:
if (0, state) in arcs:
# An accepting state, pop it and try something else
self.pop()
if not self.stack:
# Done parsing, but another token is input
raise ParseError("too much input",
type, value, context)
else:
# No success finding a transition
raise ParseError("bad input", type, value, context)
cdef int classify(self, type, value, context):
"""Turn a token into a label. (Internal)"""
if type == NAME:
# Keep a listing of all used names
self.used_names.add(value)
# Check for reserved words
ilabel = self._grammar_keywords.get(value)
if ilabel is not None:
return ilabel
ilabel = self._grammar_tokens.get(type)
if ilabel is None:
raise ParseError("bad token", type, value, context)
return ilabel
cdef void shift(self, type, value, newstate, context):
"""Shift a token. (Internal)"""
dfa, state, node = self.stack[-1]
newnode = (type, value, context, None)
newnode = self.convert(newnode)
if newnode is not None:
node[-1].append(newnode)
self.stack[-1] = (dfa, newstate, node)
cdef void push(self, type, newdfa, newstate, context):
"""Push a nonterminal. (Internal)"""
dfa, state, node = self.stack[-1]
newnode = (type, None, context, [])
self.stack[-1] = (dfa, newstate, node)
self.stack.append((newdfa, 0, newnode))
cdef void pop(self):
"""Pop a nonterminal. (Internal)"""
popdfa, popstate, popnode = self.stack.pop()
newnode = self.convert(popnode)
if newnode is not None:
if self.stack:
dfa, state, node = self.stack[-1]
node[-1].append(newnode)
else:
self.rootnode = newnode
self.rootnode.used_names = self.used_names
cdef convert(self, raw_node):
type, value, context, children = raw_node
if children or type in self._grammar_number2symbol:
# If there's exactly one child, return that child instead of
# creating a new node.
if len(children) == 1:
return children[0]
return Node(type, children, context=context)
else:
return Leaf(type, value, context=context)
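
The cdef class above is meant to be compiled; a hedged distutils sketch
follows. Only the parse.pyx file name is taken from the header comment;
the module path and everything else here are assumptions, not the
project's actual build configuration:

    from distutils.core import setup
    from distutils.extension import Extension
    from Cython.Distutils import build_ext

    setup(
        cmdclass={'build_ext': build_ext},
        ext_modules=[Extension('sphinx.pycode.pgen2.parse',
                               ['sphinx/pycode/pgen2/parse.pyx'])],
    )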

sphinx/pycode/pgen2/pgen.py Normal file

@@ -0,0 +1,384 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
# Pgen imports
from sphinx.pycode.pgen2 import grammar, token, tokenize
class PgenGrammar(grammar.Grammar):
pass
class ParserGenerator(object):
def __init__(self, filename, stream=None):
close_stream = None
if stream is None:
stream = open(filename)
close_stream = stream.close
self.filename = filename
self.stream = stream
self.generator = tokenize.generate_tokens(stream.readline)
self.gettoken() # Initialize lookahead
self.dfas, self.startsymbol = self.parse()
if close_stream is not None:
close_stream()
self.first = {} # map from symbol name to set of tokens
self.addfirstsets()
def make_grammar(self):
c = PgenGrammar()
names = self.dfas.keys()
names.sort()
names.remove(self.startsymbol)
names.insert(0, self.startsymbol)
for name in names:
i = 256 + len(c.symbol2number)
c.symbol2number[name] = i
c.number2symbol[i] = name
for name in names:
dfa = self.dfas[name]
states = []
for state in dfa:
arcs = []
for label, next in state.arcs.iteritems():
arcs.append((self.make_label(c, label), dfa.index(next)))
if state.isfinal:
arcs.append((0, dfa.index(state)))
states.append(arcs)
c.states.append(states)
c.dfas[c.symbol2number[name]] = (states, self.make_first(c, name))
c.start = c.symbol2number[self.startsymbol]
return c
def make_first(self, c, name):
rawfirst = self.first[name]
first = {}
for label in rawfirst:
ilabel = self.make_label(c, label)
##assert ilabel not in first # XXX failed on <> ... !=
first[ilabel] = 1
return first
def make_label(self, c, label):
# XXX Maybe this should be a method on a subclass of converter?
ilabel = len(c.labels)
if label[0].isalpha():
# Either a symbol name or a named token
if label in c.symbol2number:
# A symbol name (a non-terminal)
if label in c.symbol2label:
return c.symbol2label[label]
else:
c.labels.append((c.symbol2number[label], None))
c.symbol2label[label] = ilabel
return ilabel
else:
# A named token (NAME, NUMBER, STRING)
itoken = getattr(token, label, None)
assert isinstance(itoken, int), label
assert itoken in token.tok_name, label
if itoken in c.tokens:
return c.tokens[itoken]
else:
c.labels.append((itoken, None))
c.tokens[itoken] = ilabel
return ilabel
else:
# Either a keyword or an operator
assert label[0] in ('"', "'"), label
value = eval(label)
if value[0].isalpha():
# A keyword
if value in c.keywords:
return c.keywords[value]
else:
c.labels.append((token.NAME, value))
c.keywords[value] = ilabel
return ilabel
else:
# An operator (any non-numeric token)
itoken = grammar.opmap[value] # Fails if unknown token
if itoken in c.tokens:
return c.tokens[itoken]
else:
c.labels.append((itoken, None))
c.tokens[itoken] = ilabel
return ilabel
def addfirstsets(self):
names = self.dfas.keys()
names.sort()
for name in names:
if name not in self.first:
self.calcfirst(name)
#print name, self.first[name].keys()
def calcfirst(self, name):
dfa = self.dfas[name]
self.first[name] = None # dummy to detect left recursion
state = dfa[0]
totalset = {}
overlapcheck = {}
for label, next in state.arcs.iteritems():
if label in self.dfas:
if label in self.first:
fset = self.first[label]
if fset is None:
raise ValueError("recursion for rule %r" % name)
else:
self.calcfirst(label)
fset = self.first[label]
totalset.update(fset)
overlapcheck[label] = fset
else:
totalset[label] = 1
overlapcheck[label] = {label: 1}
inverse = {}
for label, itsfirst in overlapcheck.iteritems():
for symbol in itsfirst:
if symbol in inverse:
raise ValueError("rule %s is ambiguous; %s is in the"
" first sets of %s as well as %s" %
(name, symbol, label, inverse[symbol]))
inverse[symbol] = label
self.first[name] = totalset
def parse(self):
dfas = {}
startsymbol = None
# MSTART: (NEWLINE | RULE)* ENDMARKER
while self.type != token.ENDMARKER:
while self.type == token.NEWLINE:
self.gettoken()
# RULE: NAME ':' RHS NEWLINE
name = self.expect(token.NAME)
self.expect(token.OP, ":")
a, z = self.parse_rhs()
self.expect(token.NEWLINE)
#self.dump_nfa(name, a, z)
dfa = self.make_dfa(a, z)
#self.dump_dfa(name, dfa)
oldlen = len(dfa)
self.simplify_dfa(dfa)
newlen = len(dfa)
dfas[name] = dfa
#print name, oldlen, newlen
if startsymbol is None:
startsymbol = name
return dfas, startsymbol
def make_dfa(self, start, finish):
# To turn an NFA into a DFA, we define the states of the DFA
# to correspond to *sets* of states of the NFA. Then do some
# state reduction. Let's represent sets as dicts with 1 for
# values.
assert isinstance(start, NFAState)
assert isinstance(finish, NFAState)
def closure(state):
base = {}
addclosure(state, base)
return base
def addclosure(state, base):
assert isinstance(state, NFAState)
if state in base:
return
base[state] = 1
for label, next in state.arcs:
if label is None:
addclosure(next, base)
states = [DFAState(closure(start), finish)]
for state in states: # NB states grows while we're iterating
arcs = {}
for nfastate in state.nfaset:
for label, next in nfastate.arcs:
if label is not None:
addclosure(next, arcs.setdefault(label, {}))
for label, nfaset in arcs.iteritems():
for st in states:
if st.nfaset == nfaset:
break
else:
st = DFAState(nfaset, finish)
states.append(st)
state.addarc(st, label)
return states # List of DFAState instances; first one is start
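        # Illustration: for a rule "x: a b | a c" the initial closure holds
        # both alternatives' NFA start states; consuming 'a' yields a single
        # DFA state whose nfaset merges both successors, and the 'b'/'c'
        # arcs out of it separate the alternatives again.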
def dump_nfa(self, name, start, finish):
print "Dump of NFA for", name
todo = [start]
for i, state in enumerate(todo):
print " State", i, state is finish and "(final)" or ""
for label, next in state.arcs:
if next in todo:
j = todo.index(next)
else:
j = len(todo)
todo.append(next)
if label is None:
print " -> %d" % j
else:
print " %s -> %d" % (label, j)
def dump_dfa(self, name, dfa):
print "Dump of DFA for", name
for i, state in enumerate(dfa):
print " State", i, state.isfinal and "(final)" or ""
for label, next in state.arcs.iteritems():
print " %s -> %d" % (label, dfa.index(next))
def simplify_dfa(self, dfa):
# This is not theoretically optimal, but works well enough.
# Algorithm: repeatedly look for two states that have the same
# set of arcs (same labels pointing to the same nodes) and
# unify them, until things stop changing.
# dfa is a list of DFAState instances
changes = True
while changes:
changes = False
for i, state_i in enumerate(dfa):
for j in range(i+1, len(dfa)):
state_j = dfa[j]
if state_i == state_j:
#print " unify", i, j
del dfa[j]
for state in dfa:
state.unifystate(state_j, state_i)
changes = True
break
def parse_rhs(self):
# RHS: ALT ('|' ALT)*
a, z = self.parse_alt()
if self.value != "|":
return a, z
else:
aa = NFAState()
zz = NFAState()
aa.addarc(a)
z.addarc(zz)
while self.value == "|":
self.gettoken()
a, z = self.parse_alt()
aa.addarc(a)
z.addarc(zz)
return aa, zz
def parse_alt(self):
# ALT: ITEM+
a, b = self.parse_item()
while (self.value in ("(", "[") or
self.type in (token.NAME, token.STRING)):
c, d = self.parse_item()
b.addarc(c)
b = d
return a, b
def parse_item(self):
# ITEM: '[' RHS ']' | ATOM ['+' | '*']
if self.value == "[":
self.gettoken()
a, z = self.parse_rhs()
self.expect(token.OP, "]")
a.addarc(z)
return a, z
else:
a, z = self.parse_atom()
value = self.value
if value not in ("+", "*"):
return a, z
self.gettoken()
z.addarc(a)
if value == "+":
return a, z
else:
return a, a
def parse_atom(self):
# ATOM: '(' RHS ')' | NAME | STRING
if self.value == "(":
self.gettoken()
a, z = self.parse_rhs()
self.expect(token.OP, ")")
return a, z
elif self.type in (token.NAME, token.STRING):
a = NFAState()
z = NFAState()
a.addarc(z, self.value)
self.gettoken()
return a, z
else:
self.raise_error("expected (...) or NAME or STRING, got %s/%s",
self.type, self.value)
def expect(self, type, value=None):
if self.type != type or (value is not None and self.value != value):
self.raise_error("expected %s/%s, got %s/%s",
type, value, self.type, self.value)
value = self.value
self.gettoken()
return value
def gettoken(self):
tup = self.generator.next()
while tup[0] in (tokenize.COMMENT, tokenize.NL):
tup = self.generator.next()
self.type, self.value, self.begin, self.end, self.line = tup
#print token.tok_name[self.type], repr(self.value)
def raise_error(self, msg, *args):
if args:
try:
msg = msg % args
except:
msg = " ".join([msg] + map(str, args))
raise SyntaxError(msg, (self.filename, self.end[0],
self.end[1], self.line))
class NFAState(object):
def __init__(self):
self.arcs = [] # list of (label, NFAState) pairs
def addarc(self, next, label=None):
assert label is None or isinstance(label, str)
assert isinstance(next, NFAState)
self.arcs.append((label, next))
class DFAState(object):
def __init__(self, nfaset, final):
assert isinstance(nfaset, dict)
assert isinstance(iter(nfaset).next(), NFAState)
assert isinstance(final, NFAState)
self.nfaset = nfaset
self.isfinal = final in nfaset
self.arcs = {} # map from label to DFAState
def addarc(self, next, label):
assert isinstance(label, str)
assert label not in self.arcs
assert isinstance(next, DFAState)
self.arcs[label] = next
def unifystate(self, old, new):
for label, next in self.arcs.iteritems():
if next is old:
self.arcs[label] = new
def __eq__(self, other):
# Equality test -- ignore the nfaset instance variable
assert isinstance(other, DFAState)
if self.isfinal != other.isfinal:
return False
# Can't just return self.arcs == other.arcs, because that
# would invoke this method recursively, with cycles...
if len(self.arcs) != len(other.arcs):
return False
for label, next in self.arcs.iteritems():
if next is not other.arcs.get(label):
return False
return True
def generate_grammar(filename="Grammar.txt"):
p = ParserGenerator(filename)
return p.make_grammar()
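
A hedged usage sketch: the tables are expensive to compute, so they are
typically built once and pickled. Grammar.dump()/load() are assumed from
pgen2's grammar module, and the file names are illustrative only:

    g = generate_grammar('Grammar.txt')
    g.dump('Grammar.pickle')     # cache the tables for later runs
    # later: g = PgenGrammar(); g.load('Grammar.pickle')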

sphinx/pycode/pgen2/token.py Executable file

@@ -0,0 +1,82 @@
#! /usr/bin/env python
"""Token constants (from "token.h")."""
# Taken from Python (r53757) and modified to include some tokens
# originally monkeypatched in by pgen2.tokenize
#--start constants--
ENDMARKER = 0
NAME = 1
NUMBER = 2
STRING = 3
NEWLINE = 4
INDENT = 5
DEDENT = 6
LPAR = 7
RPAR = 8
LSQB = 9
RSQB = 10
COLON = 11
COMMA = 12
SEMI = 13
PLUS = 14
MINUS = 15
STAR = 16
SLASH = 17
VBAR = 18
AMPER = 19
LESS = 20
GREATER = 21
EQUAL = 22
DOT = 23
PERCENT = 24
BACKQUOTE = 25
LBRACE = 26
RBRACE = 27
EQEQUAL = 28
NOTEQUAL = 29
LESSEQUAL = 30
GREATEREQUAL = 31
TILDE = 32
CIRCUMFLEX = 33
LEFTSHIFT = 34
RIGHTSHIFT = 35
DOUBLESTAR = 36
PLUSEQUAL = 37
MINEQUAL = 38
STAREQUAL = 39
SLASHEQUAL = 40
PERCENTEQUAL = 41
AMPEREQUAL = 42
VBAREQUAL = 43
CIRCUMFLEXEQUAL = 44
LEFTSHIFTEQUAL = 45
RIGHTSHIFTEQUAL = 46
DOUBLESTAREQUAL = 47
DOUBLESLASH = 48
DOUBLESLASHEQUAL = 49
AT = 50
OP = 51
COMMENT = 52
NL = 53
RARROW = 54
ERRORTOKEN = 55
N_TOKENS = 56
NT_OFFSET = 256
#--end constants--
tok_name = {}
for _name, _value in globals().items():
if type(_value) is type(0):
tok_name[_value] = _name
def ISTERMINAL(x):
return x < NT_OFFSET
def ISNONTERMINAL(x):
return x >= NT_OFFSET
def ISEOF(x):
return x == ENDMARKER
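
A small hedged example of the constants and helpers above:

    assert tok_name[NAME] == 'NAME'
    assert ISTERMINAL(NAME) and not ISNONTERMINAL(NAME)
    assert ISNONTERMINAL(NT_OFFSET) and ISEOF(ENDMARKER)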


@@ -0,0 +1,405 @@
# Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006 Python Software Foundation.
# All rights reserved.
"""Tokenization help for Python programs.
generate_tokens(readline) is a generator that breaks a stream of
text into Python tokens. It accepts a readline-like method which is called
repeatedly to get the next line of input (or "" for EOF). It generates
5-tuples with these members:
the token type (see token.py)
the token (a string)
the starting (row, column) indices of the token (a 2-tuple of ints)
the ending (row, column) indices of the token (a 2-tuple of ints)
the original line (string)
It is designed to match the working of the Python tokenizer exactly, except
that it produces COMMENT tokens for comments and gives type OP for all
operators.
Older entry points
tokenize_loop(readline, tokeneater)
tokenize(readline, tokeneater=printtoken)
are the same, except instead of generating tokens, tokeneater is a callback
function to which the 5 fields described above are passed as 5 arguments,
each time a new token is found."""
__author__ = 'Ka-Ping Yee <ping@lfw.org>'
__credits__ = \
'GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro'
import string, re
from sphinx.pycode.pgen2.token import *
from sphinx.pycode.pgen2 import token
__all__ = [x for x in dir(token) if x[0] != '_'] + ["tokenize",
"generate_tokens", "untokenize"]
del token
def group(*choices): return '(' + '|'.join(choices) + ')'
def any(*choices): return group(*choices) + '*'
def maybe(*choices): return group(*choices) + '?'
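# e.g. group('a', 'b') == '(a|b)'; any('x') == '(x)*'; maybe('x') == '(x)?'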
Whitespace = r'[ \f\t]*'
Comment = r'#[^\r\n]*'
Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
Name = r'[a-zA-Z_]\w*'
Binnumber = r'0[bB][01]*'
Hexnumber = r'0[xX][\da-fA-F]*[lL]?'
Octnumber = r'0[oO]?[0-7]*[lL]?'
Decnumber = r'[1-9]\d*[lL]?'
Intnumber = group(Binnumber, Hexnumber, Octnumber, Decnumber)
Exponent = r'[eE][-+]?\d+'
Pointfloat = group(r'\d+\.\d*', r'\.\d+') + maybe(Exponent)
Expfloat = r'\d+' + Exponent
Floatnumber = group(Pointfloat, Expfloat)
Imagnumber = group(r'\d+[jJ]', Floatnumber + r'[jJ]')
Number = group(Imagnumber, Floatnumber, Intnumber)
# Tail end of ' string.
Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
# Tail end of " string.
Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
# Tail end of ''' string.
Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
# Tail end of """ string.
Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
Triple = group("[ubUB]?[rR]?'''", '[ubUB]?[rR]?"""')
# Single-line ' or " string.
String = group(r"[uU]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
r'[uU]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
# Because of leftmost-then-longest match semantics, be sure to put the
# longest operators first (e.g., if = came before ==, == would get
# recognized as two instances of =).
Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"<>", r"!=",
r"//=?", r"->",
r"[+\-*/%&|^=<>]=?",
r"~")
Bracket = '[][(){}]'
Special = group(r'\r?\n', r'[:;.,`@]')
Funny = group(Operator, Bracket, Special)
PlainToken = group(Number, Funny, String, Name)
Token = Ignore + PlainToken
# First (or only) line of ' or " string.
ContStr = group(r"[uUbB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
group("'", r'\\\r?\n'),
r'[uUbB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
group('"', r'\\\r?\n'))
PseudoExtras = group(r'\\\r?\n', Comment, Triple)
PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
tokenprog, pseudoprog, single3prog, double3prog = map(
re.compile, (Token, PseudoToken, Single3, Double3))
endprogs = {"'": re.compile(Single), '"': re.compile(Double),
"'''": single3prog, '"""': double3prog,
"r'''": single3prog, 'r"""': double3prog,
"u'''": single3prog, 'u"""': double3prog,
"b'''": single3prog, 'b"""': double3prog,
"ur'''": single3prog, 'ur"""': double3prog,
"br'''": single3prog, 'br"""': double3prog,
"R'''": single3prog, 'R"""': double3prog,
"U'''": single3prog, 'U"""': double3prog,
"B'''": single3prog, 'B"""': double3prog,
"uR'''": single3prog, 'uR"""': double3prog,
"Ur'''": single3prog, 'Ur"""': double3prog,
"UR'''": single3prog, 'UR"""': double3prog,
"bR'''": single3prog, 'bR"""': double3prog,
"Br'''": single3prog, 'Br"""': double3prog,
"BR'''": single3prog, 'BR"""': double3prog,
'r': None, 'R': None,
'u': None, 'U': None,
'b': None, 'B': None}
triple_quoted = {}
for t in ("'''", '"""',
"r'''", 'r"""', "R'''", 'R"""',
"u'''", 'u"""', "U'''", 'U"""',
"b'''", 'b"""', "B'''", 'B"""',
"ur'''", 'ur"""', "Ur'''", 'Ur"""',
"uR'''", 'uR"""', "UR'''", 'UR"""',
"br'''", 'br"""', "Br'''", 'Br"""',
"bR'''", 'bR"""', "BR'''", 'BR"""',):
triple_quoted[t] = t
single_quoted = {}
for t in ("'", '"',
"r'", 'r"', "R'", 'R"',
"u'", 'u"', "U'", 'U"',
"b'", 'b"', "B'", 'B"',
"ur'", 'ur"', "Ur'", 'Ur"',
"uR'", 'uR"', "UR'", 'UR"',
"br'", 'br"', "Br'", 'Br"',
"bR'", 'bR"', "BR'", 'BR"', ):
single_quoted[t] = t
tabsize = 8
class TokenError(Exception): pass
class StopTokenizing(Exception): pass
def printtoken(type, token, (srow, scol), (erow, ecol), line): # for testing
print "%d,%d-%d,%d:\t%s\t%s" % \
(srow, scol, erow, ecol, tok_name[type], repr(token))
def tokenize(readline, tokeneater=printtoken):
"""
The tokenize() function accepts two parameters: one representing the
input stream, and one providing an output mechanism for tokenize().
The first parameter, readline, must be a callable object which provides
the same interface as the readline() method of built-in file objects.
Each call to the function should return one line of input as a string.
The second parameter, tokeneater, must also be a callable object. It is
called once for each token, with five arguments, corresponding to the
tuples generated by generate_tokens().
"""
try:
tokenize_loop(readline, tokeneater)
except StopTokenizing:
pass
# backwards compatible interface
def tokenize_loop(readline, tokeneater):
for token_info in generate_tokens(readline):
tokeneater(*token_info)
class Untokenizer:
def __init__(self):
self.tokens = []
self.prev_row = 1
self.prev_col = 0
def add_whitespace(self, start):
row, col = start
assert row <= self.prev_row
col_offset = col - self.prev_col
if col_offset:
self.tokens.append(" " * col_offset)
def untokenize(self, iterable):
for t in iterable:
if len(t) == 2:
self.compat(t, iterable)
break
tok_type, token, start, end, line = t
self.add_whitespace(start)
self.tokens.append(token)
self.prev_row, self.prev_col = end
if tok_type in (NEWLINE, NL):
self.prev_row += 1
self.prev_col = 0
return "".join(self.tokens)
def compat(self, token, iterable):
startline = False
indents = []
toks_append = self.tokens.append
toknum, tokval = token
if toknum in (NAME, NUMBER):
tokval += ' '
if toknum in (NEWLINE, NL):
startline = True
for tok in iterable:
toknum, tokval = tok[:2]
if toknum in (NAME, NUMBER):
tokval += ' '
if toknum == INDENT:
indents.append(tokval)
continue
elif toknum == DEDENT:
indents.pop()
continue
elif toknum in (NEWLINE, NL):
startline = True
elif startline and indents:
toks_append(indents[-1])
startline = False
toks_append(tokval)
def untokenize(iterable):
"""Transform tokens back into Python source code.
Each element returned by the iterable must be a token sequence
with at least two elements, a token number and token value. If
only two tokens are passed, the resulting output is poor.
Round-trip invariant for full input:
Untokenized source will match input source exactly
Round-trip invariant for limited input:
# Output text will tokenize back to the input
t1 = [tok[:2] for tok in generate_tokens(f.readline)]
newcode = untokenize(t1)
readline = iter(newcode.splitlines(1)).next
t2 = [tok[:2] for tok in generate_tokens(readline)]
assert t1 == t2
"""
ut = Untokenizer()
return ut.untokenize(iterable)
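# A hedged round-trip illustration of the "limited input" invariant above,
# with cStringIO standing in for a file:
#
#     from cStringIO import StringIO
#     src = "x = 1\n"
#     t1 = [t[:2] for t in generate_tokens(StringIO(src).readline)]
#     t2 = [t[:2] for t in generate_tokens(StringIO(untokenize(t1)).readline)]
#     assert t1 == t2   # token-level equality; the text itself may differ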
def generate_tokens(readline):
"""
The generate_tokens() generator requires one argument, readline, which
must be a callable object which provides the same interface as the
readline() method of built-in file objects. Each call to the function
should return one line of input as a string. Alternately, readline
can be a callable function terminating with StopIteration:
readline = open(myfile).next # Example of alternate readline
The generator produces 5-tuples with these members: the token type; the
token string; a 2-tuple (srow, scol) of ints specifying the row and
column where the token begins in the source; a 2-tuple (erow, ecol) of
ints specifying the row and column where the token ends in the source;
and the line on which the token was found. The line passed is the
logical line; continuation lines are included.
"""
lnum = parenlev = continued = 0
namechars, numchars = string.ascii_letters + '_', '0123456789'
contstr, needcont = '', 0
contline = None
indents = [0]
while 1: # loop over lines in stream
try:
line = readline()
except StopIteration:
line = ''
lnum = lnum + 1
pos, max = 0, len(line)
if contstr: # continued string
if not line:
raise TokenError, ("EOF in multi-line string", strstart)
endmatch = endprog.match(line)
if endmatch:
pos = end = endmatch.end(0)
yield (STRING, contstr + line[:end],
strstart, (lnum, end), contline + line)
contstr, needcont = '', 0
contline = None
elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
yield (ERRORTOKEN, contstr + line,
strstart, (lnum, len(line)), contline)
contstr = ''
contline = None
continue
else:
contstr = contstr + line
contline = contline + line
continue
elif parenlev == 0 and not continued: # new statement
if not line: break
column = 0
while pos < max: # measure leading whitespace
if line[pos] == ' ': column = column + 1
elif line[pos] == '\t': column = (column/tabsize + 1)*tabsize
elif line[pos] == '\f': column = 0
else: break
pos = pos + 1
if pos == max: break
if line[pos] in '#\r\n': # skip comments or blank lines
if line[pos] == '#':
comment_token = line[pos:].rstrip('\r\n')
nl_pos = pos + len(comment_token)
yield (COMMENT, comment_token,
(lnum, pos), (lnum, pos + len(comment_token)), line)
yield (NL, line[nl_pos:],
(lnum, nl_pos), (lnum, len(line)), line)
else:
yield ((NL, COMMENT)[line[pos] == '#'], line[pos:],
(lnum, pos), (lnum, len(line)), line)
continue
if column > indents[-1]: # count indents or dedents
indents.append(column)
yield (INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
while column < indents[-1]:
if column not in indents:
raise IndentationError(
"unindent does not match any outer indentation level",
("<tokenize>", lnum, pos, line))
indents = indents[:-1]
yield (DEDENT, '', (lnum, pos), (lnum, pos), line)
else: # continued statement
if not line:
raise TokenError, ("EOF in multi-line statement", (lnum, 0))
continued = 0
while pos < max:
pseudomatch = pseudoprog.match(line, pos)
if pseudomatch: # scan for tokens
start, end = pseudomatch.span(1)
spos, epos, pos = (lnum, start), (lnum, end), end
token, initial = line[start:end], line[start]
if initial in numchars or \
(initial == '.' and token != '.'): # ordinary number
yield (NUMBER, token, spos, epos, line)
elif initial in '\r\n':
newline = NEWLINE
if parenlev > 0:
newline = NL
yield (newline, token, spos, epos, line)
elif initial == '#':
assert not token.endswith("\n")
yield (COMMENT, token, spos, epos, line)
elif token in triple_quoted:
endprog = endprogs[token]
endmatch = endprog.match(line, pos)
if endmatch: # all on one line
pos = endmatch.end(0)
token = line[start:pos]
yield (STRING, token, spos, (lnum, pos), line)
else:
strstart = (lnum, start) # multiple lines
contstr = line[start:]
contline = line
break
elif initial in single_quoted or \
token[:2] in single_quoted or \
token[:3] in single_quoted:
if token[-1] == '\n': # continued string
strstart = (lnum, start)
endprog = (endprogs[initial] or endprogs[token[1]] or
endprogs[token[2]])
contstr, needcont = line[start:], 1
contline = line
break
else: # ordinary string
yield (STRING, token, spos, epos, line)
elif initial in namechars: # ordinary name
yield (NAME, token, spos, epos, line)
elif initial == '\\': # continued stmt
# This yield is new; needed for better idempotency:
yield (NL, token, spos, (lnum, pos), line)
continued = 1
else:
if initial in '([{': parenlev = parenlev + 1
elif initial in ')]}': parenlev = parenlev - 1
yield (OP, token, spos, epos, line)
else:
yield (ERRORTOKEN, line[pos],
(lnum, pos), (lnum, pos+1), line)
pos = pos + 1
for indent in indents[1:]: # pop remaining indent levels
yield (DEDENT, '', (lnum, 0), (lnum, 0), '')
yield (ENDMARKER, '', (lnum, 0), (lnum, 0), '')
if __name__ == '__main__': # testing
import sys
if len(sys.argv) > 1: tokenize(open(sys.argv[1]).readline)
else: tokenize(sys.stdin.readline)
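
A hedged example of consuming generate_tokens() directly, with cStringIO
standing in for a readline source:

    from cStringIO import StringIO
    for tp, val, start, end, line in generate_tokens(StringIO("x = 1\n").readline):
        print tok_name[tp], repr(val)
    # NAME 'x' / OP '=' / NUMBER '1' / NEWLINE '\n' / ENDMARKER ''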


@@ -24,7 +24,7 @@ generic_docroles = {
'guilabel' : nodes.strong,
'kbd' : nodes.literal,
'mailheader' : addnodes.literal_emphasis,
'makevar' : nodes.Text,
'makevar' : nodes.strong,
'manpage' : addnodes.literal_emphasis,
'mimetype' : addnodes.literal_emphasis,
'newsgroup' : addnodes.literal_emphasis,


@@ -324,3 +324,40 @@ class FilenameUniqDict(dict):
def __setstate__(self, state):
self._existing = state
def parselinenos(spec, total):
"""
Parse a line number spec (such as "1,2,4-6") and return a list of
wanted line numbers.
"""
items = list()
parts = spec.split(',')
for part in parts:
try:
begend = part.strip().split('-')
if len(begend) > 2:
raise ValueError
if len(begend) == 1:
items.append(int(begend[0])-1)
else:
start = (begend[0] == '') and 0 or int(begend[0])-1
end = (begend[1] == '') and total or int(begend[1])
items.extend(xrange(start, end))
except Exception, err:
raise ValueError('invalid line number spec: %r' % spec)
return items
def force_decode(string, encoding):
if isinstance(string, str):
if encoding:
string = string.decode(encoding)
else:
try:
# try decoding with utf-8, should only work for real UTF-8
string = string.decode('utf-8')
except UnicodeError:
# last resort -- can't fail
string = string.decode('latin1')
return string
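
Hedged examples of the two helpers, with values worked out from the code
above:

    assert parselinenos('1,4-6', 10) == [0, 3, 4, 5]   # zero-based indices
    assert parselinenos('-3', 10) == [0, 1, 2]         # open start defaults to 0
    assert force_decode('caf\xc3\xa9', None) == u'caf\xe9'  # utf-8 fallback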

sphinx/util/docstrings.py Normal file

@@ -0,0 +1,56 @@
# -*- coding: utf-8 -*-
"""
sphinx.util.docstrings
~~~~~~~~~~~~~~~~~~~~~~
Utilities for docstring processing.
:copyright: Copyright 2007-2009 by the Sphinx team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
import sys
def prepare_docstring(s):
"""
Convert a docstring into lines of parseable reST. Return it as a list of
lines usable for inserting into a docutils ViewList (used as argument
of nested_parse().) An empty line is added to act as a separator between
this docstring and following content.
"""
lines = s.expandtabs().splitlines()
# Find minimum indentation of any non-blank lines after first line.
margin = sys.maxint
for line in lines[1:]:
content = len(line.lstrip())
if content:
indent = len(line) - content
margin = min(margin, indent)
# Remove indentation.
if lines:
lines[0] = lines[0].lstrip()
if margin < sys.maxint:
for i in range(1, len(lines)): lines[i] = lines[i][margin:]
# Remove any leading blank lines.
while lines and not lines[0]:
lines.pop(0)
# make sure there is an empty line at the end
if lines and lines[-1]:
lines.append('')
return lines
def prepare_commentdoc(s):
"""
Extract documentation comment lines (starting with #:) and return them as a
list of lines. Returns an empty list if there is no documentation.
"""
result = []
lines = [line.strip() for line in s.expandtabs().splitlines()]
for line in lines:
if line.startswith('#: '):
result.append(line[3:])
if result and result[-1]:
result.append('')
return result
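
Hedged examples of both helpers:

    assert prepare_docstring("First line\n\n    indented\n") == \
        ['First line', '', 'indented', '']
    assert prepare_commentdoc("#: doc line\nx = 1") == ['doc line', '']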


@@ -6,7 +6,7 @@
This module implements a simple JavaScript serializer.
Uses the basestring encode function from simplejson by Bob Ippolito.
:copyright: Copyright 2008 by the Sphinx team, see AUTHORS.
:copyright: Copyright 2007-2009 by the Sphinx team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""


@@ -15,6 +15,28 @@ Test file and literal inclusion
.. include:: wrongenc.inc
:encoding: latin-1
Literalinclude options
======================
.. highlight:: text
.. cssclass:: inc-pyobj1
.. literalinclude:: literal.inc
:pyobject: Foo
.. cssclass:: inc-pyobj2
.. literalinclude:: literal.inc
:pyobject: Bar.baz
.. cssclass:: inc-lines
.. literalinclude:: literal.inc
:lines: 6-7,9
.. cssclass:: inc-startend
.. literalinclude:: literal.inc
:start-after: coding: utf-8
:end-before: class Foo
Testing downloadable files
==========================


@@ -2,3 +2,12 @@
# -*- coding: utf-8 -*-
foo = u"Including Unicode characters: üöä"
class Foo:
pass
class Bar:
def baz():
pass
def bar(): pass


@@ -173,13 +173,14 @@ def test_format_signature():
def test_get_doc():
def getdocl(*args):
# strip the empty line at the end
return list(gen.get_doc(*args))[:-1]
ds = gen.get_doc(*args)
# for testing purposes, concat them and strip the empty line at the end
return sum(ds, [])[:-1]
# objects without docstring
def f():
pass
assert getdocl('function', 'f', f) == []
assert getdocl('function', f) == []
# standard function, diverse docstring styles...
def f():
@@ -189,7 +190,7 @@ def test_get_doc():
Docstring
"""
for func in (f, g):
assert getdocl('function', 'f', func) == ['Docstring']
assert getdocl('function', func) == ['Docstring']
# first line vs. other lines indentation
def f():
@@ -198,17 +199,17 @@ def test_get_doc():
Other
lines
"""
assert getdocl('function', 'f', f) == ['First line', '', 'Other', ' lines']
assert getdocl('function', f) == ['First line', '', 'Other', ' lines']
# charset guessing (this module is encoded in utf-8)
def f():
"""Döcstring"""
assert getdocl('function', 'f', f) == [u'Döcstring']
assert getdocl('function', f) == [u'Döcstring']
# already-unicode docstrings must be taken literally
def f():
u"""Döcstring"""
assert getdocl('function', 'f', f) == [u'Döcstring']
assert getdocl('function', f) == [u'Döcstring']
# class docstring: depends on config value which one is taken
class C:
@@ -216,11 +217,11 @@ def test_get_doc():
def __init__(self):
"""Init docstring"""
gen.env.config.autoclass_content = 'class'
assert getdocl('class', 'C', C) == ['Class docstring']
assert getdocl('class', C) == ['Class docstring']
gen.env.config.autoclass_content = 'init'
assert getdocl('class', 'C', C) == ['Init docstring']
assert getdocl('class', C) == ['Init docstring']
gen.env.config.autoclass_content = 'both'
assert getdocl('class', 'C', C) == ['Class docstring', '', 'Init docstring']
assert getdocl('class', C) == ['Class docstring', '', 'Init docstring']
class D:
"""Class docstring"""
@@ -232,18 +233,21 @@
"""
# Indentation is normalized for 'both'
assert getdocl('class', 'D', D) == ['Class docstring', '', 'Init docstring',
'', 'Other', ' lines']
assert getdocl('class', D) == ['Class docstring', '', 'Init docstring',
'', 'Other', ' lines']
def test_docstring_processing():
def process(what, name, obj):
return list(gen.process_doc(gen.get_doc(what, obj), what, name, obj))
class E:
def __init__(self):
"""Init docstring"""
# docstring processing by event handler
assert getdocl('class', 'bar', E) == ['Init docstring', '', '42']
assert process('class', 'bar', E) == ['Init docstring', '', '42', '']
def test_docstring_processing_functions():
lid = app.connect('autodoc-process-docstring', cut_lines(1, 1, ['function']))
def f():
"""
@@ -251,7 +255,7 @@ def test_docstring_processing_functions():
second line
third line
"""
assert list(gen.get_doc('function', 'f', f)) == ['second line', '']
assert process('function', 'f', f) == ['second line', '']
app.disconnect(lid)
lid = app.connect('autodoc-process-docstring', between('---', ['function']))
@@ -263,7 +267,7 @@ def test_docstring_processing_functions():
---
third line
"""
assert list(gen.get_doc('function', 'f', f)) == ['second line', '']
assert process('function', 'f', f) == ['second line', '']
app.disconnect(lid)
@@ -289,7 +293,7 @@ def test_generate():
def assert_result_contains(item, *args):
gen.generate(*args)
print '\n'.join(gen.result)
#print '\n'.join(gen.result)
assert len(gen.warnings) == 0, gen.warnings
assert item in gen.result
del gen.result[:]
@@ -325,7 +329,10 @@ def test_generate():
assert_processes(should, 'class', 'Class', [], None)
should.extend([('method', 'test_autodoc.Class.meth')])
assert_processes(should, 'class', 'Class', ['meth'], None)
should.extend([('attribute', 'test_autodoc.Class.prop')])
should.extend([('attribute', 'test_autodoc.Class.prop'),
('attribute', 'test_autodoc.Class.attr'),
('attribute', 'test_autodoc.Class.docattr'),
('attribute', 'test_autodoc.Class.udocattr')])
assert_processes(should, 'class', 'Class', ['__all__'], None)
options.undoc_members = True
should.append(('method', 'test_autodoc.Class.undocmeth'))
@@ -369,6 +376,11 @@ def test_generate():
('method', 'test_autodoc.Outer.Inner.meth')],
'class', 'Outer', ['__all__'], None)
# test generation for C modules (which have no source file)
gen.env.currmodule = 'time'
assert_processes([('function', 'time.asctime')], 'function', 'asctime', [], None)
assert_processes([('function', 'time.asctime')], 'function', 'asctime', [], None)
# --- generate fodder ------------
@@ -398,10 +410,22 @@ class Class(Base):
"""Method that should be skipped."""
pass
# should not be documented
skipattr = 'foo'
#: should be documented -- süß
attr = 'bar'
@property
def prop(self):
"""Property."""
docattr = 'baz'
"""should likewise be documented -- süß"""
udocattr = 'quux'
u"""should be documented as well - süß"""
class CustomDict(dict):
"""Docstring."""
@@ -421,4 +445,5 @@ class Outer(object):
def meth(self):
"""Foo"""
# should be documented as an alias
factory = dict


@@ -10,6 +10,7 @@
"""
import os
import re
import sys
import difflib
import htmlentitydefs
@@ -32,7 +33,7 @@ WARNING: %(root)s/images.txt:9: Image file not readable: foo.png
WARNING: %(root)s/images.txt:23: Nonlocal image URI found: http://www.python.org/logo.png
WARNING: %(root)s/includes.txt:: (WARNING/2) Encoding 'utf-8' used for reading included \
file u'wrongenc.inc' seems to be wrong, try giving an :encoding: option
WARNING: %(root)s/includes.txt:34: Download file not readable: nonexisting.png
WARNING: %(root)s/includes.txt:56: Download file not readable: nonexisting.png
"""
HTML_WARNINGS = ENV_WARNINGS + """\
@@ -61,11 +62,19 @@ HTML_XPATH = {
".//pre": u'Max Strauß',
".//a[@href='_downloads/img.png']": '',
".//a[@href='_downloads/img1.png']": '',
".//div[@class='inc-pyobj1 highlight-text']/div/pre":
r'^class Foo:\n pass\n\s*$',
".//div[@class='inc-pyobj2 highlight-text']/div/pre":
r'^ def baz\(\):\n pass\n\s*$',
".//div[@class='inc-lines highlight-text']/div/pre":
r'^class Foo:\n pass\nclass Bar:\n$',
".//div[@class='inc-startend highlight-text']/div/pre":
ur'^foo = u"Including Unicode characters: üöä"\n$',
},
'autodoc.html': {
".//dt[@id='test_autodoc.Class']": '',
".//dt[@id='test_autodoc.function']/em": '**kwds',
".//dd": 'Return spam.',
".//dt[@id='test_autodoc.function']/em": r'\*\*kwds',
".//dd": r'Return spam\.',
},
'markup.html': {
".//meta[@name='author'][@content='Me']": '',
@@ -81,7 +90,7 @@ HTML_XPATH = {
},
'contents.html': {
".//meta[@name='hc'][@content='hcval']": '',
".//td[@class='label']": '[Ref1]',
".//td[@class='label']": r'\[Ref1\]',
".//li[@class='toctree-l1']/a": 'Testing various markup',
".//li[@class='toctree-l2']/a": 'Admonitions',
".//title": 'Sphinx <Tests>',
@@ -117,18 +126,23 @@ def test_html(app):
parser = NslessParser()
parser.entity.update(htmlentitydefs.entitydefs)
etree = ET.parse(os.path.join(app.outdir, fname), parser)
for path, text in paths.iteritems():
for path, check in paths.iteritems():
nodes = list(etree.findall(path))
assert nodes != []
if not text:
if callable(check):
check(nodes)
elif not check:
# only check for node presence
continue
for node in nodes:
if node.text and text in node.text:
break
else:
assert False, ('%r not found in any node matching '
'path %s in %s' % (text, path, fname))
rex = re.compile(check)
for node in nodes:
if node.text and rex.search(node.text):
break
else:
assert False, ('%r not found in any node matching '
'path %s in %s: %r' % (check, path, fname,
[node.text for node in nodes]))
@with_app(buildername='latex', warning=latex_warnfile)


@@ -17,11 +17,13 @@ from docutils import frontend, utils, nodes
from docutils.parsers import rst
from sphinx import addnodes
from sphinx.util import texescape
from sphinx.writers.html import HTMLWriter, SmartyPantsHTMLTranslator
from sphinx.writers.latex import LaTeXWriter, LaTeXTranslator
def setup_module():
global app, settings, parser
texescape.init() # otherwise done by the latex builder
app = TestApp(cleanenv=True)
optparser = frontend.OptionParser(components=(rst.Parser, HTMLWriter, LaTeXWriter))
settings = optparser.get_default_values()