merge in pycode branch

2025-02-25 18:55:22 -06:00 · 2009-01-04 20:55:01 +01:00 · 2009-01-04 20:55:01 +01:00 · dc3679ddb5
commit dc3679ddb5
parent b0f5eb2713 088d636487
32 changed files with 5997 additions and 161 deletions
--- a/.hgignore
+++ b/.hgignore
@ -1,7 +1,9 @@
 .*\.pyc
 .*\.egg
+.*\.so
 build/
 dist/
+sphinx/pycode/Grammar.*pickle
 Sphinx.egg-info/
 doc/_build/
 TAGS
--- a/LICENSE
+++ b/LICENSE
@ -1,4 +1,4 @@
-Copyright (c) 2007-2009 by the respective authors (see AUTHORS file).
+Copyright (c) 2007-2009 by the Sphinx team (see AUTHORS file).
 All rights reserved.

 License for Sphinx
@ -31,6 +31,59 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 Licenses for incorporated software
 ==================================

+The pgen2 package, included in this distribution under the name
+sphinx.pycode.pgen2, is available in the Python 2.6 distribution under
+the PSF license agreement for Python:
+
+----------------------------------------------------------------------
+1. This LICENSE AGREEMENT is between the Python Software Foundation
+   ("PSF"), and the Individual or Organization ("Licensee") accessing
+   and otherwise using Python 2.6 software in source or binary form
+   and its associated documentation.
+
+2. Subject to the terms and conditions of this License Agreement, PSF
+   hereby grants Licensee a nonexclusive, royalty-free, world-wide
+   license to reproduce, analyze, test, perform and/or display
+   publicly, prepare derivative works, distribute, and otherwise use
+   Python 2.6 alone or in any derivative version, provided, however,
+   that PSF's License Agreement and PSF's notice of copyright, i.e.,
+   "Copyright © 2001-2008 Python Software Foundation; All Rights
+   Reserved" are retained in Python 2.6 alone or in any derivative
+   version prepared by Licensee.
+
+3. In the event Licensee prepares a derivative work that is based on
+   or incorporates Python 2.6 or any part thereof, and wants to make
+   the derivative work available to others as provided herein, then
+   Licensee hereby agrees to include in any such work a brief summary
+   of the changes made to Python 2.6.
+
+4. PSF is making Python 2.6 available to Licensee on an "AS IS" basis.
+   PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED.  BY
+   WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND DISCLAIMS ANY
+   REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY
+   PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 2.6 WILL NOT INFRINGE
+   ANY THIRD PARTY RIGHTS.
+
+5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
+   2.6 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS
+   AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON
+   2.6, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY
+   THEREOF.
+
+6. This License Agreement will automatically terminate upon a material
+   breach of its terms and conditions.
+
+7. Nothing in this License Agreement shall be deemed to create any
+   relationship of agency, partnership, or joint venture between PSF
+   and Licensee.  This License Agreement does not grant permission to
+   use PSF trademarks or trade name in a trademark sense to endorse or
+   promote products or services of Licensee, or any third party.
+
+8. By copying, installing or otherwise using Python 2.6, Licensee
+   agrees to be bound by the terms and conditions of this License
+   Agreement.
+----------------------------------------------------------------------
+
 The included smartypants module, included as sphinx.util.smartypants,
 is available under the following license:

--- a/doc/conf.py
+++ b/doc/conf.py
@ -36,7 +36,7 @@ master_doc = 'contents'

 # General substitutions.
 project = 'Sphinx'
-copyright = '2008, Georg Brandl'
+copyright = '2007-2009, Georg Brandl'

 # The default replacements for |version| and |release|, also used in various
 # other places throughout the built documents.
--- a/doc/markup/code.rst
+++ b/doc/markup/code.rst
@ -113,8 +113,35 @@ Includes
      .. literalinclude:: example.py
         :encoding: latin-1

+   The directive also supports including only parts of the file.  If it is a
+   Python module, you can select a class, function or method to include using
+   the ``pyobject`` option::
+
+      .. literalinclude:: example.py
+         :pyobject: Timer.start
+
+   This would only include the code lines belonging to the ``start()`` method in
+   the ``Timer`` class within the file.
+
+   Alternately, you can specify exactly which lines to include by giving a
+   ``lines`` option::
+
+      .. literalinclude:: example.py
+         :lines: 1,3,5-10,20-
+
+   This includes the lines 1, 3, 5 to 10 and lines 20 to the last line.
+
+   Another way to control which part of the file is included is to use the
+   ``start-after`` and ``end-before`` options (or only one of them).  If
+   ``start-after`` is given as a string option, only lines that follow the first
+   line containing that string are included.  If ``end-before`` is given as a
+   string option, only lines that precede the first line containing that string
+   are included.
+
   .. versionadded:: 0.4.3
      The ``encoding`` option.
+   .. versionadded:: 0.6
+      The ``pyobject``, ``lines``, ``start-after`` and ``end-before`` options.


 .. rubric:: Footnotes
--- a/sphinx/builders/__init__.py
+++ b/sphinx/builders/__init__.py
@ -5,7 +5,7 @@

    Builder superclass for all builders.

-    :copyright: 2007-2008 by Georg Brandl, Sebastian Wiesner, Horst Gutmann.
+    :copyright: Copyright 2007-2009 by the Sphinx team, see AUTHORS.
    :license: BSD, see LICENSE for details.
 """

--- a/sphinx/builders/html.py
+++ b/sphinx/builders/html.py
@ -37,8 +37,9 @@ except ImportError:
    except ImportError:
        json = None

-
+#: the filename for the inventory of objects
 INVENTORY_FILENAME = 'objects.inv'
+#: the filename for the "last build" file (for serializing builders)
 LAST_BUILD_FILENAME = 'last_build'


--- a/sphinx/directives/code.py
+++ b/sphinx/directives/code.py
@ -15,6 +15,7 @@ from docutils import nodes
 from docutils.parsers.rst import directives

 from sphinx import addnodes
+from sphinx.util import parselinenos


 # ------ highlight directive --------------------------------------------------------
@ -68,32 +69,78 @@ def literalinclude_directive(name, arguments, options, content, lineno,
        lineno - state_machine.input_offset - 1)))
    fn = path.normpath(path.join(source_dir, rel_fn))

+    if 'pyobject' in options and 'lines' in options:
+        return [state.document.reporter.warning(
+            'Cannot use both "pyobject" and "lines" options', line=lineno)]
+
    encoding = options.get('encoding', env.config.source_encoding)
    try:
        f = codecs.open(fn, 'r', encoding)
-        text = f.read()
+        lines = f.readlines()
        f.close()
    except (IOError, OSError):
-        retnode = state.document.reporter.warning(
-            'Include file %r not found or reading it failed' % arguments[0], line=lineno)
+        return [state.document.reporter.warning(
+            'Include file %r not found or reading it failed' % arguments[0],
+            line=lineno)]
    except UnicodeError:
-        retnode = state.document.reporter.warning(
+        return [state.document.reporter.warning(
            'Encoding %r used for reading included file %r seems to '
            'be wrong, try giving an :encoding: option' %
-            (encoding, arguments[0]))
-    else:
-        retnode = nodes.literal_block(text, text, source=fn)
-        retnode.line = 1
-        if options.get('language', ''):
-            retnode['language'] = options['language']
-        if 'linenos' in options:
-            retnode['linenos'] = True
-        state.document.settings.env.note_dependency(rel_fn)
+            (encoding, arguments[0]))]
+
+    objectname = options.get('pyobject')
+    if objectname is not None:
+        from sphinx.pycode import ModuleAnalyzer
+        analyzer = ModuleAnalyzer.for_file(fn, '')
+        tags = analyzer.find_tags()
+        if objectname not in tags:
+            return [state.document.reporter.warning(
+                'Object named %r not found in include file %r' %
+                (objectname, arguments[0]), line=lineno)]
+        else:
+            lines = lines[tags[objectname][1] - 1 : tags[objectname][2] - 1]
+
+    linespec = options.get('lines')
+    if linespec is not None:
+        try:
+            linelist = parselinenos(linespec, len(lines))
+        except ValueError, err:
+            return [state.document.reporter.warning(str(err), line=lineno)]
+        lines = [lines[i] for i in linelist]
+
+    startafter = options.get('start-after')
+    endbefore = options.get('end-before')
+    if startafter is not None or endbefore is not None:
+        use = not startafter
+        res = []
+        for line in lines:
+            if not use and startafter in line:
+                use = True
+            elif use and endbefore in line:
+                use = False
+                break
+            elif use:
+                res.append(line)
+        lines = res
+
+    text = ''.join(lines)
+    retnode = nodes.literal_block(text, text, source=fn)
+    retnode.line = 1
+    if options.get('language', ''):
+        retnode['language'] = options['language']
+    if 'linenos' in options:
+        retnode['linenos'] = True
+    state.document.settings.env.note_dependency(rel_fn)
    return [retnode]

 literalinclude_directive.options = {'linenos': directives.flag,
-                                    'language': directives.unchanged,
-                                    'encoding': directives.encoding}
+                                    'language': directives.unchanged_required,
+                                    'encoding': directives.encoding,
+                                    'pyobject': directives.unchanged_required,
+                                    'lines': directives.unchanged_required,
+                                    'start-after': directives.unchanged_required,
+                                    'end-before': directives.unchanged_required,
+                                    }
 literalinclude_directive.content = 0
 literalinclude_directive.arguments = (1, 0, 0)
 directives.register_directive('literalinclude', literalinclude_directive)
--- a/sphinx/environment.py
+++ b/sphinx/environment.py
@ -5,11 +5,7 @@

    Global creation environment.

-<<<<<<< local
-    :copyright: 2007-2009 by Georg Brandl.
-=======
    :copyright: Copyright 2007-2009 by the Sphinx team, see AUTHORS.
->>>>>>> other
    :license: BSD, see LICENSE for details.
 """

--- a/sphinx/ext/autodoc.py
+++ b/sphinx/ext/autodoc.py
@ -21,7 +21,9 @@ from docutils import nodes
 from docutils.parsers.rst import directives
 from docutils.statemachine import ViewList

-from sphinx.util import rpartition, nested_parse_with_titles
+from sphinx.util import rpartition, nested_parse_with_titles, force_decode
+from sphinx.pycode import ModuleAnalyzer, PycodeError
+from sphinx.util.docstrings import prepare_docstring

 clstypes = (type, ClassType)
 try:
@ -29,8 +31,6 @@ try:
 except NameError:
    base_exception = Exception

-_charset_re = re.compile(r'coding[:=]\s*([-\w.]+)')
-_module_charsets = {}

 py_ext_sig_re = re.compile(
    r'''^ ([\w.]+::)?            # explicit module name
@ -171,56 +171,6 @@ def isdescriptor(x):
    return False


-def prepare_docstring(s):
-    """
-    Convert a docstring into lines of parseable reST.  Return it as a list of
-    lines usable for inserting into a docutils ViewList (used as argument
-    of nested_parse().)  An empty line is added to act as a separator between
-    this docstring and following content.
-    """
-    lines = s.expandtabs().splitlines()
-    # Find minimum indentation of any non-blank lines after first line.
-    margin = sys.maxint
-    for line in lines[1:]:
-        content = len(line.lstrip())
-        if content:
-            indent = len(line) - content
-            margin = min(margin, indent)
-    # Remove indentation.
-    if lines:
-        lines[0] = lines[0].lstrip()
-    if margin < sys.maxint:
-        for i in range(1, len(lines)): lines[i] = lines[i][margin:]
-    # Remove any leading blank lines.
-    while lines and not lines[0]:
-        lines.pop(0)
-    # make sure there is an empty line at the end
-    if lines and lines[-1]:
-        lines.append('')
-    return lines
-
-
-def get_module_charset(module):
-    """Return the charset of the given module (cached in _module_charsets)."""
-    if module in _module_charsets:
-        return _module_charsets[module]
-    try:
-        filename = __import__(module, None, None, ['foo']).__file__
-    except (ImportError, AttributeError):
-        return None
-    if filename[-4:].lower() in ('.pyc', '.pyo'):
-        filename = filename[:-1]
-    for line in [linecache.getline(filename, x) for x in (1, 2)]:
-        match = _charset_re.search(line)
-        if match is not None:
-            charset = match.group(1)
-            break
-    else:
-        charset = 'ascii'
-    _module_charsets[module] = charset
-    return charset
-
-
 class RstGenerator(object):
    def __init__(self, options, document, lineno):
        self.options = options
@ -234,15 +184,19 @@ class RstGenerator(object):
    def warn(self, msg):
        self.warnings.append(self.reporter.warning(msg, line=self.lineno))

-    def get_doc(self, what, name, obj):
-        """Format and yield lines of the docstring(s) for the object."""
+    def get_doc(self, what, obj, encoding=None):
+        """Decode and return lines of the docstring(s) for the object."""
        docstrings = []
+
+        # add the regular docstring if present
        if getattr(obj, '__doc__', None):
            docstrings.append(obj.__doc__)
-        # skip some lines in module docstrings if configured
+
+        # skip some lines in module docstrings if configured (deprecated!)
        if what == 'module' and self.env.config.automodule_skip_lines and docstrings:
            docstrings[0] = '\n'.join(docstrings[0].splitlines()
                                      [self.env.config.automodule_skip_lines:])
+
        # for classes, what the "docstring" is can be controlled via an option
        if what in ('class', 'exception'):
            content = self.env.config.autoclass_content
@ -258,24 +212,13 @@ class RstGenerator(object):
                        docstrings.append(initdocstring)
            # the default is only the class docstring

-        # decode the docstrings using the module's source encoding
-        charset = None
-        module = getattr(obj, '__module__', None)
-        if module is not None:
-            charset = get_module_charset(module)
+        # make sure we have Unicode docstrings, then sanitize and split into lines
+        return [prepare_docstring(force_decode(docstring, encoding))
+                for docstring in docstrings]

-        for docstring in docstrings:
-            if isinstance(docstring, str):
-                if charset:
-                    docstring = docstring.decode(charset)
-                else:
-                    try:
-                        # try decoding with utf-8, should only work for real UTF-8
-                        docstring = docstring.decode('utf-8')
-                    except UnicodeError:
-                        # last resort -- can't fail
-                        docstring = docstring.decode('latin1')
-            docstringlines = prepare_docstring(docstring)
+    def process_doc(self, docstrings, what, name, obj):
+        """Let the user process the docstrings."""
+        for docstringlines in docstrings:
            if self.env.app:
                # let extensions preprocess docstrings
                self.env.app.emit('autodoc-process-docstring',
@ -313,7 +256,7 @@ class RstGenerator(object):
                          'for automodule %s' % name)
            return (path or '') + base, [], None, None

-        elif what in ('exception', 'function', 'class'):
+        elif what in ('exception', 'function', 'class', 'data'):
            if mod is None:
                if path:
                    mod = path.rstrip('.')
@ -424,14 +367,8 @@ class RstGenerator(object):

        # now, import the module and get object to document
        try:
-            todoc = module = __import__(mod, None, None, ['foo'])
-            if hasattr(module, '__file__') and module.__file__:
-                modfile = module.__file__
-                if modfile[-4:].lower() in ('.pyc', '.pyo'):
-                    modfile = modfile[:-1]
-                self.filename_set.add(modfile)
-            else:
-                modfile = None  # e.g. for builtin and C modules
+            __import__(mod)
+            todoc = module = sys.modules[mod]
            for part in objpath:
                todoc = getattr(todoc, part)
        except (ImportError, AttributeError), err:
@ -440,12 +377,26 @@ class RstGenerator(object):
                      (what, str(fullname), err))
            return

+        # try to also get a source code analyzer for attribute docs
+        try:
+            analyzer = ModuleAnalyzer.for_module(mod)
+        except PycodeError, err:
+            # no source file -- e.g. for builtin and C modules
+            analyzer = None
+        else:
+            self.filename_set.add(analyzer.srcname)
+
        # check __module__ of object if wanted (for members not given explicitly)
        if check_module:
            if hasattr(todoc, '__module__'):
                if todoc.__module__ != mod:
                    return

+        # make sure that the result starts with an empty line.  This is
+        # necessary for some situations where another directive preprocesses
+        # reST and no starting newline is present
+        self.result.append(u'', '')
+
        # format the object's signature, if any
        try:
            sig = self.format_signature(what, fullname, todoc, args, retann)
@ -454,11 +405,6 @@ class RstGenerator(object):
                      (fullname, err))
            sig = ''

-        # make sure that the result starts with an empty line.  This is
-        # necessary for some situations where another directive preprocesses
-        # reST and no starting newline is present
-        self.result.append(u'', '')
-
        # now, create the directive header
        if what == 'method':
            directive = get_method_type(todoc)
@ -484,13 +430,14 @@ class RstGenerator(object):
            self.result.append(indent + u'   :noindex:', '<autodoc>')
        self.result.append(u'', '<autodoc>')

+        # add inheritance info, if wanted
        if self.options.show_inheritance and what in ('class', 'exception'):
            if len(todoc.__bases__):
                bases = [b.__module__ == '__builtin__' and
                         u':class:`%s`' % b.__name__ or
                         u':class:`%s.%s`' % (b.__module__, b.__name__)
                         for b in todoc.__bases__]
-                self.result.append(indent + u'   Bases: %s' % ', '.join(bases),
+                self.result.append(indent + _(u'   Bases: %s') % ', '.join(bases),
                                   '<autodoc>')
                self.result.append(u'', '<autodoc>')

@ -498,17 +445,31 @@ class RstGenerator(object):
        if what != 'module':
            indent += u'   '

-        if modfile:
-            sourcename = '%s:docstring of %s' % (modfile, fullname)
+        # add content from attribute documentation
+        if analyzer:
+            sourcename = '%s:docstring of %s' % (analyzer.srcname, fullname)
+            attr_docs = analyzer.find_attr_docs()
+            if what in ('data', 'attribute'):
+                key = ('.'.join(objpath[:-1]), objpath[-1])
+                if key in attr_docs:
+                    no_docstring = True
+                    docstrings = [attr_docs[key]]
+                    for i, line in enumerate(self.process_doc(docstrings, what,
+                                                              fullname, todoc)):
+                        self.result.append(indent + line, sourcename, i)
        else:
            sourcename = 'docstring of %s' % fullname
+            attr_docs = {}

        # add content from docstrings
        if not no_docstring:
-            for i, line in enumerate(self.get_doc(what, fullname, todoc)):
+            encoding = analyzer and analyzer.encoding
+            docstrings = self.get_doc(what, todoc, encoding)
+            for i, line in enumerate(self.process_doc(docstrings, what,
+                                                      fullname, todoc)):
                self.result.append(indent + line, sourcename, i)

-        # add source content, if present
+        # add additional content (e.g. from document), if present
        if add_content:
            for line, src in zip(add_content.data, add_content.items):
                self.result.append(indent + line, src[0], src[1])
@ -523,10 +484,10 @@ class RstGenerator(object):
        if objpath:
            self.env.autodoc_current_class = objpath[0]

-        # add members, if possible
-        _all = members == ['__all__']
+        # look for members to include
+        want_all_members = members == ['__all__']
        members_check_module = False
-        if _all:
+        if want_all_members:
            # unqualified :members: given
            if what == 'module':
                if hasattr(todoc, '__all__'):
@ -555,14 +516,28 @@ class RstGenerator(object):
        else:
            all_members = [(mname, getattr(todoc, mname)) for mname in members]

+        # search for members in source code too
+        namespace = '.'.join(objpath)  # will be empty for modules
+
        for (membername, member) in all_members:
-            if _all and membername.startswith('_'):
+            # if isattr is True, the member is documented as an attribute
+            isattr = False
+            # if content is not None, no extra content from docstrings will be added
+            content = None
+
+            if want_all_members and membername.startswith('_'):
                # ignore members whose name starts with _ by default
                skip = True
            else:
-                # ignore undocumented members if :undoc-members: is not given
-                doc = getattr(member, '__doc__', None)
-                skip = not self.options.undoc_members and not doc
+                if (namespace, membername) in attr_docs:
+                    # keep documented attributes
+                    skip = False
+                    isattr = True
+                else:
+                    # ignore undocumented members if :undoc-members: is not given
+                    doc = getattr(member, '__doc__', None)
+                    skip = not self.options.undoc_members and not doc
+
            # give the user a chance to decide whether this member should be skipped
            if self.env.app:
                # let extensions preprocess docstrings
@ -573,10 +548,12 @@ class RstGenerator(object):
            if skip:
                continue

-            content = None
+            # determine member type
            if what == 'module':
                if isinstance(member, (FunctionType, BuiltinFunctionType)):
                    memberwhat = 'function'
+                elif isattr:
+                    memberwhat = 'attribute'
                elif isinstance(member, clstypes):
                    if member.__name__ != membername:
                        # assume it's aliased
@ -588,10 +565,13 @@ class RstGenerator(object):
                    else:
                        memberwhat = 'class'
                else:
-                    # XXX: todo -- attribute docs
                    continue
            else:
-                if isinstance(member, clstypes):
+                if inspect.isroutine(member):
+                    memberwhat = 'method'
+                elif isattr:
+                    memberwhat = 'attribute'
+                elif isinstance(member, clstypes):
                    if member.__name__ != membername:
                        # assume it's aliased
                        memberwhat = 'attribute'
@ -599,13 +579,11 @@ class RstGenerator(object):
                                           source='')
                    else:
                        memberwhat = 'class'
-                elif inspect.isroutine(member):
-                    memberwhat = 'method'
                elif isdescriptor(member):
                    memberwhat = 'attribute'
                else:
-                    # XXX: todo -- attribute docs
                    continue
+
            # give explicitly separated module name, so that members of inner classes
            # can be documented
            full_membername = mod + '::' + '.'.join(objpath + [membername])
--- a/sphinx/jinja2glue.py
+++ b/sphinx/jinja2glue.py
@ -5,7 +5,7 @@

    Glue code for the jinja2 templating engine.

-    :copyright: 2008 by Sebastian Wiesner.
+    :copyright: Copyright 2007-2009 by the Sphinx team, see AUTHORS.
    :license: BSD, see LICENSE for details.
 """

--- a/sphinx/pycode/Grammar.txt
+++ b/sphinx/pycode/Grammar.txt
@ -0,0 +1,155 @@
+# Grammar for Python
+
+# Note:  Changing the grammar specified in this file will most likely
+#        require corresponding changes in the parser module
+#        (../Modules/parsermodule.c).  If you can't make the changes to
+#        that module yourself, please co-ordinate the required changes
+#        with someone who can; ask around on python-dev for help.  Fred
+#        Drake <fdrake@acm.org> will probably be listening there.
+
+# NOTE WELL: You should also follow all the steps listed in PEP 306,
+# "How to Change Python's Grammar"
+
+# Commands for Kees Blom's railroad program
+#diagram:token NAME
+#diagram:token NUMBER
+#diagram:token STRING
+#diagram:token NEWLINE
+#diagram:token ENDMARKER
+#diagram:token INDENT
+#diagram:output\input python.bla
+#diagram:token DEDENT
+#diagram:output\textwidth 20.04cm\oddsidemargin  0.0cm\evensidemargin 0.0cm
+#diagram:rules
+
+# Start symbols for the grammar:
+#	file_input is a module or sequence of commands read from an input file;
+#	single_input is a single interactive statement;
+#	eval_input is the input for the eval() and input() functions.
+# NB: compound_stmt in single_input is followed by extra NEWLINE!
+file_input: (NEWLINE | stmt)* ENDMARKER
+single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
+eval_input: testlist NEWLINE* ENDMARKER
+
+decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
+decorators: decorator+
+decorated: decorators (classdef | funcdef)
+funcdef: 'def' NAME parameters ['->' test] ':' suite
+parameters: '(' [typedargslist] ')'
+typedargslist: ((tfpdef ['=' test] ',')*
+                ('*' [tname] (',' tname ['=' test])* [',' '**' tname] | '**' tname)
+                | tfpdef ['=' test] (',' tfpdef ['=' test])* [','])
+tname: NAME [':' test]
+tfpdef: tname | '(' tfplist ')'
+tfplist: tfpdef (',' tfpdef)* [',']
+varargslist: ((vfpdef ['=' test] ',')*
+              ('*' [vname] (',' vname ['=' test])*  [',' '**' vname] | '**' vname)
+              | vfpdef ['=' test] (',' vfpdef ['=' test])* [','])
+vname: NAME
+vfpdef: vname | '(' vfplist ')'
+vfplist: vfpdef (',' vfpdef)* [',']
+
+stmt: simple_stmt | compound_stmt
+simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
+small_stmt: (expr_stmt | print_stmt  | del_stmt | pass_stmt | flow_stmt |
+             import_stmt | global_stmt | exec_stmt | assert_stmt)
+expr_stmt: testlist (augassign (yield_expr|testlist) |
+                     ('=' (yield_expr|testlist))*)
+augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |
+            '<<=' | '>>=' | '**=' | '//=')
+# For normal assignments, additional restrictions enforced by the interpreter
+print_stmt: 'print' ( [ test (',' test)* [','] ] |
+                      '>>' test [ (',' test)+ [','] ] )
+del_stmt: 'del' exprlist
+pass_stmt: 'pass'
+flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
+break_stmt: 'break'
+continue_stmt: 'continue'
+return_stmt: 'return' [testlist]
+yield_stmt: yield_expr
+raise_stmt: 'raise' [test ['from' test | ',' test [',' test]]]
+import_stmt: import_name | import_from
+import_name: 'import' dotted_as_names
+import_from: ('from' ('.'* dotted_name | '.'+)
+              'import' ('*' | '(' import_as_names ')' | import_as_names))
+import_as_name: NAME ['as' NAME]
+dotted_as_name: dotted_name ['as' NAME]
+import_as_names: import_as_name (',' import_as_name)* [',']
+dotted_as_names: dotted_as_name (',' dotted_as_name)*
+dotted_name: NAME ('.' NAME)*
+global_stmt: ('global' | 'nonlocal') NAME (',' NAME)*
+exec_stmt: 'exec' expr ['in' test [',' test]]
+assert_stmt: 'assert' test [',' test]
+
+compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated
+if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
+while_stmt: 'while' test ':' suite ['else' ':' suite]
+for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
+try_stmt: ('try' ':' suite
+           ((except_clause ':' suite)+
+	    ['else' ':' suite]
+	    ['finally' ':' suite] |
+	   'finally' ':' suite))
+with_stmt: 'with' test [ with_var ] ':' suite
+with_var: 'as' expr
+# NB compile.c makes sure that the default except clause is last
+except_clause: 'except' [test [(',' | 'as') test]]
+suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
+
+# Backward compatibility cruft to support:
+# [ x for x in lambda: True, lambda: False if x() ]
+# even while also allowing:
+# lambda x: 5 if x else 2
+# (But not a mix of the two)
+testlist_safe: old_test [(',' old_test)+ [',']]
+old_test: or_test | old_lambdef
+old_lambdef: 'lambda' [varargslist] ':' old_test
+
+test: or_test ['if' or_test 'else' test] | lambdef
+or_test: and_test ('or' and_test)*
+and_test: not_test ('and' not_test)*
+not_test: 'not' not_test | comparison
+comparison: expr (comp_op expr)*
+comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
+expr: xor_expr ('|' xor_expr)*
+xor_expr: and_expr ('^' and_expr)*
+and_expr: shift_expr ('&' shift_expr)*
+shift_expr: arith_expr (('<<'|'>>') arith_expr)*
+arith_expr: term (('+'|'-') term)*
+term: factor (('*'|'/'|'%'|'//') factor)*
+factor: ('+'|'-'|'~') factor | power
+power: atom trailer* ['**' factor]
+atom: ('(' [yield_expr|testlist_gexp] ')' |
+       '[' [listmaker] ']' |
+       '{' [dictsetmaker] '}' |
+       '`' testlist1 '`' |
+       NAME | NUMBER | STRING+ | '.' '.' '.')
+listmaker: test ( comp_for | (',' test)* [','] )
+testlist_gexp: test ( comp_for | (',' test)* [','] )
+lambdef: 'lambda' [varargslist] ':' test
+trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
+subscriptlist: subscript (',' subscript)* [',']
+subscript: test | [test] ':' [test] [sliceop]
+sliceop: ':' [test]
+exprlist: expr (',' expr)* [',']
+testlist: test (',' test)* [',']
+dictsetmaker: ( (test ':' test (comp_for | (',' test ':' test)* [','])) |
+                (test (comp_for | (',' test)* [','])) )
+
+classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
+
+arglist: (argument ',')* (argument [',']
+                         |'*' test (',' argument)* [',' '**' test] 
+                         |'**' test)
+argument: test [comp_for] | test '=' test  # Really [keyword '='] test
+
+comp_iter: comp_for | comp_if
+comp_for: 'for' exprlist 'in' testlist_safe [comp_iter]
+comp_if: 'if' old_test [comp_iter]
+
+testlist1: test (',' test)*
+
+# not used in grammar, but may appear in "node" passed from Parser to Compiler
+encoding_decl: NAME
+
+yield_expr: 'yield' [testlist]
--- a/sphinx/pycode/__init__.py
+++ b/sphinx/pycode/__init__.py
@ -0,0 +1,303 @@
+# -*- coding: utf-8 -*-
+"""
+    sphinx.pycode
+    ~~~~~~~~~~~~~
+
+    Utilities parsing and analyzing Python code.
+
+    :copyright: Copyright 2007-2009 by the Sphinx team, see AUTHORS.
+    :license: BSD, see LICENSE for details.
+"""
+
+import re
+import sys
+from os import path
+from cStringIO import StringIO
+
+from sphinx.pycode import nodes
+from sphinx.pycode.pgen2 import driver, token, tokenize, parse, literals
+from sphinx.util.docstrings import prepare_docstring, prepare_commentdoc
+
+
+# load the Python grammar
+# (Grammar.txt is looked up in the directory containing this module)
+_grammarfile = path.join(path.dirname(__file__), 'Grammar.txt')
+pygrammar = driver.load_grammar(_grammarfile)
+# parser driver using nodes.convert to build the tree nodes
+pydriver = driver.Driver(pygrammar, convert=nodes.convert)
+
+# an object with attributes corresponding to token and symbol names
+# (sym.<name> gives the number of the symbol or token called <name>)
+class sym: pass
+for k, v in pygrammar.symbol2number.iteritems():
+    setattr(sym, k, v)
+# tok_name maps number -> name, hence the swapped setattr() arguments
+for k, v in token.tok_name.iteritems():
+    setattr(sym, v, k)
+
+# a dict mapping terminal and nonterminal numbers to their names
+number2name = pygrammar.number2symbol.copy()
+number2name.update(token.tok_name)
+
+
+# a regex to recognize coding cookies
+# (group 1 is the encoding name following "coding:" or "coding=")
+_coding_re = re.compile(r'coding[:=]\s*([-\w.]+)')
+
+# reference leaf for the "=" token; used with "in" to detect assignments
+_eq = nodes.Leaf(token.EQUAL, '=')
+
+
+class AttrDocVisitor(nodes.NodeVisitor):
+    """
+    Visitor that collects docstrings for attribute assignments on toplevel and
+    in classes.
+
+    The docstrings can either be in special '#:' comments before the assignment
+    or in a docstring after it.
+    """
+    def init(self, scope, encoding):
+        self.scope = scope
+        self.encoding = encoding
+        self.namespace = []
+        self.collected = {}
+
+    def visit_classdef(self, node):
+        self.namespace.append(node[1].value)
+        self.generic_visit(node)
+        self.namespace.pop()
+
+    def visit_expr_stmt(self, node):
+        """Visit an assignment which may have a special comment before it."""
+        if _eq not in node.children:
+            # not an assignment (we don't care for augmented assignments)
+            return
+        pnode = node[0]
+        prefix = pnode.get_prefix()
+        # if the assignment is the first statement on a new indentation
+        # level, its preceding whitespace and comments are not assigned
+        # to that token, but the first INDENT or DEDENT token
+        while not prefix:
+            pnode = pnode.get_prev_leaf()
+            if not pnode or pnode.type not in (token.INDENT, token.DEDENT):
+                break
+            prefix = pnode.get_prefix()
+        prefix = prefix.decode(self.encoding)
+        docstring = prepare_commentdoc(prefix)
+        if docstring:
+            self.add_docstring(node, docstring)
+
+    def visit_simple_stmt(self, node):
+        """Visit a docstring statement which may have an assignment before."""
+        if node[0].type != token.STRING:
+            # not a docstring; but still need to visit children
+            return self.generic_visit(node)
+        prev = node.get_prev_sibling()
+        if not prev:
+            return
+        if prev.type == sym.simple_stmt and \
+               prev[0].type == sym.expr_stmt and _eq in prev[0].children:
+            # need to "eval" the string because it's returned in its original form
+            docstring = literals.evalString(node[0].value, self.encoding)
+            docstring = prepare_docstring(docstring)
+            self.add_docstring(prev[0], docstring)
+
+    def visit_funcdef(self, node):
+        # don't descend into functions -- nothing interesting there
+        return
+
+    def add_docstring(self, node, docstring):
+        # add an item for each assignment target
+        for i in range(0, len(node) - 1, 2):
+            target = node[i]
+            if target.type != token.NAME:
+                # don't care about complex targets
+                continue
+            namespace = '.'.join(self.namespace)
+            if namespace.startswith(self.scope):
+                self.collected[namespace, target.value] = docstring
+
+
+class PycodeError(Exception):
+    def __str__(self):
+        res = self.args[0]
+        if len(self.args) > 1:
+            res += ' (exception was: %r)' % self.args[1]
+        return res
+
+
+class ModuleAnalyzer(object):
+    # cache for analyzer objects -- caches both by module and file name
+    cache = {}
+
+    @classmethod
+    def for_string(cls, string, modname, srcname='<string>'):
+        return cls(StringIO(string), modname, srcname)
+
+    @classmethod
+    def for_file(cls, filename, modname):
+        if ('file', filename) in cls.cache:
+            return cls.cache['file', filename]
+        try:
+            fileobj = open(filename, 'r')
+        except Exception, err:
+            raise PycodeError('error opening %r' % filename, err)
+        obj = cls(fileobj, modname, filename)
+        cls.cache['file', filename] = obj
+        return obj
+
+    @classmethod
+    def for_module(cls, modname):
+        if ('module', modname) in cls.cache:
+            entry = cls.cache['module', modname]
+            if isinstance(entry, PycodeError):
+                raise entry
+            return entry
+
+        try:
+            if modname not in sys.modules:
+                try:
+                    __import__(modname)
+                except ImportError, err:
+                    raise PycodeError('error importing %r' % modname, err)
+            mod = sys.modules[modname]
+            if hasattr(mod, '__loader__'):
+                try:
+                    source = mod.__loader__.get_source(modname)
+                except Exception, err:
+                    raise PycodeError('error getting source for %r' % modname, err)
+                obj = cls.for_string(source, modname)
+                cls.cache['module', modname] = obj
+                return obj
+            filename = getattr(mod, '__file__', None)
+            if filename is None:
+                raise PycodeError('no source found for module %r' % modname)
+            filename = path.normpath(filename)
+            lfilename = filename.lower()
+            if lfilename.endswith('.pyo') or lfilename.endswith('.pyc'):
+                filename = filename[:-1]
+            elif not lfilename.endswith('.py'):
+                raise PycodeError('source is not a .py file: %r' % filename)
+            if not path.isfile(filename):
+                raise PycodeError('source file is not present: %r' % filename)
+            obj = cls.for_file(filename, modname)
+        except PycodeError, err:
+            cls.cache['module', modname] = err
+            raise
+        cls.cache['module', modname] = obj
+        return obj
+
+    def __init__(self, source, modname, srcname):
+        # name of the module
+        self.modname = modname
+        # name of the source file
+        self.srcname = srcname
+        # file-like object yielding source lines
+        self.source = source
+
+        # will be filled by tokenize()
+        self.tokens = None
+        # will be filled by parse()
+        self.parsetree = None
+        # will be filled by find_attr_docs()
+        self.attr_docs = None
+        # will be filled by find_tags()
+        self.tags = None
+
+    def tokenize(self):
+        """Generate tokens from the source."""
+        if self.tokens is not None:
+            return
+        self.tokens = list(tokenize.generate_tokens(self.source.readline))
+        self.source.close()
+
+    def parse(self):
+        """Parse the generated source tokens."""
+        if self.parsetree is not None:
+            return
+        self.tokenize()
+        self.parsetree = pydriver.parse_tokens(self.tokens)
+        # find the source code encoding
+        encoding = sys.getdefaultencoding()
+        comments = self.parsetree.get_prefix()
+        for line in comments.splitlines()[:2]:
+            match = _coding_re.search(line)
+            if match is not None:
+                encoding = match.group(1)
+                break
+        self.encoding = encoding
+
+    def find_attr_docs(self, scope=''):
+        """Find class and module-level attributes and their documentation."""
+        if self.attr_docs is not None:
+            return self.attr_docs
+        self.parse()
+        attr_visitor = AttrDocVisitor(number2name, scope, self.encoding)
+        attr_visitor.visit(self.parsetree)
+        self.attr_docs = attr_visitor.collected
+        return attr_visitor.collected
+
+    def find_tags(self):
+        """Find class, function and method definitions and their location."""
+        if self.tags is not None:
+            return self.tags
+        self.tokenize()
+        result = {}
+        namespace = []
+        stack = []
+        indent = 0
+        defline = False
+        expect_indent = False
+        def tokeniter(ignore = (token.COMMENT, token.NL)):
+            for tokentup in self.tokens:
+                if tokentup[0] not in ignore:
+                    yield tokentup
+        tokeniter = tokeniter()
+        for type, tok, spos, epos, line in tokeniter:
+            if expect_indent:
+                if type != token.INDENT:
+                    # no suite -- one-line definition
+                    assert stack
+                    dtype, fullname, startline, _ = stack.pop()
+                    endline = epos[0]
+                    namespace.pop()
+                    result[fullname] = (dtype, startline, endline)
+                expect_indent = False
+            if tok in ('def', 'class'):
+                name = tokeniter.next()[1]
+                namespace.append(name)
+                fullname = '.'.join(namespace)
+                stack.append((tok, fullname, spos[0], indent))
+                defline = True
+            elif type == token.INDENT:
+                expect_indent = False
+                indent += 1
+            elif type == token.DEDENT:
+                indent -= 1
+                # if the stacklevel is the same as it was before the last
+                # def/class block, this dedent closes that block
+                if stack and indent == stack[-1][3]:
+                    dtype, fullname, startline, _ = stack.pop()
+                    endline = spos[0]
+                    namespace.pop()
+                    result[fullname] = (dtype, startline, endline)
+            elif type == token.NEWLINE:
+                # if this line contained a definition, expect an INDENT to start the
+                # suite; if there is no such INDENT it's a one-line definition
+                if defline:
+                    defline = False
+                    expect_indent = True
+        self.tags = result
+        return result
+
+
+if __name__ == '__main__':
+    import time, pprint
+    x0 = time.time()
+    #ma = ModuleAnalyzer.for_file(__file__.rstrip('c'), 'sphinx.builders.html')
+    ma = ModuleAnalyzer.for_file('sphinx/builders/html.py', 'sphinx.builders.html')
+    ma.tokenize()
+    x1 = time.time()
+    ma.parse()
+    x2 = time.time()
+    #for (ns, name), doc in ma.find_attr_docs().iteritems():
+    #    print '>>', ns, name
+    #    print '\n'.join(doc)
+    pprint.pprint(ma.find_tags())
+    x3 = time.time()
+    #print nodes.nice_repr(ma.parsetree, number2name)
+    print "tokenizing %.4f, parsing %.4f, finding %.4f" % (x1-x0, x2-x1, x3-x2)
--- a/sphinx/pycode/nodes.py
+++ b/sphinx/pycode/nodes.py
@ -0,0 +1,202 @@
+# -*- coding: utf-8 -*-
+"""
+    sphinx.pycode.nodes
+    ~~~~~~~~~~~~~~~~~~~
+
+    Parse tree node implementations.
+
+    :copyright: Copyright 2007-2009 by the Sphinx team, see AUTHORS.
+    :license: BSD, see LICENSE for details.
+"""
+
+
+class BaseNode(object):
+    """
+    Node superclass for both terminal and nonterminal nodes.
+    """
+
+    def _eq(self, other):
+        raise NotImplementedError
+
+    def __eq__(self, other):
+        if self.__class__ is not other.__class__:
+            return NotImplemented
+        return self._eq(other)
+
+    def __ne__(self, other):
+        if self.__class__ is not other.__class__:
+            return NotImplemented
+        return not self._eq(other)
+
+    def get_prev_sibling(self):
+        """Return previous child in parent's children, or None."""
+        if self.parent is None:
+            return None
+        for i, child in enumerate(self.parent.children):
+            if child is self:
+                if i == 0:
+                    return None
+                return self.parent.children[i-1]
+
+    def get_next_sibling(self):
+        """Return next child in parent's children, or None."""
+        if self.parent is None:
+            return None
+        for i, child in enumerate(self.parent.children):
+            if child is self:
+                try:
+                    return self.parent.children[i+1]
+                except IndexError:
+                    return None
+
+    def get_prev_leaf(self):
+        """Return the leaf node that precedes this node in the parse tree."""
+        def last_child(node):
+            if isinstance(node, Leaf):
+                return node
+            elif not node.children:
+                return None
+            else:
+                return last_child(node.children[-1])
+        if self.parent is None:
+            return None
+        prev = self.get_prev_sibling()
+        if isinstance(prev, Leaf):
+            return prev
+        elif prev is not None:
+            return last_child(prev)
+        return self.parent.get_prev_leaf()
+
+    def get_next_leaf(self):
+        """Return self if leaf, otherwise the leaf node that succeeds this
+        node in the parse tree.
+        """
+        node = self
+        while not isinstance(node, Leaf):
+            assert node.children
+            node = node.children[0]
+        return node
+
+    def get_lineno(self):
+        """Return the line number which generated the invocant node."""
+        return self.get_next_leaf().lineno
+
+    def get_prefix(self):
+        """Return the prefix of the next leaf node."""
+        # only leaves carry a prefix
+        return self.get_next_leaf().prefix
+
+
+class Node(BaseNode):
+    """
+    Node implementation for nonterminals.
+    """
+
+    def __init__(self, type, children, context=None):
+        # type of nonterminals is >= 256
+        # assert type >= 256, type
+        self.type = type
+        self.children = list(children)
+        for ch in self.children:
+            # assert ch.parent is None, repr(ch)
+            ch.parent = self
+
+    def __repr__(self):
+        return '%s(%s, %r)' % (self.__class__.__name__, self.type, self.children)
+
+    def __str__(self):
+        """This reproduces the input source exactly."""
+        return ''.join(map(str, self.children))
+
+    def _eq(self, other):
+        return (self.type, self.children) == (other.type, other.children)
+
+    # support indexing the node directly instead of .children
+
+    def __getitem__(self, index):
+        return self.children[index]
+
+    def __iter__(self):
+        return iter(self.children)
+
+    def __len__(self):
+        return len(self.children)
+
+
+class Leaf(BaseNode):
+    """
+    Node implementation for leaf nodes (terminals).
+    """
+    prefix = ''  # Whitespace and comments preceding this token in the input
+    lineno = 0   # Line where this token starts in the input
+    column = 0   # Column where this token tarts in the input
+
+    def __init__(self, type, value, context=None):
+        # type of terminals is below 256
+        # assert 0 <= type < 256, type
+        self.type = type
+        self.value = value
+        if context is not None:
+            self.prefix, (self.lineno, self.column) = context
+
+    def __repr__(self):
+        return '%s(%r, %r, %r)' % (self.__class__.__name__,
+                                   self.type, self.value, self.prefix)
+
+    def __str__(self):
+        """This reproduces the input source exactly."""
+        return self.prefix + str(self.value)
+
+    def _eq(self, other):
+        """Compares two nodes for equality."""
+        return (self.type, self.value) == (other.type, other.value)
+
+
+def convert(grammar, raw_node):
+    """Convert raw node to a Node or Leaf instance."""
+    type, value, context, children = raw_node
+    if children or type in grammar.number2symbol:
+        # If there's exactly one child, return that child instead of
+        # creating a new node.
+        if len(children) == 1:
+            return children[0]
+        return Node(type, children, context=context)
+    else:
+        return Leaf(type, value, context=context)
+
+
+def nice_repr(node, number2name, prefix=False):
+    def _repr(node):
+        if isinstance(node, Leaf):
+            return "%s(%r)" % (number2name[node.type], node.value)
+        else:
+            return "%s(%s)" % (number2name[node.type],
+                               ', '.join(map(_repr, node.children)))
+    def _prepr(node):
+        if isinstance(node, Leaf):
+            return "%s(%r, %r)" % (number2name[node.type], node.prefix, node.value)
+        else:
+            return "%s(%s)" % (number2name[node.type],
+                               ', '.join(map(_prepr, node.children)))
+    return (prefix and _prepr or _repr)(node)
+
+
+class NodeVisitor(object):
+    def __init__(self, number2name, *args):
+        self.number2name = number2name
+        self.init(*args)
+
+    def init(self, *args):
+        pass
+
+    def visit(self, node):
+        """Visit a node."""
+        method = 'visit_' + self.number2name[node.type]
+        visitor = getattr(self, method, self.generic_visit)
+        return visitor(node)
+
+    def generic_visit(self, node):
+        """Called if no explicit visitor function exists for a node."""
+        if isinstance(node, Node):
+            for child in node:
+                self.visit(child)
--- a/sphinx/pycode/pgen2/__init__.py
+++ b/sphinx/pycode/pgen2/__init__.py
@ -0,0 +1,4 @@
+# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+"""The pgen2 package."""
--- a/sphinx/pycode/pgen2/driver.py
+++ b/sphinx/pycode/pgen2/driver.py
@ -0,0 +1,145 @@
+# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+# Modifications:
+# Copyright 2006 Google, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+"""Parser driver.
+
+This provides a high-level interface to parse a file into a syntax tree.
+
+"""
+
+__author__ = "Guido van Rossum <guido@python.org>"
+
+__all__ = ["Driver", "load_grammar"]
+
+# Python imports
+import os
+import logging
+import sys
+
+# Pgen imports
+from sphinx.pycode.pgen2 import grammar, parse, token, tokenize, pgen
+
+
+class Driver(object):
+    """High-level interface feeding token streams to the parser engine.
+
+    Stores the grammar tables, an optional *convert* callback that is
+    handed to ``parse.Parser``, and a logger.
+    """
+
+    def __init__(self, grammar, convert=None, logger=None):
+        # note: the "grammar" argument shadows the imported grammar module
+        # inside this method only
+        self.grammar = grammar
+        if logger is None:
+            # default to the root logger
+            logger = logging.getLogger()
+        self.logger = logger
+        self.convert = convert
+
+    def parse_tokens(self, tokens, debug=False):
+        """Parse a series of tokens and return the syntax tree.
+
+        *tokens* is an iterable of ``(type, value, start, end, line_text)``
+        tuples.  Whitespace, comments and NL tokens in front of a token are
+        collected into a prefix string that is passed to the parser together
+        with the token's start position.  *debug* is currently unused (the
+        logging calls are commented out).
+
+        Raises ``parse.ParseError`` if the tokens end before the parser
+        reports completion.
+        """
+        # XXX Move the prefix computation into a wrapper around tokenize.
+        p = parse.Parser(self.grammar, self.convert)
+        p.setup()
+        # (lineno, column) is the position up to which input was consumed
+        lineno = 1
+        column = 0
+        type = value = start = end = line_text = None
+        prefix = ""
+        # "grammar" is the module here: maps operator strings to token types
+        opmap = grammar.opmap
+        for type, value, start, end, line_text in tokens:
+            if start != (lineno, column):
+                # there is a gap before this token; add it to the prefix
+                assert (lineno, column) <= start, ((lineno, column), start)
+                s_lineno, s_column = start
+                if lineno < s_lineno:
+                    prefix += "\n" * (s_lineno - lineno)
+                    lineno = s_lineno
+                    column = 0
+                if column < s_column:
+                    prefix += line_text[column:s_column]
+                    column = s_column
+            if type in (tokenize.COMMENT, tokenize.NL):
+                # not passed to the parser; becomes part of the next prefix
+                prefix += value
+                lineno, column = end
+                if value.endswith("\n"):
+                    lineno += 1
+                    column = 0
+                continue
+            if type == token.OP:
+                # replace the generic OP type by the specific operator type
+                type = opmap[value]
+            # if debug:
+            #     self.logger.debug("%s %r (prefix=%r)",
+            #                       token.tok_name[type], value, prefix)
+            if p.addtoken(type, value, (prefix, start)):
+                # a true result means that parsing is complete
+                # if debug:
+                #     self.logger.debug("Stop.")
+                break
+            prefix = ""
+            lineno, column = end
+            if value.endswith("\n"):
+                lineno += 1
+                column = 0
+        else:
+            # We never broke out -- EOF is too soon (how can this happen???)
+            raise parse.ParseError("incomplete input", type, value, line_text)
+        return p.rootnode
+
+    def parse_stream_raw(self, stream, debug=False):
+        """Parse a stream and return the syntax tree."""
+        # the stream only needs to provide readline()
+        tokens = tokenize.generate_tokens(stream.readline)
+        return self.parse_tokens(tokens, debug)
+
+    def parse_stream(self, stream, debug=False):
+        """Parse a stream and return the syntax tree."""
+        return self.parse_stream_raw(stream, debug)
+
+    def parse_file(self, filename, debug=False):
+        """Parse a file and return the syntax tree."""
+        stream = open(filename)
+        try:
+            return self.parse_stream(stream, debug)
+        finally:
+            # close the file even if parsing raised
+            stream.close()
+
+    def parse_string(self, text, debug=False):
+        """Parse a string and return the syntax tree."""
+        # the generator's next() method serves as the readline callable
+        tokens = tokenize.generate_tokens(generate_lines(text).next)
+        return self.parse_tokens(tokens, debug)
+
+
+def generate_lines(text):
+    """Generator that behaves like readline without using StringIO."""
+    for line in text.splitlines(True):
+        yield line
+    while True:
+        yield ""
+
+
+def load_grammar(gt="Grammar.txt", gp=None,
+                 save=True, force=False, logger=None):
+    """Load the grammar (maybe from a pickle)."""
+    if logger is None:
+        logger = logging.getLogger()
+    if gp is None:
+        head, tail = os.path.splitext(gt)
+        if tail == ".txt":
+            tail = ""
+        gp = head + tail + ".".join(map(str, sys.version_info)) + ".pickle"
+    if force or not _newer(gp, gt):
+        logger.info("Generating grammar tables from %s", gt)
+        g = pgen.generate_grammar(gt)
+        if save:
+            logger.info("Writing grammar tables to %s", gp)
+            try:
+                g.dump(gp)
+            except IOError, e:
+                logger.info("Writing failed:"+str(e))
+    else:
+        g = grammar.Grammar()
+        g.load(gp)
+    return g
+
+
+def _newer(a, b):
+    """Inquire whether file a was written since file b."""
+    if not os.path.exists(a):
+        return False
+    if not os.path.exists(b):
+        return True
+    return os.path.getmtime(a) >= os.path.getmtime(b)
--- a/sphinx/pycode/pgen2/grammar.py
+++ b/sphinx/pycode/pgen2/grammar.py
@ -0,0 +1,171 @@
+# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+"""This module defines the data structures used to represent a grammar.
+
+These are a bit arcane because they are derived from the data
+structures used by Python's 'pgen' parser generator.
+
+There's also a table here mapping operators to their names in the
+token module; the Python tokenize module reports all operators as the
+fallback token code OP, but the parser needs the actual token code.
+
+"""
+
+# Python imports
+import pickle
+
+# Local imports
+from sphinx.pycode.pgen2 import token, tokenize
+
+
+class Grammar(object):
+    """Pgen parsing tables tables conversion class.
+
+    Once initialized, this class supplies the grammar tables for the
+    parsing engine implemented by parse.py.  The parsing engine
+    accesses the instance variables directly.  The class here does not
+    provide initialization of the tables; several subclasses exist to
+    do this (see the conv and pgen modules).
+
+    The load() method reads the tables from a pickle file, which is
+    much faster than the other ways offered by subclasses.  The pickle
+    file is written by calling dump() (after loading the grammar
+    tables using a subclass).  The report() method prints a readable
+    representation of the tables to stdout, for debugging.
+
+    The instance variables are as follows:
+
+    symbol2number -- a dict mapping symbol names to numbers.  Symbol
+                     numbers are always 256 or higher, to distinguish
+                     them from token numbers, which are between 0 and
+                     255 (inclusive).
+
+    number2symbol -- a dict mapping numbers to symbol names;
+                     these two are each other's inverse.
+
+    states        -- a list of DFAs, where each DFA is a list of
+                     states, each state is is a list of arcs, and each
+                     arc is a (i, j) pair where i is a label and j is
+                     a state number.  The DFA number is the index into
+                     this list.  (This name is slightly confusing.)
+                     Final states are represented by a special arc of
+                     the form (0, j) where j is its own state number.
+
+    dfas          -- a dict mapping symbol numbers to (DFA, first)
+                     pairs, where DFA is an item from the states list
+                     above, and first is a set of tokens that can
+                     begin this grammar rule (represented by a dict
+                     whose values are always 1).
+
+    labels        -- a list of (x, y) pairs where x is either a token
+                     number or a symbol number, and y is either None
+                     or a string; the strings are keywords.  The label
+                     number is the index in this list; label numbers
+                     are used to mark state transitions (arcs) in the
+                     DFAs.
+
+    start         -- the number of the grammar's start symbol.
+
+    keywords      -- a dict mapping keyword strings to arc labels.
+
+    tokens        -- a dict mapping token numbers to arc labels.
+
+    """
+
+    def __init__(self):
+        self.symbol2number = {}
+        self.number2symbol = {}
+        self.states = []
+        self.dfas = {}
+        self.labels = [(0, "EMPTY")]
+        self.keywords = {}
+        self.tokens = {}
+        self.symbol2label = {}
+        self.start = 256
+
+    def dump(self, filename):
+        """Dump the grammar tables to a pickle file."""
+        f = open(filename, "wb")
+        pickle.dump(self.__dict__, f, 2)
+        f.close()
+
+    def load(self, filename):
+        """Load the grammar tables from a pickle file."""
+        f = open(filename, "rb")
+        d = pickle.load(f)
+        f.close()
+        self.__dict__.update(d)
+
+    def report(self):
+        """Dump the grammar tables to standard output, for debugging."""
+        from pprint import pprint
+        print "s2n"
+        pprint(self.symbol2number)
+        print "n2s"
+        pprint(self.number2symbol)
+        print "states"
+        pprint(self.states)
+        print "dfas"
+        pprint(self.dfas)
+        print "labels"
+        pprint(self.labels)
+        print "start", self.start
+
+
+# Map from operator to number (since tokenize doesn't do this)
+
+opmap_raw = """
+( LPAR
+) RPAR
+[ LSQB
+] RSQB
+: COLON
+, COMMA
+; SEMI
+ PLUS
+- MINUS
+* STAR
+/ SLASH
+| VBAR
+& AMPER
+< LESS
+> GREATER
+= EQUAL
+. DOT
+% PERCENT
+` BACKQUOTE
+{ LBRACE
+} RBRACE
+@ AT
+== EQEQUAL
+!= NOTEQUAL
+<> NOTEQUAL
+<= LESSEQUAL
+>= GREATEREQUAL
+~ TILDE
+^ CIRCUMFLEX
+<< LEFTSHIFT
+>> RIGHTSHIFT
+** DOUBLESTAR
+= PLUSEQUAL
+-= MINEQUAL
+*= STAREQUAL
+/= SLASHEQUAL
+%= PERCENTEQUAL
+&= AMPEREQUAL
+|= VBAREQUAL
+^= CIRCUMFLEXEQUAL
+<<= LEFTSHIFTEQUAL
+>>= RIGHTSHIFTEQUAL
+**= DOUBLESTAREQUAL
+// DOUBLESLASH
+//= DOUBLESLASHEQUAL
+-> RARROW
+"""
+
+opmap = {}
+for line in opmap_raw.splitlines():
+    if line:
+        op, name = line.split()
+        opmap[op] = getattr(token, name)
--- a/sphinx/pycode/pgen2/literals.py
+++ b/sphinx/pycode/pgen2/literals.py
@ -0,0 +1,96 @@
+# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+# Extended to handle raw and unicode literals by Georg Brandl.
+
+"""Safely evaluate Python string literals without using eval()."""
+
+import re
+
+# maps the character following the backslash in a single-character escape
+# sequence to the character the sequence stands for
+simple_escapes = {"a": "\a",
+                  "b": "\b",
+                  "f": "\f",
+                  "n": "\n",
+                  "r": "\r",
+                  "t": "\t",
+                  "v": "\v",
+                  "'": "'",
+                  '"': '"',
+                  "\\": "\\"}
+
+def convert_hex(x, n):
+    if len(x) < n+1:
+        raise ValueError("invalid hex string escape ('\\%s')" % x)
+    try:
+        return int(x[1:], 16)
+    except ValueError:
+        raise ValueError("invalid hex string escape ('\\%s')" % x)
+
+def escape(m):
+    all, tail = m.group(0, 1)
+    assert all.startswith("\\")
+    esc = simple_escapes.get(tail)
+    if esc is not None:
+        return esc
+    elif tail.startswith("x"):
+        return chr(convert_hex(tail, 2))
+    elif tail.startswith('u'):
+        return unichr(convert_hex(tail, 4))
+    elif tail.startswith('U'):
+        return unichr(convert_hex(tail, 8))
+    elif tail.startswith('N'):
+        import unicodedata
+        try:
+            return unicodedata.lookup(tail[1:-1])
+        except KeyError:
+            raise ValueError("undefined character name %r" % tail[1:-1])
+    else:
+        try:
+            return chr(int(tail, 8))
+        except ValueError:
+            raise ValueError("invalid octal string escape ('\\%s')" % tail)
+
+def escaperaw(m):
+    all, tail = m.group(0, 1)
+    if tail.startswith('u'):
+        return unichr(convert_hex(tail, 4))
+    elif tail.startswith('U'):
+        return unichr(convert_hex(tail, 8))
+    else:
+        return all
+
+# escape sequences in plain string literals; group 1 is the sequence
+# without the leading backslash
+escape_re = re.compile(r"\\(\'|\"|\\|[abfnrtv]|x.{0,2}|[0-7]{1,3})")
+# the same, plus the \u, \U and \N{...} escapes; used by evalString() for
+# literals with a u/U prefix
+uni_escape_re = re.compile(r"\\(\'|\"|\\|[abfnrtv]|x.{0,2}|[0-7]{1,3}|"
+                           r"u[0-9a-fA-F]{0,4}|U[0-9a-fA-F]{0,8}|N\{.+?\})")
+
+def evalString(s, encoding=None):
+    regex = escape_re
+    repl = escape
+    if encoding:
+        s = s.decode(encoding)
+    if s.startswith('u') or s.startswith('U'):
+        regex = uni_escape_re
+        s = s[1:]
+    if s.startswith('r') or s.startswith('R'):
+        repl = escaperaw
+        s = s[1:]
+    assert s.startswith("'") or s.startswith('"'), repr(s[:1])
+    q = s[0]
+    if s[:3] == q*3:
+        q = q*3
+    assert s.endswith(q), repr(s[-len(q):])
+    assert len(s) >= 2*len(q)
+    s = s[len(q):-len(q)]
+    return regex.sub(repl, s)
+
+def test():
+    for i in range(256):
+        c = chr(i)
+        s = repr(c)
+        e = evalString(s)
+        if e != c:
+            print i, c, s, e
+
+
+# run the self-test when executed as a script
+if __name__ == "__main__":
+    test()
--- a/sphinx/pycode/pgen2/parse.c
+++ b/sphinx/pycode/pgen2/parse.c
--- a/sphinx/pycode/pgen2/parse.py
+++ b/sphinx/pycode/pgen2/parse.py
@ -0,0 +1,201 @@
+# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+"""Parser engine for the grammar tables generated by pgen.
+
+The grammar table must be loaded first.
+
+See Parser/parser.c in the Python distribution for additional info on
+how this parsing engine works.
+
+"""
+
+# Local imports
+from sphinx.pycode.pgen2 import token
+
+class ParseError(Exception):
+    """Raised when the parser cannot make progress on a token.
+
+    The constructor arguments are kept as the attributes msg, type,
+    value and context, and are also folded into the exception message.
+    """
+
+    def __init__(self, msg, type, value, context):
+        text = "%s: type=%r, value=%r, context=%r" % (
+            msg, type, value, context)
+        Exception.__init__(self, text)
+        self.msg, self.type = msg, type
+        self.value, self.context = value, context
+
+class Parser(object):
+    """Parser engine.
+
+    The proper usage sequence is:
+
+    p = Parser(grammar, [converter])  # create instance
+    p.setup([start])                  # prepare for parsing
+    <for each input token>:
+        if p.addtoken(...):           # parse a token; may raise ParseError
+            break
+    root = p.rootnode                 # root of abstract syntax tree
+
+    A Parser instance may be reused by calling setup() repeatedly.
+
+    A Parser instance contains state pertaining to the current token
+    sequence, and should not be used concurrently by different threads
+    to parse separate token sequences.
+
+    See driver.py for how to get input tokens by tokenizing a file or
+    string.
+
+    Parsing is complete when addtoken() returns True; the root of the
+    abstract syntax tree can then be retrieved from the rootnode
+    instance variable.  When a syntax error occurs, addtoken() raises
+    the ParseError exception.  There is no error recovery; the parser
+    cannot be used after a syntax error was reported (but it can be
+    reinitialized by calling setup()).
+
+    The set of all NAME token values seen is available as the
+    used_names instance variable.
+
+    """
+
+    def __init__(self, grammar, convert=None):
+        """Constructor.
+
+        The grammar argument is a grammar.Grammar instance; see the
+        grammar module for more information.
+
+        The parser is not ready yet for parsing; you must call the
+        setup() method to get it started.
+
+        The optional convert argument is a function mapping concrete
+        syntax tree nodes to abstract syntax tree nodes.  If not
+        given, no conversion is done and the syntax tree produced is
+        the concrete syntax tree.  If given, it must be a function of
+        two arguments, the first being the grammar (a grammar.Grammar
+        instance), and the second being the concrete syntax tree node
+        to be converted.  The syntax tree is converted from the bottom
+        up.
+
+        A concrete syntax tree node is a (type, value, context, nodes)
+        tuple, where type is the node type (a token or symbol number),
+        value is None for symbols and a string for tokens, context is
+        None or an opaque value used for error reporting (typically a
+        (lineno, offset) pair), and nodes is a list of children for
+        symbols, and None for tokens.
+
+        An abstract syntax tree node may be anything; this is entirely
+        up to the converter function.
+
+        """
+        self.grammar = grammar
+        self.convert = convert or (lambda grammar, node: node)
+
+    def setup(self, start=None):
+        """Prepare for parsing.
+
+        This *must* be called before starting to parse.
+
+        The optional argument is an alternative start symbol; it
+        defaults to the grammar's start symbol.
+
+        You can use a Parser instance to parse any number of programs;
+        each time you call setup() the parser is reset to an initial
+        state determined by the (implicit or explicit) start symbol.
+
+        """
+        if start is None:
+            start = self.grammar.start
+        # Each stack entry is a tuple: (dfa, state, node).
+        # A node is a tuple: (type, value, context, children),
+        # where children is a list of nodes or None, and context may be None.
+        newnode = (start, None, None, [])
+        stackentry = (self.grammar.dfas[start], 0, newnode)
+        self.stack = [stackentry]
+        self.rootnode = None
+        # Aliased to self.rootnode.used_names in pop() when the root node
+        # accepts attributes.
+        self.used_names = set()
+
+    def addtoken(self, type, value, context):
+        """Add a token; return True iff this is the end of the program."""
+        # Map from token to label
+        ilabel = self.classify(type, value, context)
+        # Loop until the token is shifted; may raise exceptions
+        while True:
+            dfa, state, node = self.stack[-1]
+            states, first = dfa
+            arcs = states[state]
+            # Look for a state with this label
+            for i, newstate in arcs:
+                t = self.grammar.labels[i][0]
+                if ilabel == i:
+                    # Look it up in the list of labels
+                    assert t < 256
+                    # Shift a token; we're done with it
+                    self.shift(type, value, newstate, context)
+                    # Pop while we are in an accept-only state
+                    state = newstate
+                    while states[state] == [(0, state)]:
+                        self.pop()
+                        if not self.stack:
+                            # Done parsing!
+                            return True
+                        dfa, state, node = self.stack[-1]
+                        states, first = dfa
+                    # Done with this token
+                    return False
+                elif t >= 256:
+                    # See if it's a symbol and if we're in its first set
+                    itsdfa = self.grammar.dfas[t]
+                    itsfirst = itsdfa[1]
+                    if ilabel in itsfirst:
+                        # Push a symbol
+                        self.push(t, itsdfa, newstate, context)
+                        break # To continue the outer while loop
+            else:
+                if (0, state) in arcs:
+                    # An accepting state, pop it and try something else
+                    self.pop()
+                    if not self.stack:
+                        # Done parsing, but another token is input
+                        raise ParseError("too much input",
+                                         type, value, context)
+                else:
+                    # No success finding a transition
+                    raise ParseError("bad input", type, value, context)
+
+    def classify(self, type, value, context):
+        """Turn a token into a label.  (Internal)"""
+        if type == token.NAME:
+            # Keep a listing of all used names
+            self.used_names.add(value)
+            # Check for reserved words
+            ilabel = self.grammar.keywords.get(value)
+            if ilabel is not None:
+                return ilabel
+        ilabel = self.grammar.tokens.get(type)
+        if ilabel is None:
+            raise ParseError("bad token", type, value, context)
+        return ilabel
+
+    def shift(self, type, value, newstate, context):
+        """Shift a token.  (Internal)"""
+        dfa, state, node = self.stack[-1]
+        newnode = (type, value, context, None)
+        newnode = self.convert(self.grammar, newnode)
+        if newnode is not None:
+            node[-1].append(newnode)
+        self.stack[-1] = (dfa, newstate, node)
+
+    def push(self, type, newdfa, newstate, context):
+        """Push a nonterminal.  (Internal)"""
+        dfa, state, node = self.stack[-1]
+        newnode = (type, None, context, [])
+        self.stack[-1] = (dfa, newstate, node)
+        self.stack.append((newdfa, 0, newnode))
+
+    def pop(self):
+        """Pop a nonterminal.  (Internal)
+
+        The converted node is appended to its parent's children, or
+        becomes self.rootnode when the stack is empty afterwards.
+        """
+        popdfa, popstate, popnode = self.stack.pop()
+        newnode = self.convert(self.grammar, popnode)
+        if newnode is not None:
+            if self.stack:
+                dfa, state, node = self.stack[-1]
+                node[-1].append(newnode)
+            else:
+                self.rootnode = newnode
+                try:
+                    self.rootnode.used_names = self.used_names
+                except AttributeError:
+                    # The root node cannot carry attributes (e.g. the
+                    # plain tuple produced by the default converter);
+                    # the names stay available as self.used_names.
+                    pass
--- a/sphinx/pycode/pgen2/parse.pyx
+++ b/sphinx/pycode/pgen2/parse.pyx
@ -0,0 +1,158 @@
+# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+# Adapted from parse.py to be compiled with Cython by Georg Brandl.
+
+"""Parser engine for the grammar tables generated by pgen.
+
+The grammar table must be loaded first.
+
+See Parser/parser.c in the Python distribution for additional info on
+how this parsing engine works.
+
+"""
+
+from sphinx.pycode.nodes import Node, Leaf
+
+DEF NAME = 1
+
+class ParseError(Exception):
+    """Raised when the parser cannot make progress on a token.
+
+    msg, type, value and context are stored as attributes of the same
+    names and are also combined into the exception message.
+    """
+
+    def __init__(self, msg, type, value, context):
+        text = "%s: type=%r, value=%r, context=%r" % (
+            msg, type, value, context)
+        Exception.__init__(self, text)
+        self.msg, self.type = msg, type
+        self.value, self.context = value, context
+
+
+cdef class Parser:
+    """Parser engine, adapted from parse.py for compilation with Cython.
+
+    Usage follows the pure Python version: create an instance, call
+    setup(), then feed tokens to addtoken() until it returns True; the
+    tree is then available as rootnode.  Unlike parse.py, node
+    conversion is fixed: convert() always builds Node and Leaf objects.
+    """
+    cdef public grammar, stack, rootnode, used_names
+    cdef _grammar_dfas, _grammar_labels, _grammar_keywords, _grammar_tokens
+    cdef _grammar_number2symbol
+
+    def __init__(self, grammar, convert=None):
+        """Store the grammar and cache the tables used while parsing.
+
+        The convert argument is accepted but not used: conversion is
+        done by this class's own convert() method.
+        """
+        self.grammar = grammar
+
+        self._grammar_dfas = grammar.dfas
+        self._grammar_labels = grammar.labels
+        self._grammar_keywords = grammar.keywords
+        self._grammar_tokens = grammar.tokens
+        self._grammar_number2symbol = grammar.number2symbol
+
+    def setup(self, start=None):
+        """Reset the parser; start defaults to the grammar's start symbol."""
+        if start is None:
+            start = self.grammar.start
+        # Each stack entry is a tuple: (dfa, state, node).
+        # A node is a tuple: (type, value, context, children),
+        # where children is a list of nodes or None, and context may be None.
+        newnode = (start, None, None, [])
+        stackentry = (self._grammar_dfas[start], 0, newnode)
+        self.stack = [stackentry]
+        self.rootnode = None
+        self.used_names = set() # Aliased to self.rootnode.used_names in pop()
+
+    def addtoken(self, type, value, context):
+        """Add a token; return True iff this is the end of the program."""
+        cdef int ilabel, i, t, state, newstate
+        # Map from token to label
+        ilabel = self.classify(type, value, context)
+        # Loop until the token is shifted; may raise exceptions
+        while True:
+            dfa, state, node = self.stack[-1]
+            states, first = dfa
+            arcs = states[state]
+            # Look for a state with this label
+            for i, newstate in arcs:
+                t, v = self._grammar_labels[i]
+                if ilabel == i:
+                    # Look it up in the list of labels
+                    ## assert t < 256
+                    # Shift a token; we're done with it
+                    self.shift(type, value, newstate, context)
+                    # Pop while we are in an accept-only state
+                    state = newstate
+                    while states[state] == [(0, state)]:
+                        self.pop()
+                        if not self.stack:
+                            # Done parsing!
+                            return True
+                        dfa, state, node = self.stack[-1]
+                        states, first = dfa
+                    # Done with this token
+                    return False
+                elif t >= 256:
+                    # See if it's a symbol and if we're in its first set
+                    itsdfa = self._grammar_dfas[t]
+                    itsstates, itsfirst = itsdfa
+                    if ilabel in itsfirst:
+                        # Push a symbol
+                        self.push(t, itsdfa, newstate, context)
+                        break # To continue the outer while loop
+            else:
+                if (0, state) in arcs:
+                    # An accepting state, pop it and try something else
+                    self.pop()
+                    if not self.stack:
+                        # Done parsing, but another token is input
+                        raise ParseError("too much input",
+                                         type, value, context)
+                else:
+                    # No success finding a transition
+                    raise ParseError("bad input", type, value, context)
+
+    # The "except" clauses below make exceptions raised inside these C
+    # level methods propagate to the caller; without them a cdef function
+    # with a C return type reports the exception and carries on.
+
+    cdef int classify(self, type, value, context) except? -1:
+        """Turn a token into a label.  (Internal)"""
+        if type == NAME:
+            # Keep a listing of all used names
+            self.used_names.add(value)
+            # Check for reserved words
+            ilabel = self._grammar_keywords.get(value)
+            if ilabel is not None:
+                return ilabel
+        ilabel = self._grammar_tokens.get(type)
+        if ilabel is None:
+            raise ParseError("bad token", type, value, context)
+        return ilabel
+
+    cdef void shift(self, type, value, newstate, context) except *:
+        """Shift a token.  (Internal)"""
+        dfa, state, node = self.stack[-1]
+        newnode = (type, value, context, None)
+        newnode = self.convert(newnode)
+        if newnode is not None:
+            node[-1].append(newnode)
+        self.stack[-1] = (dfa, newstate, node)
+
+    cdef void push(self, type, newdfa, newstate, context) except *:
+        """Push a nonterminal.  (Internal)"""
+        dfa, state, node = self.stack[-1]
+        newnode = (type, None, context, [])
+        self.stack[-1] = (dfa, newstate, node)
+        self.stack.append((newdfa, 0, newnode))
+
+    cdef void pop(self) except *:
+        """Pop a nonterminal.  (Internal)"""
+        popdfa, popstate, popnode = self.stack.pop()
+        newnode = self.convert(popnode)
+        if newnode is not None:
+            if self.stack:
+                dfa, state, node = self.stack[-1]
+                node[-1].append(newnode)
+            else:
+                self.rootnode = newnode
+                self.rootnode.used_names = self.used_names
+
+    cdef convert(self, raw_node):
+        """Turn a raw (type, value, context, children) tuple into a
+        Node or Leaf.  (Internal)"""
+        type, value, context, children = raw_node
+        if children or type in self._grammar_number2symbol:
+            # If there's exactly one child, return that child instead of
+            # creating a new node.
+            if len(children) == 1:
+                return children[0]
+            return Node(type, children, context=context)
+        else:
+            return Leaf(type, value, context=context)
--- a/sphinx/pycode/pgen2/pgen.py
+++ b/sphinx/pycode/pgen2/pgen.py
@ -0,0 +1,384 @@
+# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+# Pgen imports
+from sphinx.pycode.pgen2 import grammar, token, tokenize
+
+class PgenGrammar(grammar.Grammar):
+    """Grammar subclass instantiated by ParserGenerator.make_grammar()."""
+    pass
+
+class ParserGenerator(object):
+
+    def __init__(self, filename, stream=None):
+        """Read a grammar description and build its DFAs and first sets.
+
+        filename is opened for reading unless an already open stream is
+        passed in.  A stream opened here is closed again once parsing is
+        over, also when parsing fails; a stream supplied by the caller is
+        left open.
+        """
+        close_stream = None
+        if stream is None:
+            stream = open(filename)
+            close_stream = stream.close
+        self.filename = filename
+        self.stream = stream
+        try:
+            self.generator = tokenize.generate_tokens(stream.readline)
+            self.gettoken() # Initialize lookahead
+            self.dfas, self.startsymbol = self.parse()
+        finally:
+            # Do not leak the file handle when the grammar is malformed
+            if close_stream is not None:
+                close_stream()
+        self.first = {} # map from symbol name to set of tokens
+        self.addfirstsets()
+
+    def make_grammar(self):
+        """Build and return a PgenGrammar from the parsed rules.
+
+        Symbols are numbered from 256 upwards, the start symbol first and
+        the others in sorted order; each DFA is stored as a list of
+        states, each state being a list of (label, target index) arcs,
+        with a (0, own index) arc marking a final state.
+        """
+        c = PgenGrammar()
+        names = sorted(self.dfas)
+        names.remove(self.startsymbol)
+        names.insert(0, self.startsymbol)
+        for name in names:
+            number = 256 + len(c.symbol2number)
+            c.symbol2number[name] = number
+            c.number2symbol[number] = name
+        for name in names:
+            dfa = self.dfas[name]
+            states = []
+            for state in dfa:
+                arcs = [(self.make_label(c, label), dfa.index(target))
+                        for label, target in state.arcs.iteritems()]
+                if state.isfinal:
+                    arcs.append((0, dfa.index(state)))
+                states.append(arcs)
+            c.states.append(states)
+            firstset = self.make_first(c, name)
+            c.dfas[c.symbol2number[name]] = (states, firstset)
+        c.start = c.symbol2number[self.startsymbol]
+        return c
+
+    def make_first(self, c, name):
+        """Return the first set of symbol `name` as a dict whose keys are
+        the numeric labels make_label() yields for its raw labels."""
+        first = {}
+        for rawlabel in self.first[name]:
+            ##assert ilabel not in first # XXX failed on <> ... !=
+            first[self.make_label(c, rawlabel)] = 1
+        return first
+
+    def make_label(self, c, label):
+        """Return the numeric label for the grammar item `label`.
+
+        `label` is either a bare name (a symbol of this grammar or a
+        named token such as NAME) or a quoted string (a keyword or an
+        operator).  An item seen before yields the index recorded for
+        it; otherwise a new entry is appended to c.labels and its index
+        is recorded in the matching lookup table of `c`.
+        """
+        # XXX Maybe this should be a method on a subclass of converter?
+        # Index the new entry will get if one has to be appended below.
+        ilabel = len(c.labels)
+        if label[0].isalpha():
+            # Either a symbol name or a named token
+            if label in c.symbol2number:
+                # A symbol name (a non-terminal)
+                if label in c.symbol2label:
+                    return c.symbol2label[label]
+                else:
+                    c.labels.append((c.symbol2number[label], None))
+                    c.symbol2label[label] = ilabel
+                    return ilabel
+            else:
+                # A named token (NAME, NUMBER, STRING)
+                itoken = getattr(token, label, None)
+                assert isinstance(itoken, int), label
+                assert itoken in token.tok_name, label
+                if itoken in c.tokens:
+                    return c.tokens[itoken]
+                else:
+                    c.labels.append((itoken, None))
+                    c.tokens[itoken] = ilabel
+                    return ilabel
+        else:
+            # Either a keyword or an operator
+            assert label[0] in ('"', "'"), label
+            # NOTE(review): eval() runs text taken from the grammar file;
+            # only safe as long as grammar files are trusted input.
+            value = eval(label)
+            if value[0].isalpha():
+                # A keyword
+                if value in c.keywords:
+                    return c.keywords[value]
+                else:
+                    c.labels.append((token.NAME, value))
+                    c.keywords[value] = ilabel
+                    return ilabel
+            else:
+                # An operator (any non-numeric token)
+                itoken = grammar.opmap[value] # Fails if unknown token
+                if itoken in c.tokens:
+                    return c.tokens[itoken]
+                else:
+                    c.labels.append((itoken, None))
+                    c.tokens[itoken] = ilabel
+                    return ilabel
+
+    def addfirstsets(self):
+        """Compute the first set of every rule that has none yet,
+        visiting the rule names in sorted order."""
+        for name in sorted(self.dfas):
+            if name in self.first:
+                continue
+            self.calcfirst(name)
+
+    def calcfirst(self, name):
+        """Compute self.first[name] from the arcs of the rule's start state.
+
+        Rules referenced there are resolved recursively.  Raises
+        ValueError for a left-recursive rule, or when one symbol occurs
+        in the first sets of two different arc labels.
+        """
+        dfa = self.dfas[name]
+        self.first[name] = None # dummy to detect left recursion
+        startstate = dfa[0]
+        totalset = {}
+        overlapcheck = {}
+        for label in startstate.arcs:
+            if label not in self.dfas:
+                # Not a rule name: the label stands for itself
+                totalset[label] = 1
+                overlapcheck[label] = {label: 1}
+                continue
+            if label not in self.first:
+                self.calcfirst(label)
+            fset = self.first[label]
+            if fset is None:
+                raise ValueError("recursion for rule %r" % name)
+            totalset.update(fset)
+            overlapcheck[label] = fset
+        owner = {}
+        for label, itsfirst in overlapcheck.iteritems():
+            for symbol in itsfirst:
+                if symbol in owner:
+                    raise ValueError("rule %s is ambiguous; %s is in the"
+                                     " first sets of %s as well as %s" %
+                                     (name, symbol, label, owner[symbol]))
+                owner[symbol] = label
+        self.first[name] = totalset
+
+    def parse(self):
+        """Parse the whole grammar file.
+
+        Returns (dfas, startsymbol): a dict mapping each rule name to its
+        simplified DFA (a list of DFAState objects), and the name of the
+        first rule in the file.
+        """
+        dfas = {}
+        startsymbol = None
+        # MSTART: (NEWLINE | RULE)* ENDMARKER
+        while self.type != token.ENDMARKER:
+            while self.type == token.NEWLINE:
+                self.gettoken()
+            # RULE: NAME ':' RHS NEWLINE
+            name = self.expect(token.NAME)
+            self.expect(token.OP, ":")
+            a, z = self.parse_rhs()
+            self.expect(token.NEWLINE)
+            #self.dump_nfa(name, a, z)
+            dfa = self.make_dfa(a, z)
+            #self.dump_dfa(name, dfa)
+            self.simplify_dfa(dfa)
+            dfas[name] = dfa
+            if startsymbol is None:
+                startsymbol = name
+        return dfas, startsymbol
+
+    def make_dfa(self, start, finish):
+        """Convert the NFA running from `start` to `finish` into a DFA.
+
+        Returns a list of DFAState instances whose first element is the
+        start state.
+        """
+        # To turn an NFA into a DFA, we define the states of the DFA
+        # to correspond to *sets* of states of the NFA.  Then do some
+        # state reduction.  Let's represent sets as dicts with 1 for
+        # values.
+        assert isinstance(start, NFAState)
+        assert isinstance(finish, NFAState)
+        def closure(state):
+            # All NFA states reachable from `state` over unlabelled arcs
+            base = {}
+            addclosure(state, base)
+            return base
+        def addclosure(state, base):
+            # Add `state` and everything reachable from it over
+            # unlabelled arcs to `base`; states already present are
+            # not visited again.
+            assert isinstance(state, NFAState)
+            if state in base:
+                return
+            base[state] = 1
+            for label, next in state.arcs:
+                if label is None:
+                    addclosure(next, base)
+        states = [DFAState(closure(start), finish)]
+        for state in states: # NB states grows while we're iterating
+            # Collect, per label, the NFA states reachable from this one
+            arcs = {}
+            for nfastate in state.nfaset:
+                for label, next in nfastate.arcs:
+                    if label is not None:
+                        addclosure(next, arcs.setdefault(label, {}))
+            for label, nfaset in arcs.iteritems():
+                # Reuse the DFA state standing for this NFA set, if any
+                for st in states:
+                    if st.nfaset == nfaset:
+                        break
+                else:
+                    st = DFAState(nfaset, finish)
+                    states.append(st)
+                state.addarc(st, label)
+        return states # List of DFAState instances; first one is start
+
+    def dump_nfa(self, name, start, finish):
+        """Print the NFA of rule `name`, one numbered state at a time.
+
+        States are numbered in the order in which they are first reached
+        from `start`; `finish` is marked "(final)".
+        """
+        print "Dump of NFA for", name
+        todo = [start]
+        # NB todo grows while we're iterating over it
+        for i, state in enumerate(todo):
+            print "  State", i, state is finish and "(final)" or ""
+            for label, next in state.arcs:
+                if next in todo:
+                    j = todo.index(next)
+                else:
+                    j = len(todo)
+                    todo.append(next)
+                if label is None:
+                    print "    -> %d" % j
+                else:
+                    print "    %s -> %d" % (label, j)
+
+    def dump_dfa(self, name, dfa):
+        """Print every state of `dfa` with its labelled arcs."""
+        print "Dump of DFA for", name
+        for index, state in enumerate(dfa):
+            if state.isfinal:
+                marker = "(final)"
+            else:
+                marker = ""
+            print "  State", index, marker
+            for label, target in state.arcs.iteritems():
+                print "    %s -> %d" % (label, dfa.index(target))
+
+    def simplify_dfa(self, dfa):
+        """Merge equal states of `dfa` in place.
+
+        `dfa` is a list of DFAState instances; states comparing equal
+        to an earlier state are removed and arcs to them redirected.
+        """
+        # This is not theoretically optimal, but works well enough.
+        # Algorithm: repeatedly look for two states that have the same
+        # set of arcs (same labels pointing to the same nodes) and
+        # unify them, until things stop changing.
+
+        # dfa is a list of DFAState instances
+        changes = True
+        while changes:
+            changes = False
+            for i, state_i in enumerate(dfa):
+                for j in range(i+1, len(dfa)):
+                    state_j = dfa[j]
+                    if state_i == state_j:
+                        #print "  unify", i, j
+                        del dfa[j]
+                        for state in dfa:
+                            state.unifystate(state_j, state_i)
+                        changes = True
+                        # The list changed, so the range above is stale
+                        break
+
+    def parse_rhs(self):
+        """Parse a right-hand side; return its (start, end) NFA states."""
+        # RHS: ALT ('|' ALT)*
+        a, z = self.parse_alt()
+        if self.value != "|":
+            return a, z
+        # Several alternatives: give them a common start and end state
+        start = NFAState()
+        end = NFAState()
+        start.addarc(a)
+        z.addarc(end)
+        while self.value == "|":
+            self.gettoken()
+            a, z = self.parse_alt()
+            start.addarc(a)
+            z.addarc(end)
+        return start, end
+
+    def parse_alt(self):
+        """Parse a sequence of items; return its (start, end) NFA states."""
+        # ALT: ITEM+
+        first, last = self.parse_item()
+        while True:
+            more = (self.value in ("(", "[") or
+                    self.type in (token.NAME, token.STRING))
+            if not more:
+                break
+            head, tail = self.parse_item()
+            last.addarc(head)
+            last = tail
+        return first, last
+
+    def parse_item(self):
+        """Parse an optional group or a possibly repeated atom; return
+        its (start, end) NFA states."""
+        # ITEM: '[' RHS ']' | ATOM ['+' | '*']
+        if self.value == "[":
+            self.gettoken()
+            a, z = self.parse_rhs()
+            self.expect(token.OP, "]")
+            # Optional: the body may be skipped altogether
+            a.addarc(z)
+            return a, z
+        a, z = self.parse_atom()
+        repeat = self.value
+        if repeat not in ("+", "*"):
+            return a, z
+        self.gettoken()
+        # Loop back from the end to allow repetition
+        z.addarc(a)
+        if repeat == "*":
+            return a, a
+        return a, z
+
+    def parse_atom(self):
+        """Parse a parenthesised group, a name or a string; return its
+        (start, end) NFA states.  Anything else is reported through
+        raise_error()."""
+        # ATOM: '(' RHS ')' | NAME | STRING
+        if self.value == "(":
+            self.gettoken()
+            a, z = self.parse_rhs()
+            self.expect(token.OP, ")")
+            return a, z
+        if self.type not in (token.NAME, token.STRING):
+            self.raise_error("expected (...) or NAME or STRING, got %s/%s",
+                             self.type, self.value)
+            return
+        a = NFAState()
+        z = NFAState()
+        a.addarc(z, self.value)
+        self.gettoken()
+        return a, z
+
+    def expect(self, type, value=None):
+        """Check the current token, advance, and return the token's text.
+
+        The token must have the given type and, if value is not None,
+        the given text; otherwise raise_error() is called.
+        """
+        matches = (self.type == type and
+                   (value is None or self.value == value))
+        if not matches:
+            self.raise_error("expected %s/%s, got %s/%s",
+                             type, value, self.type, self.value)
+        text = self.value
+        self.gettoken()
+        return text
+
+    def gettoken(self):
+        """Advance to the next token, skipping COMMENT and NL tokens, and
+        store it in self.type, value, begin, end and line."""
+        while True:
+            tup = self.generator.next()
+            if tup[0] not in (tokenize.COMMENT, tokenize.NL):
+                break
+        self.type, self.value, self.begin, self.end, self.line = tup
+        #print token.tok_name[self.type], repr(self.value)
+
+    def raise_error(self, msg, *args):
+        """Raise SyntaxError for the current position in the grammar file.
+
+        msg is %-formatted with args; if that fails, msg and the args
+        are joined with spaces instead.  The error carries the file
+        name, the end position of the current token and its line.
+        """
+        if args:
+            try:
+                msg = msg % args
+            except (TypeError, ValueError):
+                # Format and arguments do not fit together; still
+                # report everything we were given.
+                msg = " ".join([msg] + map(str, args))
+        raise SyntaxError(msg, (self.filename, self.end[0],
+                                self.end[1], self.line))
+
+class NFAState(object):
+    """A state of the NFA built while parsing one grammar rule."""
+
+    def __init__(self):
+        # (label, NFAState) pairs in the order they were added; a label
+        # of None means the arc carries no label.
+        self.arcs = []
+
+    def addarc(self, next, label=None):
+        """Append an arc leading to `next`, optionally labelled with a
+        string."""
+        assert isinstance(next, NFAState)
+        assert label is None or isinstance(label, str)
+        arc = (label, next)
+        self.arcs.append(arc)
+
+class DFAState(object):
+    """One state of a rule's DFA, standing for a set of NFA states.
+
+    nfaset holds the NFAState objects as dict keys, isfinal tells
+    whether the NFA's final state is among them, and arcs maps each
+    label to the DFAState reached over it.
+    """
+
+    def __init__(self, nfaset, final):
+        assert isinstance(nfaset, dict)
+        assert isinstance(iter(nfaset).next(), NFAState)
+        assert isinstance(final, NFAState)
+        self.nfaset = nfaset
+        self.isfinal = final in nfaset
+        self.arcs = {} # map from label to DFAState
+
+    def addarc(self, next, label):
+        """Add the arc for `label`; each label may be added only once."""
+        assert isinstance(label, str)
+        assert label not in self.arcs
+        assert isinstance(next, DFAState)
+        self.arcs[label] = next
+
+    def unifystate(self, old, new):
+        """Redirect every arc that points at `old` to `new`."""
+        for label, next in self.arcs.iteritems():
+            if next is old:
+                self.arcs[label] = new
+
+    def __eq__(self, other):
+        # Equality test -- ignore the nfaset instance variable
+        assert isinstance(other, DFAState)
+        if self.isfinal != other.isfinal:
+            return False
+        # Can't just return self.arcs == other.arcs, because that
+        # would invoke this method recursively, with cycles...
+        if len(self.arcs) != len(other.arcs):
+            return False
+        for label, next in self.arcs.iteritems():
+            if next is not other.arcs.get(label):
+                return False
+        return True
+
+    def __ne__(self, other):
+        # Python 2 does not derive != from ==; without this method two
+        # states could be "equal" and "not equal" at the same time.
+        if not isinstance(other, DFAState):
+            return NotImplemented
+        return not self == other
+
+def generate_grammar(filename="Grammar.txt"):
+    """Parse the grammar file `filename` and return the grammar object
+    built by ParserGenerator.make_grammar()."""
+    return ParserGenerator(filename).make_grammar()
--- a/sphinx/pycode/pgen2/token.py
+++ b/sphinx/pycode/pgen2/token.py
@ -0,0 +1,82 @@
+#! /usr/bin/env python
+
+"""Token constants (from "token.h")."""
+
+#  Taken from Python (r53757) and modified to include some tokens
+#   originally monkeypatched in by pgen2.tokenize
+
+#--start constants--
+ENDMARKER = 0
+NAME = 1
+NUMBER = 2
+STRING = 3
+NEWLINE = 4
+INDENT = 5
+DEDENT = 6
+LPAR = 7
+RPAR = 8
+LSQB = 9
+RSQB = 10
+COLON = 11
+COMMA = 12
+SEMI = 13
+PLUS = 14
+MINUS = 15
+STAR = 16
+SLASH = 17
+VBAR = 18
+AMPER = 19
+LESS = 20
+GREATER = 21
+EQUAL = 22
+DOT = 23
+PERCENT = 24
+BACKQUOTE = 25
+LBRACE = 26
+RBRACE = 27
+EQEQUAL = 28
+NOTEQUAL = 29
+LESSEQUAL = 30
+GREATEREQUAL = 31
+TILDE = 32
+CIRCUMFLEX = 33
+LEFTSHIFT = 34
+RIGHTSHIFT = 35
+DOUBLESTAR = 36
+PLUSEQUAL = 37
+MINEQUAL = 38
+STAREQUAL = 39
+SLASHEQUAL = 40
+PERCENTEQUAL = 41
+AMPEREQUAL = 42
+VBAREQUAL = 43
+CIRCUMFLEXEQUAL = 44
+LEFTSHIFTEQUAL = 45
+RIGHTSHIFTEQUAL = 46
+DOUBLESTAREQUAL = 47
+DOUBLESLASH = 48
+DOUBLESLASHEQUAL = 49
+AT = 50
+OP = 51
+COMMENT = 52
+NL = 53
+RARROW = 54
+ERRORTOKEN = 55
+N_TOKENS = 56
+NT_OFFSET = 256
+#--end constants--
+
+# Reverse mapping from number to name, built from every module global
+# whose value is a plain int -- this includes N_TOKENS and NT_OFFSET.
+tok_name = {}
+for _name, _value in globals().items():
+    if type(_value) is type(0):
+        tok_name[_value] = _name
+
+
+def ISTERMINAL(x):
+    """Return True if x is below NT_OFFSET."""
+    return x < NT_OFFSET
+
+def ISNONTERMINAL(x):
+    """Return True if x is NT_OFFSET or above."""
+    return x >= NT_OFFSET
+
+def ISEOF(x):
+    """Return True if x is the ENDMARKER token."""
+    return x == ENDMARKER
--- a/sphinx/pycode/pgen2/tokenize.py
+++ b/sphinx/pycode/pgen2/tokenize.py
@ -0,0 +1,405 @@
+# Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006 Python Software Foundation.
+# All rights reserved.
+
+"""Tokenization help for Python programs.
+
+generate_tokens(readline) is a generator that breaks a stream of
+text into Python tokens.  It accepts a readline-like method which is called
+repeatedly to get the next line of input (or "" for EOF).  It generates
+5-tuples with these members:
+
+    the token type (see token.py)
+    the token (a string)
+    the starting (row, column) indices of the token (a 2-tuple of ints)
+    the ending (row, column) indices of the token (a 2-tuple of ints)
+    the original line (string)
+
+It is designed to match the working of the Python tokenizer exactly, except
+that it produces COMMENT tokens for comments and gives type OP for all
+operators.
+
+Older entry points
+    tokenize_loop(readline, tokeneater)
+    tokenize(readline, tokeneater=printtoken)
+are the same, except instead of generating tokens, tokeneater is a callback
+function to which the 5 fields described above are passed as 5 arguments,
+each time a new token is found."""
+
+__author__ = 'Ka-Ping Yee <ping@lfw.org>'
+__credits__ = \
+    'GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro'
+
+import string, re
+from sphinx.pycode.pgen2.token import *
+from sphinx.pycode.pgen2 import token
+
+# Public API: every public name of the token module plus three of the
+# entry points defined below.  The module object itself is unbound again
+# afterwards; the names brought in by the star import stay available.
+__all__ = [x for x in dir(token) if x[0] != '_'] + ["tokenize",
+           "generate_tokens", "untokenize"]
+del token
+
+def group(*choices):
+    """Return regex source for a parenthesised alternation of *choices*."""
+    return '(%s)' % '|'.join(choices)
+
+def any(*choices):
+    """Return regex source matching the alternation zero or more times."""
+    # the name mirrors the regex helper trio; it hides the builtin any()
+    # inside this module
+    alternation = group(*choices)
+    return alternation + '*'
+
+def maybe(*choices):
+    """Return regex source matching the alternation at most once."""
+    alternation = group(*choices)
+    return alternation + '?'
+
+# Regex source fragments.  They are plain strings, combined step by step
+# with group()/any()/maybe() into the Token and PseudoToken patterns.
+Whitespace = r'[ \f\t]*'
+Comment = r'#[^\r\n]*'
+Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
+Name = r'[a-zA-Z_]\w*'
+
+# Integer literals.  Hex, octal and decimal accept an optional l/L
+# suffix; the binary form does not.
+Binnumber = r'0[bB][01]*'
+Hexnumber = r'0[xX][\da-fA-F]*[lL]?'
+Octnumber = r'0[oO]?[0-7]*[lL]?'
+Decnumber = r'[1-9]\d*[lL]?'
+Intnumber = group(Binnumber, Hexnumber, Octnumber, Decnumber)
+Exponent = r'[eE][-+]?\d+'
+Pointfloat = group(r'\d+\.\d*', r'\.\d+') + maybe(Exponent)
+Expfloat = r'\d+' + Exponent
+Floatnumber = group(Pointfloat, Expfloat)
+Imagnumber = group(r'\d+[jJ]', Floatnumber + r'[jJ]')
+Number = group(Imagnumber, Floatnumber, Intnumber)
+
+# Tail end of ' string.
+Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
+# Tail end of " string.
+Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
+# Tail end of ''' string.
+Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
+# Tail end of """ string.
+Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
+Triple = group("[ubUB]?[rR]?'''", '[ubUB]?[rR]?"""')
+# Single-line ' or " string.
+# NOTE(review): accepts only a u/U prefix, whereas Triple and ContStr also
+# accept b/B -- confirm whether the difference is intended.
+String = group(r"[uU]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
+               r'[uU]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
+
+# Because of leftmost-then-longest match semantics, be sure to put the
+# longest operators first (e.g., if = came before ==, == would get
+# recognized as two instances of =).
+Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"<>", r"!=",
+                 r"//=?", r"->",
+                 r"[+\-*/%&|^=<>]=?",
+                 r"~")
+
+Bracket = '[][(){}]'
+Special = group(r'\r?\n', r'[:;.,`@]')
+Funny = group(Operator, Bracket, Special)
+
+PlainToken = group(Number, Funny, String, Name)
+Token = Ignore + PlainToken
+
+# First (or only) line of ' or " string.
+ContStr = group(r"[uUbB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
+                group("'", r'\\\r?\n'),
+                r'[uUbB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
+                group('"', r'\\\r?\n'))
+PseudoExtras = group(r'\\\r?\n', Comment, Triple)
+# The outermost group opens right after the leading whitespace, so group 1
+# of a match is the token text without that whitespace.
+PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
+
+# Compiled forms of the patterns built above.
+tokenprog, pseudoprog, single3prog, double3prog = map(
+    re.compile, (Token, PseudoToken, Single3, Double3))
+# Maps the opening of a string (optional prefix letters plus quote) to the
+# program that matches the rest of that string.  Lone prefix letters map
+# to None, so a lookup chained with "or" over the first characters of a
+# token skips the prefix and lands on the entry for the quote character.
+endprogs = {"'": re.compile(Single), '"': re.compile(Double),
+            "'''": single3prog, '"""': double3prog,
+            "r'''": single3prog, 'r"""': double3prog,
+            "u'''": single3prog, 'u"""': double3prog,
+            "b'''": single3prog, 'b"""': double3prog,
+            "ur'''": single3prog, 'ur"""': double3prog,
+            "br'''": single3prog, 'br"""': double3prog,
+            "R'''": single3prog, 'R"""': double3prog,
+            "U'''": single3prog, 'U"""': double3prog,
+            "B'''": single3prog, 'B"""': double3prog,
+            "uR'''": single3prog, 'uR"""': double3prog,
+            "Ur'''": single3prog, 'Ur"""': double3prog,
+            "UR'''": single3prog, 'UR"""': double3prog,
+            "bR'''": single3prog, 'bR"""': double3prog,
+            "Br'''": single3prog, 'Br"""': double3prog,
+            "BR'''": single3prog, 'BR"""': double3prog,
+            'r': None, 'R': None,
+            'u': None, 'U': None,
+            'b': None, 'B': None}
+
+# Every spelling that opens a triple-quoted string; each key maps to
+# itself, the dict is only tested for membership.
+triple_quoted = {}
+for t in ("'''", '"""',
+          "r'''", 'r"""', "R'''", 'R"""',
+          "u'''", 'u"""', "U'''", 'U"""',
+          "b'''", 'b"""', "B'''", 'B"""',
+          "ur'''", 'ur"""', "Ur'''", 'Ur"""',
+          "uR'''", 'uR"""', "UR'''", 'UR"""',
+          "br'''", 'br"""', "Br'''", 'Br"""',
+          "bR'''", 'bR"""', "BR'''", 'BR"""',):
+    triple_quoted[t] = t
+# Same idea for strings opened by a single quote character.
+single_quoted = {}
+for t in ("'", '"',
+          "r'", 'r"', "R'", 'R"',
+          "u'", 'u"', "U'", 'U"',
+          "b'", 'b"', "B'", 'B"',
+          "ur'", 'ur"', "Ur'", 'Ur"',
+          "uR'", 'uR"', "UR'", 'UR"',
+          "br'", 'br"', "Br'", 'Br"',
+          "bR'", 'bR"', "BR'", 'BR"', ):
+    single_quoted[t] = t
+
+# Width of a tab stop when leading whitespace is measured.
+tabsize = 8
+
+# Raised by generate_tokens() when the input ends inside a multi-line
+# string or a multi-line statement.
+class TokenError(Exception): pass
+
+# Caught and discarded by tokenize(), which then returns normally;
+# presumably meant to be raised by a tokeneater callback to stop early.
+class StopTokenizing(Exception): pass
+
+def printtoken(type, token, (srow, scol), (erow, ecol), line): # for testing
+    """Print the span, the type name and the repr of one token."""
+    fields = (srow, scol, erow, ecol, tok_name[type], repr(token))
+    print "%d,%d-%d,%d:\t%s\t%s" % fields
+
+def tokenize(readline, tokeneater=printtoken):
+    """
+    Feed every token read through *readline* to the *tokeneater* callback.
+
+    readline must be a callable offering the interface of the readline()
+    method of built-in file objects: each call returns one line of input
+    as a string.
+
+    tokeneater must be a callable as well.  It is invoked once per token
+    with five arguments, the members of the tuples produced by
+    generate_tokens().  If StopTokenizing is raised while tokens are being
+    handled, tokenizing simply ends.
+    """
+    try:
+        for token_info in generate_tokens(readline):
+            tokeneater(*token_info)
+    except StopTokenizing:
+        pass
+
+# backwards compatible interface
+def tokenize_loop(readline, tokeneater):
+    """Call *tokeneater* with the five fields of each generated token."""
+    for tok_type, tok_string, start, end, line in generate_tokens(readline):
+        tokeneater(tok_type, tok_string, start, end, line)
+
+class Untokenizer:
+    """Turn a token stream back into source text.
+
+    Full 5-tuples are laid out from their recorded positions.  From the
+    first 2-tuple onwards the rest of the stream is rendered by compat(),
+    which has no position data and spaces tokens by simple rules.
+    """
+
+    def __init__(self):
+        self.tokens = []    # output fragments, joined at the end
+        self.prev_row = 1   # row on which the previous token ended
+        self.prev_col = 0   # column just past the previous token
+
+    def add_whitespace(self, start):
+        """Append the spaces needed to reach the column of *start*."""
+        row, col = start
+        # NOTE(review): this only rejects a token starting on a later row
+        # than the output has reached; confirm that direction is intended.
+        assert row <= self.prev_row
+        col_offset = col - self.prev_col
+        # a zero offset adds nothing; a negative one would give '' too
+        if col_offset:
+            self.tokens.append(" " * col_offset)
+
+    def untokenize(self, iterable):
+        """Return the source text for the tokens in *iterable*."""
+        # Share a single iterator with compat().  Handing over the original
+        # iterable made a list restart from its first element (repeating
+        # any 5-tuples already written) and made a generator lose the
+        # 2-tuple that had just been consumed here.
+        stream = iter(iterable)
+        for t in stream:
+            if len(t) == 2:
+                self.compat(t, stream)
+                break
+            tok_type, token, start, end, line = t
+            self.add_whitespace(start)
+            self.tokens.append(token)
+            self.prev_row, self.prev_col = end
+            if tok_type in (NEWLINE, NL):
+                self.prev_row += 1
+                self.prev_col = 0
+        return "".join(self.tokens)
+
+    def compat(self, token, iterable):
+        """Render *token* and then the tokens left in *iterable*.
+
+        Only the first two fields (number, string) of each token are used.
+        *iterable* must yield just the tokens that follow *token*.
+        """
+        startline = False
+        indents = []
+        toks_append = self.tokens.append
+
+        def pending():
+            # the token already taken from the stream, then the remainder
+            yield token
+            for tok in iterable:
+                yield tok
+
+        for tok in pending():
+            toknum, tokval = tok[:2]
+
+            # without positions, a trailing space keeps names and numbers
+            # from running into the token that follows
+            if toknum in (NAME, NUMBER):
+                tokval += ' '
+
+            if toknum == INDENT:
+                indents.append(tokval)
+                continue
+            elif toknum == DEDENT:
+                indents.pop()
+                continue
+            elif toknum in (NEWLINE, NL):
+                startline = True
+            elif startline and indents:
+                # first token on a new line: emit the current indentation
+                toks_append(indents[-1])
+                startline = False
+            toks_append(tokval)
+
+def untokenize(iterable):
+    """Convert a stream of tokens back into Python source text.
+
+    Every item produced by *iterable* must be a sequence whose first two
+    elements are the token number and the token value.  If the items carry
+    only those two elements, the resulting output is poor.
+
+    Round-trip invariant for full input:
+        Untokenized source will match input source exactly
+
+    Round-trip invariant for limited input:
+        # Output text will tokenize back to the input
+        t1 = [tok[:2] for tok in generate_tokens(f.readline)]
+        newcode = untokenize(t1)
+        readline = iter(newcode.splitlines(1)).next
+        t2 = [tok[:2] for tok in generate_tokens(readline)]
+        assert t1 == t2
+    """
+    return Untokenizer().untokenize(iterable)
+
+def generate_tokens(readline):
+    """
+    The generate_tokens() generator requires one argument, readline, which
+    must be a callable object which provides the same interface as the
+    readline() method of built-in file objects. Each call to the function
+    should return one line of input as a string.  Alternately, readline
+    can be a callable function terminating with StopIteration:
+        readline = open(myfile).next    # Example of alternate readline
+
+    The generator produces 5-tuples with these members: the token type; the
+    token string; a 2-tuple (srow, scol) of ints specifying the row and
+    column where the token begins in the source; a 2-tuple (erow, ecol) of
+    ints specifying the row and column where the token ends in the source;
+    and the line on which the token was found. The line passed is the
+    logical line; continuation lines are included.
+
+    Raises TokenError when the input ends inside a multi-line string or a
+    multi-line statement, and IndentationError when a dedent matches no
+    enclosing indentation level.
+    """
+    # lnum: number of the line being scanned; parenlev: depth of open
+    # brackets; continued: set after a backslash line continuation.
+    lnum = parenlev = continued = 0
+    namechars, numchars = string.ascii_letters + '_', '0123456789'
+    # contstr: text gathered so far for a string spanning several lines;
+    # needcont: set for a single-quoted string continued with a backslash,
+    # whose following lines must themselves end in backslash-newline;
+    # contline: the source lines that string has covered so far.
+    contstr, needcont = '', 0
+    contline = None
+    # stack of the indentation columns currently open
+    indents = [0]
+
+    while 1:                                   # loop over lines in stream
+        try:
+            line = readline()
+        except StopIteration:
+            # an exhausted iterator-style readline counts as end of input
+            line = ''
+        lnum = lnum + 1
+        pos, max = 0, len(line)
+
+        if contstr:                            # continued string
+            if not line:
+                raise TokenError, ("EOF in multi-line string", strstart)
+            endmatch = endprog.match(line)
+            if endmatch:
+                pos = end = endmatch.end(0)
+                yield (STRING, contstr + line[:end],
+                       strstart, (lnum, end), contline + line)
+                contstr, needcont = '', 0
+                contline = None
+            elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
+                yield (ERRORTOKEN, contstr + line,
+                           strstart, (lnum, len(line)), contline)
+                contstr = ''
+                contline = None
+                continue
+            else:
+                contstr = contstr + line
+                contline = contline + line
+                continue
+
+        elif parenlev == 0 and not continued:  # new statement
+            if not line: break
+            column = 0
+            while pos < max:                   # measure leading whitespace
+                if line[pos] == ' ': column = column + 1
+                elif line[pos] == '\t': column = (column/tabsize + 1)*tabsize
+                elif line[pos] == '\f': column = 0
+                else: break
+                pos = pos + 1
+            if pos == max: break
+
+            if line[pos] in '#\r\n':           # skip comments or blank lines
+                if line[pos] == '#':
+                    comment_token = line[pos:].rstrip('\r\n')
+                    nl_pos = pos + len(comment_token)
+                    yield (COMMENT, comment_token,
+                           (lnum, pos), (lnum, pos + len(comment_token)), line)
+                    yield (NL, line[nl_pos:],
+                           (lnum, nl_pos), (lnum, len(line)), line)
+                else:
+                    # NOTE(review): line[pos] is never '#' in this branch,
+                    # so the index below always selects NL.
+                    yield ((NL, COMMENT)[line[pos] == '#'], line[pos:],
+                           (lnum, pos), (lnum, len(line)), line)
+                continue
+
+            if column > indents[-1]:           # count indents or dedents
+                indents.append(column)
+                yield (INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
+            while column < indents[-1]:
+                if column not in indents:
+                    raise IndentationError(
+                        "unindent does not match any outer indentation level",
+                        ("<tokenize>", lnum, pos, line))
+                indents = indents[:-1]
+                yield (DEDENT, '', (lnum, pos), (lnum, pos), line)
+
+        else:                                  # continued statement
+            if not line:
+                raise TokenError, ("EOF in multi-line statement", (lnum, 0))
+            continued = 0
+
+        while pos < max:
+            pseudomatch = pseudoprog.match(line, pos)
+            if pseudomatch:                                # scan for tokens
+                # group 1 is the token itself, without leading whitespace
+                start, end = pseudomatch.span(1)
+                spos, epos, pos = (lnum, start), (lnum, end), end
+                token, initial = line[start:end], line[start]
+
+                if initial in numchars or \
+                   (initial == '.' and token != '.'):      # ordinary number
+                    yield (NUMBER, token, spos, epos, line)
+                elif initial in '\r\n':
+                    # inside brackets a line break does not end the statement
+                    newline = NEWLINE
+                    if parenlev > 0:
+                        newline = NL
+                    yield (newline, token, spos, epos, line)
+                elif initial == '#':
+                    assert not token.endswith("\n")
+                    yield (COMMENT, token, spos, epos, line)
+                elif token in triple_quoted:
+                    endprog = endprogs[token]
+                    endmatch = endprog.match(line, pos)
+                    if endmatch:                           # all on one line
+                        pos = endmatch.end(0)
+                        token = line[start:pos]
+                        yield (STRING, token, spos, (lnum, pos), line)
+                    else:
+                        strstart = (lnum, start)           # multiple lines
+                        contstr = line[start:]
+                        contline = line
+                        break
+                elif initial in single_quoted or \
+                    token[:2] in single_quoted or \
+                    token[:3] in single_quoted:
+                    if token[-1] == '\n':                  # continued string
+                        strstart = (lnum, start)
+                        # prefix letters map to None in endprogs, so this
+                        # picks the program stored under the first quote
+                        # character found
+                        endprog = (endprogs[initial] or endprogs[token[1]] or
+                                   endprogs[token[2]])
+                        contstr, needcont = line[start:], 1
+                        contline = line
+                        break
+                    else:                                  # ordinary string
+                        yield (STRING, token, spos, epos, line)
+                elif initial in namechars:                 # ordinary name
+                    yield (NAME, token, spos, epos, line)
+                elif initial == '\\':                      # continued stmt
+                    # This yield is new; needed for better idempotency:
+                    yield (NL, token, spos, (lnum, pos), line)
+                    continued = 1
+                else:
+                    if initial in '([{': parenlev = parenlev + 1
+                    elif initial in ')]}': parenlev = parenlev - 1
+                    yield (OP, token, spos, epos, line)
+            else:
+                # nothing matches here: report one character and move on
+                yield (ERRORTOKEN, line[pos],
+                           (lnum, pos), (lnum, pos+1), line)
+                pos = pos + 1
+
+    for indent in indents[1:]:                 # pop remaining indent levels
+        yield (DEDENT, '', (lnum, 0), (lnum, 0), '')
+    yield (ENDMARKER, '', (lnum, 0), (lnum, 0), '')
+
+if __name__ == '__main__':                     # testing
+    # Print the tokens of the file named on the command line, or of
+    # standard input when no file is given.
+    import sys
+    if len(sys.argv) > 1:
+        source = open(sys.argv[1])
+        try:
+            tokenize(source.readline)
+        finally:
+            # close the file even if tokenizing fails part-way
+            source.close()
+    else:
+        tokenize(sys.stdin.readline)
--- a/sphinx/roles.py
+++ b/sphinx/roles.py
@ -24,7 +24,7 @@ generic_docroles = {
    'guilabel' : nodes.strong,
    'kbd' : nodes.literal,
    'mailheader' : addnodes.literal_emphasis,
-    'makevar' : nodes.Text,
+    'makevar' : nodes.strong,
    'manpage' : addnodes.literal_emphasis,
    'mimetype' : addnodes.literal_emphasis,
    'newsgroup' : addnodes.literal_emphasis,
--- a/sphinx/util/init.py
+++ b/sphinx/util/init.py
@ -324,3 +324,40 @@ class FilenameUniqDict(dict):

    def __setstate__(self, state):
        self._existing = state
+
+
+def parselinenos(spec, total):
+    """
+    Parse a line number spec (such as "1,2,4-6") and return a list of
+    wanted line numbers.
+
+    The returned numbers are zero-based.  A range may leave out its start
+    ("-6": begin at the first line) or its end ("4-": run up to *total*).
+    Raises ValueError if *spec* cannot be parsed.
+    """
+    items = []
+    for part in spec.split(','):
+        begend = part.strip().split('-')
+        try:
+            if len(begend) > 2:
+                raise ValueError
+            if len(begend) == 1:
+                items.append(int(begend[0]) - 1)
+            else:
+                # Plain if/else rather than "cond and a or b": that idiom
+                # falls through to b whenever a is falsy, so an omitted
+                # start (a == 0) always reached int('') and was rejected.
+                if begend[0] == '':
+                    start = 0
+                else:
+                    start = int(begend[0]) - 1
+                if begend[1] == '':
+                    end = total
+                else:
+                    end = int(begend[1])
+                items.extend(range(start, end))
+        except (TypeError, ValueError):
+            raise ValueError('invalid line number spec: %r' % spec)
+    return items
+
+
+def force_decode(string, encoding):
+    """
+    Decode *string* if it is a str instance; return anything else unchanged.
+
+    With a true *encoding* that encoding is used.  Otherwise UTF-8 is tried
+    first and latin1 serves as the fallback.
+    """
+    if not isinstance(string, str):
+        return string
+    if encoding:
+        return string.decode(encoding)
+    try:
+        # try decoding with utf-8, should only work for real UTF-8
+        return string.decode('utf-8')
+    except UnicodeError:
+        # last resort -- can't fail
+        return string.decode('latin1')
--- a/sphinx/util/docstrings.py
+++ b/sphinx/util/docstrings.py
@ -0,0 +1,56 @@
+# -*- coding: utf-8 -*-
+"""
+    sphinx.util.docstrings
+    ~~~~~~~~~~~~~~~~~~~~~~
+
+    Utilities for docstring processing.
+
+    :copyright: Copyright 2007-2009 by the Sphinx team, see AUTHORS.
+    :license: BSD, see LICENSE for details.
+"""
+
+import sys
+
+
+def prepare_docstring(s):
+    """
+    Turn a docstring into a list of reST lines, ready to be inserted into a
+    docutils ViewList (the argument of nested_parse()).  Tabs are expanded,
+    the indentation shared by the lines after the first is removed, leading
+    blank lines are dropped and the result ends with one empty line that
+    separates this docstring from whatever follows.
+    """
+    lines = s.expandtabs().splitlines()
+    if lines:
+        # the first line is stripped on its own; it does not take part in
+        # the margin computation
+        lines[0] = lines[0].lstrip()
+    # indentation of every non-blank line after the first one
+    indents = [len(line) - len(line.lstrip())
+               for line in lines[1:] if line.lstrip()]
+    if indents:
+        margin = min(indents)
+        lines[1:] = [line[margin:] for line in lines[1:]]
+    # skip over the blank lines at the top
+    first = 0
+    while first < len(lines) and not lines[first]:
+        first += 1
+    del lines[:first]
+    # make sure there is an empty line at the end
+    if lines and lines[-1]:
+        lines.append('')
+    return lines
+
+
+def prepare_commentdoc(s):
+    """
+    Extract documentation comment lines (starting with #:) and return them as a
+    list of lines.  Returns an empty list if there is no documentation.
+
+    A bare "#:" line is kept as an empty line, so it can separate
+    paragraphs.  The result ends with an empty line unless it is empty.
+    """
+    result = []
+    for line in s.expandtabs().splitlines():
+        line = line.strip()
+        # Test for the marker alone: after strip() an empty doc comment is
+        # just "#:", which a test for "#: " would silently drop.
+        if not line.startswith('#:'):
+            continue
+        line = line[2:]
+        # only the single space separating marker and text is removed
+        if line.startswith(' '):
+            line = line[1:]
+        result.append(line)
+    if result and result[-1]:
+        result.append('')
+    return result
--- a/sphinx/util/jsdump.py
+++ b/sphinx/util/jsdump.py
@ -6,7 +6,7 @@
    This module implements a simple JavaScript serializer.
    Uses the basestring encode function from simplejson by Bob Ippolito.

-    :copyright: Copyright 2008 by the Sphinx team, see AUTHORS.
+    :copyright: Copyright 2007-2009 by the Sphinx team, see AUTHORS.
    :license: BSD, see LICENSE for details.
 """

--- a/tests/root/includes.txt
+++ b/tests/root/includes.txt
@ -15,6 +15,28 @@ Test file and literal inclusion
 .. include:: wrongenc.inc
   :encoding: latin-1

+Literalinclude options
+======================
+
+.. highlight:: text
+
+.. cssclass:: inc-pyobj1
+.. literalinclude:: literal.inc
+   :pyobject: Foo
+
+.. cssclass:: inc-pyobj2
+.. literalinclude:: literal.inc
+   :pyobject: Bar.baz
+
+.. cssclass:: inc-lines
+.. literalinclude:: literal.inc
+   :lines: 6-7,9
+
+.. cssclass:: inc-startend
+.. literalinclude:: literal.inc
+   :start-after: coding: utf-8
+   :end-before: class Foo
+

 Testing downloadable files
 ==========================
--- a/tests/root/literal.inc
+++ b/tests/root/literal.inc
@ -2,3 +2,12 @@
 # -*- coding: utf-8 -*-

 foo = u"Including Unicode characters: üöä"
+
+class Foo:
+    pass
+
+class Bar:
+    def baz():
+        pass
+
+def bar(): pass
--- a/tests/test_autodoc.py
+++ b/tests/test_autodoc.py
@ -173,13 +173,14 @@ def test_format_signature():

 def test_get_doc():
    def getdocl(*args):
-        # strip the empty line at the end
-        return list(gen.get_doc(*args))[:-1]
+        ds = gen.get_doc(*args)
+        # for testing purposes, concat them and strip the empty line at the end
+        return sum(ds, [])[:-1]

    # objects without docstring
    def f():
        pass
-    assert getdocl('function', 'f', f) == []
+    assert getdocl('function', f) == []

    # standard function, diverse docstring styles...
    def f():
@ -189,7 +190,7 @@ def test_get_doc():
        Docstring
        """
    for func in (f, g):
-        assert getdocl('function', 'f', func) == ['Docstring']
+        assert getdocl('function', func) == ['Docstring']

    # first line vs. other lines indentation
    def f():
@ -198,17 +199,17 @@ def test_get_doc():
        Other
          lines
        """
-    assert getdocl('function', 'f', f) == ['First line', '', 'Other', '  lines']
+    assert getdocl('function', f) == ['First line', '', 'Other', '  lines']

    # charset guessing (this module is encoded in utf-8)
    def f():
        """Döcstring"""
-    assert getdocl('function', 'f', f) == [u'Döcstring']
+    assert getdocl('function', f) == [u'Döcstring']

    # already-unicode docstrings must be taken literally
    def f():
        u"""Döcstring"""
-    assert getdocl('function', 'f', f) == [u'Döcstring']
+    assert getdocl('function', f) == [u'Döcstring']

    # class docstring: depends on config value which one is taken
    class C:
@ -216,11 +217,11 @@ def test_get_doc():
        def __init__(self):
            """Init docstring"""
    gen.env.config.autoclass_content = 'class'
-    assert getdocl('class', 'C', C) == ['Class docstring']
+    assert getdocl('class', C) == ['Class docstring']
    gen.env.config.autoclass_content = 'init'
-    assert getdocl('class', 'C', C) == ['Init docstring']
+    assert getdocl('class', C) == ['Init docstring']
    gen.env.config.autoclass_content = 'both'
-    assert getdocl('class', 'C', C) == ['Class docstring', '', 'Init docstring']
+    assert getdocl('class', C) == ['Class docstring', '', 'Init docstring']

    class D:
        """Class docstring"""
@ -232,18 +233,21 @@ def test_get_doc():
            """

    # Indentation is normalized for 'both'
-    assert getdocl('class', 'D', D) == ['Class docstring', '', 'Init docstring',
-                                        '', 'Other', ' lines']
+    assert getdocl('class', D) == ['Class docstring', '', 'Init docstring',
+                                   '', 'Other', ' lines']
+
+
+def test_docstring_processing():
+    def process(what, name, obj):
+        return list(gen.process_doc(gen.get_doc(what, obj), what, name, obj))

    class E:
        def __init__(self):
            """Init docstring"""

    # docstring processing by event handler
-    assert getdocl('class', 'bar', E) == ['Init docstring', '', '42']
+    assert process('class', 'bar', E) == ['Init docstring', '', '42', '']

-
-def test_docstring_processing_functions():
    lid = app.connect('autodoc-process-docstring', cut_lines(1, 1, ['function']))
    def f():
        """
@ -251,7 +255,7 @@ def test_docstring_processing_functions():
        second line
        third line
        """
-    assert list(gen.get_doc('function', 'f', f)) == ['second line', '']
+    assert process('function', 'f', f) == ['second line', '']
    app.disconnect(lid)

    lid = app.connect('autodoc-process-docstring', between('---', ['function']))
@ -263,7 +267,7 @@ def test_docstring_processing_functions():
        ---
        third line
        """
-    assert list(gen.get_doc('function', 'f', f)) == ['second line', '']
+    assert process('function', 'f', f) == ['second line', '']
    app.disconnect(lid)


@ -289,7 +293,7 @@ def test_generate():

    def assert_result_contains(item, *args):
        gen.generate(*args)
-        print '\n'.join(gen.result)
+        #print '\n'.join(gen.result)
        assert len(gen.warnings) == 0, gen.warnings
        assert item in gen.result
        del gen.result[:]
@ -325,7 +329,10 @@ def test_generate():
    assert_processes(should, 'class', 'Class', [], None)
    should.extend([('method', 'test_autodoc.Class.meth')])
    assert_processes(should, 'class', 'Class', ['meth'], None)
-    should.extend([('attribute', 'test_autodoc.Class.prop')])
+    should.extend([('attribute', 'test_autodoc.Class.prop'),
+                   ('attribute', 'test_autodoc.Class.attr'),
+                   ('attribute', 'test_autodoc.Class.docattr'),
+                   ('attribute', 'test_autodoc.Class.udocattr')])
    assert_processes(should, 'class', 'Class', ['__all__'], None)
    options.undoc_members = True
    should.append(('method', 'test_autodoc.Class.undocmeth'))
@ -369,6 +376,11 @@ def test_generate():
                      ('method', 'test_autodoc.Outer.Inner.meth')],
                     'class', 'Outer', ['__all__'], None)

+    # test generation for C modules (which have no source file)
+    gen.env.currmodule = 'time'
+    assert_processes([('function', 'time.asctime')], 'function', 'asctime', [], None)
+    assert_processes([('function', 'time.asctime')], 'function', 'asctime', [], None)
+

 # --- generate fodder ------------

@ -398,10 +410,22 @@ class Class(Base):
        """Method that should be skipped."""
        pass

+    # should not be documented
+    skipattr = 'foo'
+
+    #: should be documented -- süß
+    attr = 'bar'
+
    @property
    def prop(self):
        """Property."""

+    docattr = 'baz'
+    """should likewise be documented -- süß"""
+
+    udocattr = 'quux'
+    u"""should be documented as well - süß"""
+
 class CustomDict(dict):
    """Docstring."""

@ -421,4 +445,5 @@ class Outer(object):
        def meth(self):
            """Foo"""

+    # should be documented as an alias
    factory = dict
--- a/tests/test_build.py
+++ b/tests/test_build.py
@ -10,6 +10,7 @@
 """

 import os
+import re
 import sys
 import difflib
 import htmlentitydefs
@ -32,7 +33,7 @@ WARNING: %(root)s/images.txt:9: Image file not readable: foo.png
 WARNING: %(root)s/images.txt:23: Nonlocal image URI found: http://www.python.org/logo.png
 WARNING: %(root)s/includes.txt:: (WARNING/2) Encoding 'utf-8' used for reading included \
 file u'wrongenc.inc' seems to be wrong, try giving an :encoding: option
-WARNING: %(root)s/includes.txt:34: Download file not readable: nonexisting.png
+WARNING: %(root)s/includes.txt:56: Download file not readable: nonexisting.png
 """

 HTML_WARNINGS = ENV_WARNINGS + """\
@ -61,11 +62,19 @@ HTML_XPATH = {
        ".//pre": u'Max Strauß',
        ".//a[@href='_downloads/img.png']": '',
        ".//a[@href='_downloads/img1.png']": '',
+        ".//div[@class='inc-pyobj1 highlight-text']/div/pre":
+            r'^class Foo:\n    pass\n\s*$',
+        ".//div[@class='inc-pyobj2 highlight-text']/div/pre":
+            r'^    def baz\(\):\n        pass\n\s*$',
+        ".//div[@class='inc-lines highlight-text']/div/pre":
+            r'^class Foo:\n    pass\nclass Bar:\n$',
+        ".//div[@class='inc-startend highlight-text']/div/pre":
+            ur'^foo = u"Including Unicode characters: üöä"\n$',
    },
    'autodoc.html': {
        ".//dt[@id='test_autodoc.Class']": '',
-        ".//dt[@id='test_autodoc.function']/em": '**kwds',
-        ".//dd": 'Return spam.',
+        ".//dt[@id='test_autodoc.function']/em": r'\*\*kwds',
+        ".//dd": r'Return spam\.',
    },
    'markup.html': {
        ".//meta[@name='author'][@content='Me']": '',
@ -81,7 +90,7 @@ HTML_XPATH = {
    },
    'contents.html': {
        ".//meta[@name='hc'][@content='hcval']": '',
-        ".//td[@class='label']": '[Ref1]',
+        ".//td[@class='label']": r'\[Ref1\]',
        ".//li[@class='toctree-l1']/a": 'Testing various markup',
        ".//li[@class='toctree-l2']/a": 'Admonitions',
        ".//title": 'Sphinx <Tests>',
@ -117,18 +126,23 @@ def test_html(app):
        parser = NslessParser()
        parser.entity.update(htmlentitydefs.entitydefs)
        etree = ET.parse(os.path.join(app.outdir, fname), parser)
-        for path, text in paths.iteritems():
+        for path, check in paths.iteritems():
            nodes = list(etree.findall(path))
            assert nodes != []
-            if not text:
+            if callable(check):
+                check(nodes)
+            elif not check:
                # only check for node presence
                continue
-            for node in nodes:
-                if node.text and text in node.text:
-                    break
            else:
-                assert False, ('%r not found in any node matching '
-                               'path %s in %s' % (text, path, fname))
+                rex = re.compile(check)
+                for node in nodes:
+                    if node.text and rex.search(node.text):
+                        break
+                else:
+                    assert False, ('%r not found in any node matching '
+                                   'path %s in %s: %r' % (check, path, fname,
+                                   [node.text for node in nodes]))


@with_app(buildername='latex', warning=latex_warnfile)
--- a/tests/test_markup.py
+++ b/tests/test_markup.py
@ -17,11 +17,13 @@ from docutils import frontend, utils, nodes
 from docutils.parsers import rst

 from sphinx import addnodes
+from sphinx.util import texescape
 from sphinx.writers.html import HTMLWriter, SmartyPantsHTMLTranslator
 from sphinx.writers.latex import LaTeXWriter, LaTeXTranslator

 def setup_module():
    global app, settings, parser
+    texescape.init()  # otherwise done by the latex builder
    app = TestApp(cleanenv=True)
    optparser = frontend.OptionParser(components=(rst.Parser, HTMLWriter, LaTeXWriter))
    settings = optparser.get_default_values()