Merge pull request #4294 from tk0miya/refactor_parser

Refactor docutils components of Sphinx (reader, parser, FileInput and so on).
2025-02-25 18:55:22 -06:00 · 2017-12-14 23:13:28 +09:00 · 2017-12-14 23:13:28 +09:00 · acf5eaae84
commit acf5eaae84
parent 9d44cb5952 51580fabb4
9 changed files with 341 additions and 115 deletions
--- a/sphinx/application.py
+++ b/sphinx/application.py
@ -83,6 +83,7 @@ builtin_extensions = (
    'sphinx.directives.code',
    'sphinx.directives.other',
    'sphinx.directives.patches',
+    'sphinx.io',
    'sphinx.parsers',
    'sphinx.roles',
    'sphinx.transforms.post_transforms',
--- a/sphinx/io.py
+++ b/sphinx/io.py
@ -8,13 +8,15 @@
    :copyright: Copyright 2007-2017 by the Sphinx team, see AUTHORS.
    :license: BSD, see LICENSE for details.
 """
+import re
 import codecs

 from docutils.io import FileInput, NullOutput
 from docutils.core import Publisher
 from docutils.readers import standalone
+from docutils.statemachine import StringList, string2lines
 from docutils.writers import UnfilteredWriter
-from six import string_types, text_type, iteritems
+from six import text_type
 from typing import Any, Union  # NOQA

 from sphinx.transforms import (
@ -28,7 +30,6 @@ from sphinx.transforms.i18n import (
    PreserveTranslatableMessages, Locale, RemoveTranslatableInline,
 )
 from sphinx.util import logging
-from sphinx.util import import_object, split_docinfo
 from sphinx.util.docutils import LoggingReporter

 if False:
@ -42,44 +43,18 @@ if False:
    from sphinx.builders import Builder  # NOQA
    from sphinx.environment import BuildEnvironment  # NOQA

+docinfo_re = re.compile(':\\w+:.*?')
+

 logger = logging.getLogger(__name__)


 class SphinxBaseReader(standalone.Reader):
    """
-    Add our source parsers
+    A base class of readers for Sphinx.
+
+    This replaces reporter by Sphinx's on generating document.
    """
-    def __init__(self, app, parsers={}, *args, **kwargs):
-        # type: (Sphinx, Dict[unicode, Parser], Any, Any) -> None
-        standalone.Reader.__init__(self, *args, **kwargs)
-        self.parser_map = {}  # type: Dict[unicode, Parser]
-        for suffix, parser_class in parsers.items():
-            if isinstance(parser_class, string_types):
-                parser_class = import_object(parser_class, 'source parser')  # type: ignore
-            parser = parser_class()
-            if hasattr(parser, 'set_application'):
-                parser.set_application(app)
-            self.parser_map[suffix] = parser
-
-    def read(self, source, parser, settings):
-        # type: (Input, Parser, Dict) -> nodes.document
-        self.source = source
-
-        for suffix in self.parser_map:
-            if source.source_path.endswith(suffix):
-                self.parser = self.parser_map[suffix]
-                break
-        else:
-            # use special parser for unknown file-extension '*' (if exists)
-            self.parser = self.parser_map.get('*')
-
-        if not self.parser:
-            self.parser = parser
-        self.settings = settings
-        self.input = self.source.read()
-        self.parse()
-        return self.document

    def get_transforms(self):
        # type: () -> List[Transform]
@ -87,17 +62,19 @@ class SphinxBaseReader(standalone.Reader):

    def new_document(self):
        # type: () -> nodes.document
+        """Creates a new document object which having a special reporter object good
+        for logging.
+        """
        document = standalone.Reader.new_document(self)
        reporter = document.reporter
-        document.reporter = LoggingReporter(reporter.source, reporter.report_level,
-                                            reporter.halt_level, reporter.debug_flag,
-                                            reporter.error_handler)
+        document.reporter = LoggingReporter.from_reporter(reporter)
+        document.reporter.set_source(self.source)
        return document


 class SphinxStandaloneReader(SphinxBaseReader):
    """
-    Add our own transforms.
+    A basic document reader for Sphinx.
    """
    transforms = [ApplySourceWorkaround, ExtraTranslatableNodes, PreserveTranslatableMessages,
                  Locale, CitationReferences, DefaultSubstitutions, MoveModuleTargets,
@ -108,29 +85,30 @@ class SphinxStandaloneReader(SphinxBaseReader):

 class SphinxI18nReader(SphinxBaseReader):
    """
-    Replacer for document.reporter.get_source_and_line method.
+    A document reader for i18n.

-    reST text lines for translation do not have the original source line number.
-    This class provides the correct line numbers when reporting.
+    This returns the source line number of original text as current source line number
+    to let users know where the error happened.
+    Because the translated texts are partial and they don't have correct line numbers.
    """

+    lineno = None  # type: int
    transforms = [ApplySourceWorkaround, ExtraTranslatableNodes, CitationReferences,
                  DefaultSubstitutions, MoveModuleTargets, HandleCodeBlocks,
                  AutoNumbering, SortIds, RemoveTranslatableInline,
                  FilterSystemMessages, RefOnlyBulletListTransform,
                  UnreferencedFootnotesDetector]

-    def __init__(self, *args, **kwargs):
-        # type: (Any, Any) -> None
-        SphinxBaseReader.__init__(self, *args, **kwargs)
-        self.lineno = None  # type: int
-
    def set_lineno_for_reporter(self, lineno):
        # type: (int) -> None
+        """Stores the source line number of original text."""
        self.lineno = lineno

    def new_document(self):
        # type: () -> nodes.document
+        """Creates a new document object which having a special reporter object for
+        translation.
+        """
        document = SphinxBaseReader.new_document(self)
        reporter = document.reporter

@ -143,6 +121,8 @@ class SphinxI18nReader(SphinxBaseReader):


 class SphinxDummyWriter(UnfilteredWriter):
+    """Dummy writer module used for generating doctree."""
+
    supported = ('html',)  # needed to keep "meta" nodes

    def translate(self):
@ -155,7 +135,13 @@ def SphinxDummySourceClass(source, *args, **kwargs):
    return source


-class SphinxFileInput(FileInput):
+class SphinxBaseFileInput(FileInput):
+    """A base class of SphinxFileInput.
+
+    It supports to replace unknown Unicode characters to '?'. And it also emits
+    Sphinx events ``source-read`` on reading.
+    """
+
    def __init__(self, app, env, *args, **kwds):
        # type: (Sphinx, BuildEnvironment, Any, Any) -> None
        self.app = app
@ -175,27 +161,16 @@ class SphinxFileInput(FileInput):

    def read(self):
        # type: () -> unicode
-        def get_parser_type(source_path):
-            # type: (unicode) -> Tuple[unicode]
-            for suffix, parser_class in iteritems(self.app.registry.get_source_parsers()):
-                if source_path.endswith(suffix):
-                    if isinstance(parser_class, string_types):
-                        parser_class = import_object(parser_class, 'source parser')  # type: ignore  # NOQA
-                    return parser_class.supported
-            return ('restructuredtext',)
+        """Reads the contents from file.

+        After reading, it emits Sphinx event ``source-read``.
+        """
        data = FileInput.read(self)
-        if self.app:
-            arg = [data]
-            self.app.emit('source-read', self.env.docname, arg)
-            data = arg[0]
-        docinfo, data = split_docinfo(data)
-        if 'restructuredtext' in get_parser_type(self.source_path):
-            if self.env.config.rst_epilog:
-                data = data + '\n' + self.env.config.rst_epilog + '\n'
-            if self.env.config.rst_prolog:
-                data = self.env.config.rst_prolog + '\n' + data
-        return docinfo + data
+
+        # emit source-read event
+        arg = [data]
+        self.app.emit('source-read', self.env.docname, arg)
+        return arg[0]

    def warn_and_replace(self, error):
        # type: (Any) -> Tuple
@ -213,14 +188,84 @@ class SphinxFileInput(FileInput):
        return (u'?', error.end)


+class SphinxFileInput(SphinxBaseFileInput):
+    """A basic FileInput for Sphinx.
+
+    It adds no behaviour of its own on top of ``SphinxBaseFileInput``.
+    """
+
+
+class SphinxRSTFileInput(SphinxBaseFileInput):
+    """A reST FileInput for Sphinx.
+
+    This FileInput automatically prepends :confval:`rst_prolog` and appends
+    :confval:`rst_epilog` to the content it reads.
+
+    .. important::
+
+       This FileInput uses an instance of ``StringList`` as a return value of ``read()``
+       method to indicate original source filename and line numbers after prepending and
+       appending.
+       For that reason, ``sphinx.parsers.RSTParser`` should be used with this to parse
+       a content correctly.
+    """
+
+    def prepend_prolog(self, text, prolog):
+        # type: (StringList, unicode) -> None
+        """Insert *prolog* into *text* (in place), after any leading docinfo lines.
+
+        Prolog lines are tagged with the source ``<rst_prolog>``; the blank separator
+        lines added around them are tagged ``<generated>``.  A prolog without any
+        lines leaves *text* untouched.
+        """
+        prolog_lines = prolog.splitlines()
+        if not prolog_lines:
+            # nothing to insert
+            return
+
+        pos = self.count_docinfo_lines(text)
+        if pos:
+            # insert a blank line after docinfo
+            text.insert(pos, '', '<generated>', 0)
+            pos += 1
+
+        # insert prolog (after docinfo if exists)
+        for lineno, line in enumerate(prolog_lines):
+            text.insert(pos + lineno, line, '<rst_prolog>', lineno)
+
+        # separate the prolog from the document body with a blank line
+        text.insert(pos + len(prolog_lines), '', '<generated>', 0)
+
+    def append_epilog(self, text, epilog):
+        # type: (StringList, unicode) -> None
+        """Append a blank line followed by the lines of *epilog* to *text* (in place)."""
+        # append a blank line and rst_epilog
+        text.append('', '<generated>', 0)
+        for lineno, line in enumerate(epilog.splitlines()):
+            text.append(line, '<rst_epilog>', lineno)
+
+    def read(self):
+        # type: () -> StringList
+        """Read the file and return its lines as a ``StringList``.
+
+        Each line is recorded with ``self.source_path`` and its zero-based index; then
+        :confval:`rst_prolog` and :confval:`rst_epilog` are added when they are set.
+        """
+        inputstring = SphinxBaseFileInput.read(self)
+        lines = string2lines(inputstring, convert_whitespace=True)
+        content = StringList()
+        for lineno, line in enumerate(lines):
+            content.append(line, self.source_path, lineno)
+
+        if self.env.config.rst_prolog:
+            self.prepend_prolog(content, self.env.config.rst_prolog)
+        if self.env.config.rst_epilog:
+            self.append_epilog(content, self.env.config.rst_epilog)
+
+        return content
+
+    def count_docinfo_lines(self, content):
+        # type: (StringList) -> int
+        """Return the number of leading lines of *content* that match ``docinfo_re``.
+
+        Counting stops at the first line that does not match.  When every line
+        matches (e.g. a document consisting only of ``:orphan:``) all of them are
+        counted, so that the prolog is still placed after the docinfo.
+        """
+        count = 0
+        for line in content.data:
+            if not docinfo_re.match(line):
+                break
+            count += 1
+        return count
+
+
 def read_doc(app, env, filename):
    # type: (Sphinx, BuildEnvironment, unicode) -> nodes.document
    """Parse a document and convert to doctree."""
-    reader = SphinxStandaloneReader(app, parsers=app.registry.get_source_parsers())
-    source = SphinxFileInput(app, env, source=None, source_path=filename,
-                             encoding=env.config.source_encoding)
+    input_class = app.registry.get_source_input(filename)
+    reader = SphinxStandaloneReader()
+    source = input_class(app, env, source=None, source_path=filename,
+                         encoding=env.config.source_encoding)
+    parser = app.registry.create_source_parser(app, filename)

    pub = Publisher(reader=reader,
+                    parser=parser,
                    writer=SphinxDummyWriter(),
                    source_class=SphinxDummySourceClass,
                    destination=NullOutput())
@ -229,3 +274,8 @@ def read_doc(app, env, filename):
    pub.set_source(source, filename)
    pub.publish()
    return pub.document
+
+
+def setup(app):
+    # type: (Sphinx) -> None
+    """Register the source input classes defined in this module on ``app.registry``."""
+    registry = app.registry
+    # '*' is the fallback file type; reST sources get the prolog/epilog aware input
+    registry.add_source_input('*', SphinxFileInput)
+    registry.add_source_input('restructuredtext', SphinxRSTFileInput)
--- a/sphinx/parsers.py
+++ b/sphinx/parsers.py
@ -11,6 +11,8 @@

 import docutils.parsers
 import docutils.parsers.rst
+from docutils.parsers.rst import states
+from docutils.statemachine import StringList
 from docutils.transforms.universal import SmartQuotes

 from sphinx.transforms import SphinxSmartQuotes
@ -18,6 +20,7 @@ from sphinx.transforms import SphinxSmartQuotes
 if False:
    # For type annotation
    from typing import Any, Dict, List, Type  # NOQA
+    from docutils import nodes  # NOQA
    from docutils.transforms import Transform  # NOQA
    from sphinx.application import Sphinx  # NOQA

@ -56,7 +59,7 @@ class Parser(docutils.parsers.Parser):


 class RSTParser(docutils.parsers.rst.Parser):
-    """A reST parser customized for Sphinx."""
+    """A reST parser for Sphinx."""

    def get_transforms(self):
        # type: () -> List[Type[Transform]]
@ -66,6 +69,26 @@ class RSTParser(docutils.parsers.rst.Parser):
        transforms.append(SphinxSmartQuotes)
        return transforms

+    def parse(self, inputstring, document):
+        # type: (Any, nodes.document) -> None
+        """Parse *inputstring* and build the document tree in *document*.
+
+        Besides plain strings, a ``StringList`` is accepted as *inputstring*.
+        It enables to handle mixed contents (cf. :confval:`rst_prolog`) correctly.
+        """
+        if not isinstance(inputstring, StringList):
+            # otherwise, inputstring might be a string. It will be handled by superclass.
+            docutils.parsers.rst.Parser.parse(self, inputstring, document)
+            return
+
+        self.setup_parse(inputstring, document)
+        machine = states.RSTStateMachine(
+            state_classes=self.state_classes,
+            initial_state=self.initial_state,
+            debug=document.reporter.debug_flag)
+        self.statemachine = machine
+        # Give inputstring directly to statemachine.
+        machine.run(inputstring, document, inliner=self.inliner)
+        self.finish_parse()
+

 def setup(app):
    # type: (Sphinx) -> Dict[unicode, Any]
--- a/sphinx/registry.py
+++ b/sphinx/registry.py
@ -13,21 +13,24 @@ from __future__ import print_function
 import traceback

 from pkg_resources import iter_entry_points
-from six import itervalues
+from six import iteritems, itervalues, string_types

 from sphinx.errors import ExtensionError, SphinxError, VersionRequirementError
 from sphinx.extension import Extension
 from sphinx.domains import ObjType
 from sphinx.domains.std import GenericObject, Target
 from sphinx.locale import __
+from sphinx.parsers import Parser as SphinxParser
 from sphinx.roles import XRefRole
 from sphinx.util import logging
+from sphinx.util import import_object
 from sphinx.util.docutils import directive_helper

 if False:
    # For type annotation
    from typing import Any, Callable, Dict, Iterator, List, Type  # NOQA
    from docutils import nodes  # NOQA
+    from docutils.io import Input  # NOQA
    from docutils.parsers import Parser  # NOQA
    from sphinx.application import Sphinx  # NOQA
    from sphinx.builders import Builder  # NOQA
@ -48,6 +51,7 @@ class SphinxComponentRegistry(object):
        self.builders = {}          # type: Dict[unicode, Type[Builder]]
        self.domains = {}           # type: Dict[unicode, Type[Domain]]
        self.source_parsers = {}    # type: Dict[unicode, Parser]
+        self.source_inputs = {}     # type: Dict[unicode, Input]
        self.translators = {}       # type: Dict[unicode, nodes.NodeVisitor]

    def add_builder(self, builder):
@ -155,15 +159,61 @@ class SphinxComponentRegistry(object):
        stddomain.object_types[directivename] = ObjType(objname or directivename, rolename)

    def add_source_parser(self, suffix, parser):
-        # type: (unicode, Parser) -> None
+        # type: (unicode, Type[Parser]) -> None
        if suffix in self.source_parsers:
            raise ExtensionError(__('source_parser for %r is already registered') % suffix)
        self.source_parsers[suffix] = parser

+    def get_source_parser(self, filename):
+        # type: (unicode) -> Type[Parser]
+        """Return the parser class registered for *filename*.
+
+        The first registered suffix that *filename* ends with wins; without a match
+        the ``'*'`` entry is used.  A parser registered as a string is imported
+        before it is returned.  Raises SphinxError if no parser is found.
+        """
+        for suffix, candidate in iteritems(self.source_parsers):
+            if filename.endswith(suffix):
+                parser_class = candidate
+                break
+        else:
+            # use special parser for unknown file-extension '*' (if exists)
+            parser_class = self.source_parsers.get('*')
+
+        if parser_class is None:
+            raise SphinxError(__('Source parser for %s not registered') % filename)
+
+        if isinstance(parser_class, string_types):
+            parser_class = import_object(parser_class, 'source parser')  # type: ignore
+        return parser_class
+
    def get_source_parsers(self):
        # type: () -> Dict[unicode, Parser]
        return self.source_parsers

+    def create_source_parser(self, app, filename):
+        # type: (Sphinx, unicode) -> Parser
+        """Instantiate the parser registered for *filename*.
+
+        Instances of ``sphinx.parsers.Parser`` additionally receive *app* through
+        ``set_application()``.
+        """
+        parser = self.get_source_parser(filename)()
+        if isinstance(parser, SphinxParser):
+            parser.set_application(app)
+        return parser
+
+    def add_source_input(self, filetype, input_class):
+        # type: (unicode, Type[Input]) -> None
+        """Register *input_class* for *filetype*.
+
+        Raises ExtensionError when *filetype* already has an input class.
+        """
+        registered = self.source_inputs
+        if filetype in registered:
+            raise ExtensionError(__('source_input for %r is already registered') % filetype)
+        registered[filetype] = input_class
+
+    def get_source_input(self, filename):
+        # type: (unicode) -> Type[Input]
+        """Return the input class to use for reading *filename*.
+
+        The file types listed in ``supported`` of the parser class for *filename* are
+        tried in order; without a registered match the ``'*'`` entry is used.
+        Raises SphinxError if no input class is found.
+        """
+        inputs = self.source_inputs
+        parser_class = self.get_source_parser(filename)
+        for filetype in parser_class.supported:
+            if filetype in inputs:
+                input_class = inputs[filetype]
+                break
+        else:
+            # use special source_input for unknown file-type '*' (if exists)
+            input_class = inputs.get('*')
+
+        if input_class is None:
+            raise SphinxError(__('source_input for %s not registered') % filename)
+        return input_class
+
    def add_translator(self, name, translator):
        # type: (unicode, Type[nodes.NodeVisitor]) -> None
        self.translators[name] = translator
--- a/sphinx/transforms/i18n.py
+++ b/sphinx/transforms/i18n.py
@ -50,15 +50,12 @@ def publish_msgstr(app, source, source_path, source_line, config, settings):
    :rtype: docutils.nodes.document
    """
    from sphinx.io import SphinxI18nReader
-    reader = SphinxI18nReader(
-        app=app,
-        parsers=app.registry.get_source_parsers(),
-        parser_name='restructuredtext',  # default parser
-    )
+    reader = SphinxI18nReader()
    reader.set_lineno_for_reporter(source_line)
+    parser = app.registry.create_source_parser(app, '')
    doc = reader.read(
        source=StringInput(source=source, source_path=source_path),
-        parser=reader.parser,
+        parser=parser,
        settings=settings,
    )
    try:
--- a/sphinx/util/init.py
+++ b/sphinx/util/init.py
@ -564,16 +564,6 @@ def encode_uri(uri):
    return urlunsplit(split)


-def split_docinfo(text):
-    # type: (unicode) -> Sequence[unicode]
-    docinfo_re = re.compile('\\A((?:\\s*:\\w+:.*?\n(?:[ \\t]+.*?\n)*)+)', re.M)
-    result = docinfo_re.split(text, 1)  # type: ignore
-    if len(result) == 1:
-        return '', result[0]
-    else:
-        return result[1:]
-
-
 def display_chunk(chunk):
    # type: (Any) -> unicode
    if isinstance(chunk, (list, tuple)):
--- a/sphinx/util/docutils.py
+++ b/sphinx/util/docutils.py
@ -18,8 +18,9 @@ from contextlib import contextmanager

 import docutils
 from docutils.languages import get_language
-from docutils.utils import Reporter
+from docutils.statemachine import ViewList
 from docutils.parsers.rst import directives, roles, convert_directive_function
+from docutils.utils import Reporter

 from sphinx.errors import ExtensionError
 from sphinx.locale import __
@ -33,6 +34,7 @@ if False:
    from typing import Any, Callable, Iterator, List, Tuple  # NOQA
    from docutils import nodes  # NOQA
    from sphinx.environment import BuildEnvironment  # NOQA
+    from sphinx.io import SphinxFileInput  # NOQA


 __version_info__ = tuple(LooseVersion(docutils.__version__).version)
@ -167,16 +169,34 @@ class WarningStream(object):


 class LoggingReporter(Reporter):
+    @classmethod
+    def from_reporter(cls, reporter):
+        # type: (Reporter) -> LoggingReporter
+        """Build an instance of this class from the settings of another reporter.
+
+        The source, report level, halt level, debug flag and error handler of
+        *reporter* are passed on, in that order, to the constructor.
+        """
+        settings = (reporter.source, reporter.report_level, reporter.halt_level,
+                    reporter.debug_flag, reporter.error_handler)
+        return cls(*settings)
+
    def __init__(self, source, report_level, halt_level,
                 debug=False, error_handler='backslashreplace'):
        # type: (unicode, int, int, bool, unicode) -> None
        stream = WarningStream()
        Reporter.__init__(self, source, report_level, halt_level,
                          stream, debug, error_handler=error_handler)
+        self.source_and_line = None  # type: SphinxFileInput

-    def set_conditions(self, category, report_level, halt_level, debug=False):
-        # type: (unicode, int, int, bool) -> None
-        Reporter.set_conditions(self, category, report_level, halt_level, debug=debug)
+    def set_source(self, source):
+        # type: (SphinxFileInput) -> None
+        """Remember *source* so that ``system_message()`` can consult it."""
+        self.source_and_line = source
+
+    def system_message(self, *args, **kwargs):
+        # type: (Any, Any) -> Any
+        """Create a system message, remapping its source and line when possible.
+
+        When a truthy ``line`` keyword is given and the object stored by
+        ``set_source()`` is a ``ViewList``, the ``source`` and ``line`` keywords are
+        replaced by what the list's ``info()`` reports for that line before the call
+        is delegated to ``Reporter.system_message()``.
+
+        NOTE(review): ``set_source()`` is annotated as taking a SphinxFileInput while
+        this branch only fires for a ViewList -- confirm which one callers pass.
+        """
+        line = kwargs.get('line')
+        if line and isinstance(self.source_and_line, ViewList):
+            # replace source parameter if source is set
+            kwargs['source'], kwargs['line'] = self.source_and_line.info(line)
+
+        return Reporter.system_message(self, *args, **kwargs)


 def is_html5_writer_available():
--- a/tests/test_io.py
+++ b/tests/test_io.py
@ -0,0 +1,118 @@
+# -*- coding: utf-8 -*-
+"""
+    test_sphinx_io
+    ~~~~~~~~~~~~~~
+
+    Tests io modules.
+
+    :copyright: Copyright 2007-2017 by the Sphinx team, see AUTHORS.
+    :license: BSD, see LICENSE for details.
+"""
+
+import pytest
+from six import StringIO
+
+from sphinx.io import SphinxRSTFileInput
+
+
+@pytest.mark.sphinx(testroot='basic')
+def test_SphinxRSTFileInput(app):
+    """Check the lines and (source, offset) info returned by SphinxRSTFileInput.read()."""
+    # read() emits 'source-read' with env.docname; presumably that name is looked up
+    # from temp_data -- TODO confirm
+    app.env.temp_data['docname'] = 'index'
+
+    # normal case
+    text = ('hello Sphinx world\n'
+            'Sphinx is a document generator')
+    source = SphinxRSTFileInput(app, app.env, source=StringIO(text),
+                                source_path='dummy.rst', encoding='utf-8')
+    result = source.read()
+    assert result.data == ['hello Sphinx world',
+                           'Sphinx is a document generator']
+    assert result.info(0) == ('dummy.rst', 0)
+    assert result.info(1) == ('dummy.rst', 1)
+    assert result.info(2) == ('dummy.rst', None)  # out of range
+
+    # having rst_prolog ends without CR
+    app.env.config.rst_prolog = 'this is rst_prolog\nhello reST!'
+    source = SphinxRSTFileInput(app, app.env, source=StringIO(text),
+                                source_path='dummy.rst', encoding='utf-8')
+    result = source.read()
+    assert result.data == ['this is rst_prolog',
+                           'hello reST!',
+                           '',
+                           'hello Sphinx world',
+                           'Sphinx is a document generator']
+    assert result.info(0) == ('<rst_prolog>', 0)
+    assert result.info(1) == ('<rst_prolog>', 1)
+    assert result.info(2) == ('<generated>', 0)
+    assert result.info(3) == ('dummy.rst', 0)
+    assert result.info(4) == ('dummy.rst', 1)
+
+    # having rst_prolog ends with CR
+    app.env.config.rst_prolog = 'this is rst_prolog\nhello reST!\n'
+    source = SphinxRSTFileInput(app, app.env, source=StringIO(text),
+                                source_path='dummy.rst', encoding='utf-8')
+    result = source.read()
+    assert result.data == ['this is rst_prolog',
+                           'hello reST!',
+                           '',
+                           'hello Sphinx world',
+                           'Sphinx is a document generator']
+
+    # having docinfo and rst_prolog
+    docinfo_text = (':title: test of SphinxFileInput\n'
+                    ':author: Sphinx team\n'
+                    '\n'
+                    'hello Sphinx world\n'
+                    'Sphinx is a document generator\n')
+    app.env.config.rst_prolog = 'this is rst_prolog\nhello reST!'
+    source = SphinxRSTFileInput(app, app.env, source=StringIO(docinfo_text),
+                                source_path='dummy.rst', encoding='utf-8')
+    result = source.read()
+    assert result.data == [':title: test of SphinxFileInput',
+                           ':author: Sphinx team',
+                           '',
+                           'this is rst_prolog',
+                           'hello reST!',
+                           '',
+                           '',
+                           'hello Sphinx world',
+                           'Sphinx is a document generator']
+    assert result.info(0) == ('dummy.rst', 0)
+    assert result.info(1) == ('dummy.rst', 1)
+    assert result.info(2) == ('<generated>', 0)
+    assert result.info(3) == ('<rst_prolog>', 0)
+    assert result.info(4) == ('<rst_prolog>', 1)
+    assert result.info(5) == ('<generated>', 0)
+    assert result.info(6) == ('dummy.rst', 2)
+    assert result.info(7) == ('dummy.rst', 3)
+    assert result.info(8) == ('dummy.rst', 4)
+    assert result.info(9) == ('dummy.rst', None)  # out of range
+
+    # having rst_epilog
+    app.env.config.rst_prolog = None
+    app.env.config.rst_epilog = 'this is rst_epilog\ngood-bye reST!'
+    source = SphinxRSTFileInput(app, app.env, source=StringIO(text),
+                                source_path='dummy.rst', encoding='utf-8')
+    result = source.read()
+    assert result.data == ['hello Sphinx world',
+                           'Sphinx is a document generator',
+                           '',
+                           'this is rst_epilog',
+                           'good-bye reST!']
+    assert result.info(0) == ('dummy.rst', 0)
+    assert result.info(1) == ('dummy.rst', 1)
+    assert result.info(2) == ('<generated>', 0)
+    assert result.info(3) == ('<rst_epilog>', 0)
+    assert result.info(4) == ('<rst_epilog>', 1)
+    assert result.info(5) == ('<rst_epilog>', None)  # out of range
+
+    # expandtabs / convert whitespaces
+    app.env.config.rst_prolog = None
+    app.env.config.rst_epilog = None
+    text = ('\thello Sphinx world\n'
+            '\v\fSphinx is a document generator')
+    source = SphinxRSTFileInput(app, app.env, source=StringIO(text),
+                                source_path='dummy.rst', encoding='utf-8')
+    result = source.read()
+    assert result.data == ['        hello Sphinx world',
+                           '  Sphinx is a document generator']
--- a/tests/test_util.py
+++ b/tests/test_util.py
@ -14,8 +14,7 @@ from mock import patch

 from sphinx.util import logging
 from sphinx.util import (
-    display_chunk, encode_uri, parselinenos, split_docinfo, status_iterator,
-    xmlname_checker
+    display_chunk, encode_uri, parselinenos, status_iterator, xmlname_checker
 )

 from sphinx.testing.util import strip_escseq
@ -36,28 +35,6 @@ def test_encode_uri():
    assert expected, encode_uri(uri)


-def test_splitdocinfo():
-    source = "Hello world.\n"
-    docinfo, content = split_docinfo(source)
-    assert docinfo == ''
-    assert content == 'Hello world.\n'
-
-    source = ":orphan:\n\nHello world.\n"
-    docinfo, content = split_docinfo(source)
-    assert docinfo == ':orphan:\n'
-    assert content == '\nHello world.\n'
-
-    source = ":author: Georg Brandl\n:title: Manual of Sphinx\n\nHello world.\n"
-    docinfo, content = split_docinfo(source)
-    assert docinfo == ':author: Georg Brandl\n:title: Manual of Sphinx\n'
-    assert content == '\nHello world.\n'
-
-    source = ":multiline: one\n\ttwo\n\tthree\n\nHello world.\n"
-    docinfo, content = split_docinfo(source)
-    assert docinfo == ":multiline: one\n\ttwo\n\tthree\n"
-    assert content == '\nHello world.\n'
-
-
 def test_display_chunk():
    assert display_chunk('hello') == 'hello'
    assert display_chunk(['hello']) == 'hello'