From a12399dbe429f552ab012099491c577de668a81d Mon Sep 17 00:00:00 2001
From: Takeshi KOMIYA <i.tkomiya@gmail.com>
Date: Thu, 7 Dec 2017 23:49:32 +0900
Subject: [PATCH 01/10] Refactor sphinx.io; separate parser generation from
 reader class

---
 sphinx/io.py              | 41 ++++-----------------------------------
 sphinx/registry.py        | 23 +++++++++++++++++++++-
 sphinx/transforms/i18n.py |  9 +++------
 3 files changed, 29 insertions(+), 44 deletions(-)

diff --git a/sphinx/io.py b/sphinx/io.py
index 84e1d7bb3..47e701f35 100644
--- a/sphinx/io.py
+++ b/sphinx/io.py
@@ -50,37 +50,6 @@ class SphinxBaseReader(standalone.Reader):
     """
     Add our source parsers
     """
-    def __init__(self, app, parsers={}, *args, **kwargs):
-        # type: (Sphinx, Dict[unicode, Parser], Any, Any) -> None
-        standalone.Reader.__init__(self, *args, **kwargs)
-        self.parser_map = {}  # type: Dict[unicode, Parser]
-        for suffix, parser_class in parsers.items():
-            if isinstance(parser_class, string_types):
-                parser_class = import_object(parser_class, 'source parser')  # type: ignore
-            parser = parser_class()
-            if hasattr(parser, 'set_application'):
-                parser.set_application(app)
-            self.parser_map[suffix] = parser
-
-    def read(self, source, parser, settings):
-        # type: (Input, Parser, Dict) -> nodes.document
-        self.source = source
-
-        for suffix in self.parser_map:
-            if source.source_path.endswith(suffix):
-                self.parser = self.parser_map[suffix]
-                break
-        else:
-            # use special parser for unknown file-extension '*' (if exists)
-            self.parser = self.parser_map.get('*')
-
-        if not self.parser:
-            self.parser = parser
-        self.settings = settings
-        self.input = self.source.read()
-        self.parse()
-        return self.document
-
     def get_transforms(self):
         # type: () -> List[Transform]
         return standalone.Reader.get_transforms(self) + self.transforms
@@ -114,17 +83,13 @@ class SphinxI18nReader(SphinxBaseReader):
     This class provides the correct line numbers when reporting.
     """
 
+    lineno = None  # type: int
     transforms = [ApplySourceWorkaround, ExtraTranslatableNodes, CitationReferences,
                   DefaultSubstitutions, MoveModuleTargets, HandleCodeBlocks,
                   AutoNumbering, SortIds, RemoveTranslatableInline,
                   FilterSystemMessages, RefOnlyBulletListTransform,
                   UnreferencedFootnotesDetector]
 
-    def __init__(self, *args, **kwargs):
-        # type: (Any, Any) -> None
-        SphinxBaseReader.__init__(self, *args, **kwargs)
-        self.lineno = None  # type: int
-
     def set_lineno_for_reporter(self, lineno):
         # type: (int) -> None
         self.lineno = lineno
@@ -216,11 +181,13 @@ class SphinxFileInput(FileInput):
 def read_doc(app, env, filename):
     # type: (Sphinx, BuildEnvironment, unicode) -> nodes.document
     """Parse a document and convert to doctree."""
-    reader = SphinxStandaloneReader(app, parsers=app.registry.get_source_parsers())
+    reader = SphinxStandaloneReader()
     source = SphinxFileInput(app, env, source=None, source_path=filename,
                              encoding=env.config.source_encoding)
+    parser = app.registry.create_source_parser(app, filename)
 
     pub = Publisher(reader=reader,
+                    parser=parser,
                     writer=SphinxDummyWriter(),
                     source_class=SphinxDummySourceClass,
                     destination=NullOutput())
diff --git a/sphinx/registry.py b/sphinx/registry.py
index 0861575db..c8ce4ad5e 100644
--- a/sphinx/registry.py
+++ b/sphinx/registry.py
@@ -13,15 +13,17 @@ from __future__ import print_function
 import traceback
 
 from pkg_resources import iter_entry_points
-from six import itervalues
+from six import iteritems, itervalues, string_types
 
 from sphinx.errors import ExtensionError, SphinxError, VersionRequirementError
 from sphinx.extension import Extension
 from sphinx.domains import ObjType
 from sphinx.domains.std import GenericObject, Target
 from sphinx.locale import __
+from sphinx.parsers import Parser as SphinxParser
 from sphinx.roles import XRefRole
 from sphinx.util import logging
+from sphinx.util import import_object
 from sphinx.util.docutils import directive_helper
 
 if False:
@@ -160,6 +162,25 @@ class SphinxComponentRegistry(object):
             raise ExtensionError(__('source_parser for %r is already registered') % suffix)
         self.source_parsers[suffix] = parser
 
+    def create_source_parser(self, app, filename):
+        # type: (Sphinx, unicode) -> Parser
+        for suffix, parser_class in iteritems(self.source_parsers):
+            if filename.endswith(suffix):
+                break
+        else:
+            # use special parser for unknown file-extension '*' (if exists)
+            parser_class = self.source_parsers.get('*')
+
+        if parser_class is None:
+            raise SphinxError(__('Source parser for %s not registered') % filename)
+        else:
+            if isinstance(parser_class, string_types):
+                parser_class = import_object(parser_class, 'source parser')  # type: ignore
+            parser = parser_class()
+            if isinstance(parser, SphinxParser):
+                parser.set_application(app)
+            return parser
+
     def get_source_parsers(self):
         # type: () -> Dict[unicode, Parser]
         return self.source_parsers
diff --git a/sphinx/transforms/i18n.py b/sphinx/transforms/i18n.py
index 4c1fbc2a7..d08cc81f4 100644
--- a/sphinx/transforms/i18n.py
+++ b/sphinx/transforms/i18n.py
@@ -50,15 +50,12 @@ def publish_msgstr(app, source, source_path, source_line, config, settings):
     :rtype: docutils.nodes.document
     """
     from sphinx.io import SphinxI18nReader
-    reader = SphinxI18nReader(
-        app=app,
-        parsers=app.registry.get_source_parsers(),
-        parser_name='restructuredtext',  # default parser
-    )
+    reader = SphinxI18nReader()
     reader.set_lineno_for_reporter(source_line)
+    parser = app.registry.create_source_parser(app, '')
     doc = reader.read(
         source=StringInput(source=source, source_path=source_path),
-        parser=reader.parser,
+        parser=parser,
         settings=settings,
     )
     try:

From a0200ad4995967fc28f0e6ac70d0f15779d0e8bb Mon Sep 17 00:00:00 2001
From: Takeshi KOMIYA <i.tkomiya@gmail.com>
Date: Wed, 15 Nov 2017 11:02:36 +0900
Subject: [PATCH 02/10] Refactor sphinx.io; separate parser detection from
 SphinxFileInput

---
 sphinx/io.py       | 13 +++----------
 sphinx/registry.py | 19 ++++++++++++-------
 2 files changed, 15 insertions(+), 17 deletions(-)

diff --git a/sphinx/io.py b/sphinx/io.py
index 47e701f35..71701e10d 100644
--- a/sphinx/io.py
+++ b/sphinx/io.py
@@ -140,22 +140,15 @@ class SphinxFileInput(FileInput):
 
     def read(self):
         # type: () -> unicode
-        def get_parser_type(source_path):
-            # type: (unicode) -> Tuple[unicode]
-            for suffix, parser_class in iteritems(self.app.registry.get_source_parsers()):
-                if source_path.endswith(suffix):
-                    if isinstance(parser_class, string_types):
-                        parser_class = import_object(parser_class, 'source parser')  # type: ignore  # NOQA
-                    return parser_class.supported
-            return ('restructuredtext',)
-
         data = FileInput.read(self)
         if self.app:
             arg = [data]
             self.app.emit('source-read', self.env.docname, arg)
             data = arg[0]
+
+        parser = self.app.registry.get_source_parser(self.source_path)
         docinfo, data = split_docinfo(data)
-        if 'restructuredtext' in get_parser_type(self.source_path):
+        if 'restructuredtext' in parser.supported:
             if self.env.config.rst_epilog:
                 data = data + '\n' + self.env.config.rst_epilog + '\n'
             if self.env.config.rst_prolog:
diff --git a/sphinx/registry.py b/sphinx/registry.py
index c8ce4ad5e..3723bcc29 100644
--- a/sphinx/registry.py
+++ b/sphinx/registry.py
@@ -157,13 +157,13 @@ class SphinxComponentRegistry(object):
         stddomain.object_types[directivename] = ObjType(objname or directivename, rolename)
 
     def add_source_parser(self, suffix, parser):
-        # type: (unicode, Parser) -> None
+        # type: (unicode, Type[Parser]) -> None
         if suffix in self.source_parsers:
             raise ExtensionError(__('source_parser for %r is already registered') % suffix)
         self.source_parsers[suffix] = parser
 
-    def create_source_parser(self, app, filename):
-        # type: (Sphinx, unicode) -> Parser
+    def get_source_parser(self, filename):
+        # type: (unicode) -> Type[Parser]
         for suffix, parser_class in iteritems(self.source_parsers):
             if filename.endswith(suffix):
                 break
@@ -176,15 +176,20 @@ class SphinxComponentRegistry(object):
         else:
             if isinstance(parser_class, string_types):
                 parser_class = import_object(parser_class, 'source parser')  # type: ignore
-            parser = parser_class()
-            if isinstance(parser, SphinxParser):
-                parser.set_application(app)
-            return parser
+            return parser_class
 
     def get_source_parsers(self):
         # type: () -> Dict[unicode, Parser]
         return self.source_parsers
 
+    def create_source_parser(self, app, filename):
+        # type: (Sphinx, unicode) -> Parser
+        parser_class = self.get_source_parser(filename)
+        parser = parser_class()
+        if isinstance(parser, SphinxParser):
+            parser.set_application(app)
+        return parser
+
     def add_translator(self, name, translator):
         # type: (unicode, Type[nodes.NodeVisitor]) -> None
         self.translators[name] = translator

From aa4fd0e1b7e3043dee6ad3344c477cc59d41f0e8 Mon Sep 17 00:00:00 2001
From: Takeshi KOMIYA <i.tkomiya@gmail.com>
Date: Wed, 15 Nov 2017 11:34:52 +0900
Subject: [PATCH 03/10] refactor sphinx.io; SphinxFileInput can expect that app
 argument is always available

---
 sphinx/io.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/sphinx/io.py b/sphinx/io.py
index 71701e10d..c290e7831 100644
--- a/sphinx/io.py
+++ b/sphinx/io.py
@@ -141,10 +141,11 @@ class SphinxFileInput(FileInput):
     def read(self):
         # type: () -> unicode
         data = FileInput.read(self)
-        if self.app:
-            arg = [data]
-            self.app.emit('source-read', self.env.docname, arg)
-            data = arg[0]
+
+        # emit source-read event
+        arg = [data]
+        self.app.emit('source-read', self.env.docname, arg)
+        data = arg[0]
 
         parser = self.app.registry.get_source_parser(self.source_path)
         docinfo, data = split_docinfo(data)

From 0e86ff2f1133e529d148cea6cc235218d73cc7d4 Mon Sep 17 00:00:00 2001
From: Takeshi KOMIYA <i.tkomiya@gmail.com>
Date: Wed, 15 Nov 2017 14:48:30 +0900
Subject: [PATCH 04/10] Refactor sphinx.io; separate FileInput class for each
 file type

---
 sphinx/application.py |  1 +
 sphinx/io.py          | 41 ++++++++++++++++++++++++++++-------------
 sphinx/registry.py    | 24 ++++++++++++++++++++++++
 3 files changed, 53 insertions(+), 13 deletions(-)

diff --git a/sphinx/application.py b/sphinx/application.py
index 209c73202..05d302c81 100644
--- a/sphinx/application.py
+++ b/sphinx/application.py
@@ -83,6 +83,7 @@ builtin_extensions = (
     'sphinx.directives.code',
     'sphinx.directives.other',
     'sphinx.directives.patches',
+    'sphinx.io',
     'sphinx.parsers',
     'sphinx.roles',
     'sphinx.transforms.post_transforms',
diff --git a/sphinx/io.py b/sphinx/io.py
index c290e7831..418e91b12 100644
--- a/sphinx/io.py
+++ b/sphinx/io.py
@@ -120,7 +120,9 @@ def SphinxDummySourceClass(source, *args, **kwargs):
     return source
 
 
-class SphinxFileInput(FileInput):
+class SphinxBaseFileInput(FileInput):
+    """A base class of SphinxFileInput."""
+
     def __init__(self, app, env, *args, **kwds):
         # type: (Sphinx, BuildEnvironment, Any, Any) -> None
         self.app = app
@@ -145,16 +147,7 @@ class SphinxFileInput(FileInput):
         # emit source-read event
         arg = [data]
         self.app.emit('source-read', self.env.docname, arg)
-        data = arg[0]
-
-        parser = self.app.registry.get_source_parser(self.source_path)
-        docinfo, data = split_docinfo(data)
-        if 'restructuredtext' in parser.supported:
-            if self.env.config.rst_epilog:
-                data = data + '\n' + self.env.config.rst_epilog + '\n'
-            if self.env.config.rst_prolog:
-                data = self.env.config.rst_prolog + '\n' + data
-        return docinfo + data
+        return arg[0]
 
     def warn_and_replace(self, error):
         # type: (Any) -> Tuple
@@ -172,12 +165,29 @@ class SphinxFileInput(FileInput):
         return (u'?', error.end)
 
 
+class SphinxFileInput(SphinxBaseFileInput):
+    pass
+
+
+class SphinxRSTFileInput(SphinxBaseFileInput):
+    def read(self):
+        # type: () -> unicode
+        data = SphinxBaseFileInput.read(self)
+        docinfo, data = split_docinfo(data)
+        if self.env.config.rst_epilog:
+            data = data + '\n' + self.env.config.rst_epilog + '\n'
+        if self.env.config.rst_prolog:
+            data = self.env.config.rst_prolog + '\n' + data
+        return docinfo + data
+
+
 def read_doc(app, env, filename):
     # type: (Sphinx, BuildEnvironment, unicode) -> nodes.document
     """Parse a document and convert to doctree."""
+    input_class = app.registry.get_source_input(filename)
     reader = SphinxStandaloneReader()
-    source = SphinxFileInput(app, env, source=None, source_path=filename,
-                             encoding=env.config.source_encoding)
+    source = input_class(app, env, source=None, source_path=filename,
+                         encoding=env.config.source_encoding)
     parser = app.registry.create_source_parser(app, filename)
 
     pub = Publisher(reader=reader,
@@ -190,3 +200,8 @@ def read_doc(app, env, filename):
     pub.set_source(source, filename)
     pub.publish()
     return pub.document
+
+
+def setup(app):
+    app.registry.add_source_input('*', SphinxFileInput)
+    app.registry.add_source_input('restructuredtext', SphinxRSTFileInput)
diff --git a/sphinx/registry.py b/sphinx/registry.py
index 3723bcc29..b627f23af 100644
--- a/sphinx/registry.py
+++ b/sphinx/registry.py
@@ -30,6 +30,7 @@ if False:
     # For type annotation
     from typing import Any, Callable, Dict, Iterator, List, Type  # NOQA
     from docutils import nodes  # NOQA
+    from docutils.io import Input  # NOQA
     from docutils.parsers import Parser  # NOQA
     from sphinx.application import Sphinx  # NOQA
     from sphinx.builders import Builder  # NOQA
@@ -50,6 +51,7 @@ class SphinxComponentRegistry(object):
         self.builders = {}          # type: Dict[unicode, Type[Builder]]
         self.domains = {}           # type: Dict[unicode, Type[Domain]]
         self.source_parsers = {}    # type: Dict[unicode, Parser]
+        self.source_inputs = {}     # type: Dict[unicode, Input]
         self.translators = {}       # type: Dict[unicode, nodes.NodeVisitor]
 
     def add_builder(self, builder):
@@ -190,6 +192,28 @@ class SphinxComponentRegistry(object):
             parser.set_application(app)
         return parser
 
+    def add_source_input(self, filetype, input_class):
+        # type: (unicode, Type[Input]) -> None
+        if filetype in self.source_inputs:
+            raise ExtensionError(__('source_input for %r is already registered') % filetype)
+        self.source_inputs[filetype] = input_class
+
+    def get_source_input(self, filename):
+        # type: (unicode) -> Type[Input]
+        parser = self.get_source_parser(filename)
+        for filetype in parser.supported:
+            if filetype in self.source_inputs:
+                input_class = self.source_inputs[filetype]
+                break
+        else:
+            # use special source_input for unknown file-type '*' (if exists)
+            input_class = self.source_inputs.get('*')
+
+        if input_class is None:
+            raise SphinxError(__('source_input for %s not registered') % filename)
+        else:
+            return input_class
+
     def add_translator(self, name, translator):
         # type: (unicode, Type[nodes.NodeVisitor]) -> None
         self.translators[name] = translator

From 16c244cd6af79afa85202af1a0f1e325c175994f Mon Sep 17 00:00:00 2001
From: Takeshi KOMIYA <i.tkomiya@gmail.com>
Date: Tue, 14 Nov 2017 22:33:56 +0900
Subject: [PATCH 05/10] Add LoggingReporter.from_reporter() constructor

---
 sphinx/io.py            | 4 +---
 sphinx/util/docutils.py | 7 +++++++
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/sphinx/io.py b/sphinx/io.py
index 418e91b12..5868bd7b5 100644
--- a/sphinx/io.py
+++ b/sphinx/io.py
@@ -58,9 +58,7 @@ class SphinxBaseReader(standalone.Reader):
         # type: () -> nodes.document
         document = standalone.Reader.new_document(self)
         reporter = document.reporter
-        document.reporter = LoggingReporter(reporter.source, reporter.report_level,
-                                            reporter.halt_level, reporter.debug_flag,
-                                            reporter.error_handler)
+        document.reporter = LoggingReporter.from_reporter(reporter)
         return document
 
 
diff --git a/sphinx/util/docutils.py b/sphinx/util/docutils.py
index 92e6c8c22..d24a8f827 100644
--- a/sphinx/util/docutils.py
+++ b/sphinx/util/docutils.py
@@ -167,6 +167,13 @@ class WarningStream(object):
 
 
 class LoggingReporter(Reporter):
+    @classmethod
+    def from_reporter(cls, reporter):
+        # type: (Reporter) -> LoggingReporter
+        """Create an instance of LoggingReporter from another reporter object."""
+        return cls(reporter.source, reporter.report_level, reporter.halt_level,
+                   reporter.debug_flag, reporter.error_handler)
+
     def __init__(self, source, report_level, halt_level,
                  debug=False, error_handler='backslashreplace'):
         # type: (unicode, int, int, bool, unicode) -> None

From 773173b11f04f08547999c0799911fc7aa96d0d3 Mon Sep 17 00:00:00 2001
From: Takeshi KOMIYA <i.tkomiya@gmail.com>
Date: Tue, 14 Nov 2017 22:34:32 +0900
Subject: [PATCH 06/10] Remove LoggingReporter.set_conditions() (unused)

---
 sphinx/util/docutils.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/sphinx/util/docutils.py b/sphinx/util/docutils.py
index d24a8f827..8da89713d 100644
--- a/sphinx/util/docutils.py
+++ b/sphinx/util/docutils.py
@@ -181,10 +181,6 @@ class LoggingReporter(Reporter):
         Reporter.__init__(self, source, report_level, halt_level,
                           stream, debug, error_handler=error_handler)
 
-    def set_conditions(self, category, report_level, halt_level, debug=False):
-        # type: (unicode, int, int, bool) -> None
-        Reporter.set_conditions(self, category, report_level, halt_level, debug=debug)
-
 
 def is_html5_writer_available():
     # type: () -> bool

From 07c5348a56471201b5901881b0d9d83f4823a6fd Mon Sep 17 00:00:00 2001
From: Takeshi KOMIYA <i.tkomiya@gmail.com>
Date: Thu, 7 Dec 2017 15:45:29 +0900
Subject: [PATCH 07/10] Add test_io.py

---
 sphinx/io.py            |  55 ++++++++++++++++++---
 sphinx/parsers.py       |  22 +++++++++
 sphinx/util/__init__.py |  10 ----
 sphinx/util/docutils.py |  19 ++++++-
 tests/test_io.py        | 107 ++++++++++++++++++++++++++++++++++++++++
 tests/test_util.py      |  25 +---------
 6 files changed, 195 insertions(+), 43 deletions(-)
 create mode 100644 tests/test_io.py

diff --git a/sphinx/io.py b/sphinx/io.py
index 5868bd7b5..056c763b1 100644
--- a/sphinx/io.py
+++ b/sphinx/io.py
@@ -8,13 +8,15 @@
     :copyright: Copyright 2007-2017 by the Sphinx team, see AUTHORS.
     :license: BSD, see LICENSE for details.
 """
+import re
 import codecs
 
 from docutils.io import FileInput, NullOutput
 from docutils.core import Publisher
 from docutils.readers import standalone
+from docutils.statemachine import StringList
 from docutils.writers import UnfilteredWriter
-from six import string_types, text_type, iteritems
+from six import text_type
 from typing import Any, Union  # NOQA
 
 from sphinx.transforms import (
@@ -28,7 +30,6 @@ from sphinx.transforms.i18n import (
     PreserveTranslatableMessages, Locale, RemoveTranslatableInline,
 )
 from sphinx.util import logging
-from sphinx.util import import_object, split_docinfo
 from sphinx.util.docutils import LoggingReporter
 
 if False:
@@ -42,6 +43,8 @@ if False:
     from sphinx.builders import Builder  # NOQA
     from sphinx.environment import BuildEnvironment  # NOQA
 
+docinfo_re = re.compile(':\\w+:.*?')
+
 
 logger = logging.getLogger(__name__)
 
@@ -59,6 +62,7 @@ class SphinxBaseReader(standalone.Reader):
         document = standalone.Reader.new_document(self)
         reporter = document.reporter
         document.reporter = LoggingReporter.from_reporter(reporter)
+        document.reporter.set_source(self.source)
         return document
 
 
@@ -168,15 +172,50 @@ class SphinxFileInput(SphinxBaseFileInput):
 
 
 class SphinxRSTFileInput(SphinxBaseFileInput):
+    def prepend_prolog(self, text, prolog):
+        # type: (StringList, unicode) -> None
+        docinfo = self.count_docinfo_lines(text)
+        if docinfo:
+            # insert a blank line after docinfo
+            text.insert(docinfo, '', '<generated>', 0)
+            docinfo += 1
+
+        # insert prolog (after docinfo if exists)
+        for lineno, line in enumerate(prolog.splitlines()):
+            text.insert(docinfo + lineno, line, '<rst_prolog>', lineno)
+
+        text.insert(docinfo + lineno + 1, '', '<generated>', 0)
+
+    def append_epilog(self, text, epilog):
+        # type: (StringList, unicode) -> None
+        # append a blank line and rst_epilog
+        text.append('', '<generated>', 0)
+        for lineno, line in enumerate(epilog.splitlines()):
+            text.append(line, '<rst_epilog>', lineno)
+
     def read(self):
-        # type: () -> unicode
+        # type: () -> StringList
         data = SphinxBaseFileInput.read(self)
-        docinfo, data = split_docinfo(data)
-        if self.env.config.rst_epilog:
-            data = data + '\n' + self.env.config.rst_epilog + '\n'
+        content = StringList()
+        for lineno, line in enumerate(data.splitlines()):
+            content.append(line, self.source_path, lineno)
+
         if self.env.config.rst_prolog:
-            data = self.env.config.rst_prolog + '\n' + data
-        return docinfo + data
+            self.prepend_prolog(content, self.env.config.rst_prolog)
+        if self.env.config.rst_epilog:
+            self.append_epilog(content, self.env.config.rst_epilog)
+
+        return content
+
+    def count_docinfo_lines(self, content):
+        # type: (StringList) -> int
+        if len(content) == 0:
+            return 0
+        else:
+            for lineno, line in enumerate(content.data):
+                if not docinfo_re.match(line):
+                    break
+            return lineno
 
 
 def read_doc(app, env, filename):
diff --git a/sphinx/parsers.py b/sphinx/parsers.py
index 33556e487..085e45070 100644
--- a/sphinx/parsers.py
+++ b/sphinx/parsers.py
@@ -11,6 +11,8 @@
 
 import docutils.parsers
 import docutils.parsers.rst
+from docutils.parsers.rst import states
+from docutils.statemachine import StringList
 from docutils.transforms.universal import SmartQuotes
 
 from sphinx.transforms import SphinxSmartQuotes
@@ -66,6 +68,26 @@ class RSTParser(docutils.parsers.rst.Parser):
         transforms.append(SphinxSmartQuotes)
         return transforms
 
+    def parse(self, inputstring, document):
+        # type: (Any, nodes.document) -> None
+        """Parse text and generate a document tree.
+
+        This derived method also accepts a StringList as the inputstring parameter,
+        which allows mixed content (cf. rst_prolog) to be handled correctly.
+        """
+        if isinstance(inputstring, StringList):
+            self.setup_parse(inputstring, document)
+            self.statemachine = states.RSTStateMachine(
+                state_classes=self.state_classes,
+                initial_state=self.initial_state,
+                debug=document.reporter.debug_flag)
+            # Give inputstring directly to statemachine.
+            self.statemachine.run(inputstring, document, inliner=self.inliner)
+            self.finish_parse()
+        else:
+            # otherwise, inputstring might be a string. It will be handled by superclass.
+            docutils.parsers.rst.Parser.parse(self, inputstring, document)
+
 
 def setup(app):
     # type: (Sphinx) -> Dict[unicode, Any]
diff --git a/sphinx/util/__init__.py b/sphinx/util/__init__.py
index 03f8ce6a3..da0f6ac3a 100644
--- a/sphinx/util/__init__.py
+++ b/sphinx/util/__init__.py
@@ -564,16 +564,6 @@ def encode_uri(uri):
     return urlunsplit(split)
 
 
-def split_docinfo(text):
-    # type: (unicode) -> Sequence[unicode]
-    docinfo_re = re.compile('\\A((?:\\s*:\\w+:.*?\n(?:[ \\t]+.*?\n)*)+)', re.M)
-    result = docinfo_re.split(text, 1)  # type: ignore
-    if len(result) == 1:
-        return '', result[0]
-    else:
-        return result[1:]
-
-
 def display_chunk(chunk):
     # type: (Any) -> unicode
     if isinstance(chunk, (list, tuple)):
diff --git a/sphinx/util/docutils.py b/sphinx/util/docutils.py
index 8da89713d..d1b7ac431 100644
--- a/sphinx/util/docutils.py
+++ b/sphinx/util/docutils.py
@@ -18,8 +18,9 @@ from contextlib import contextmanager
 
 import docutils
 from docutils.languages import get_language
-from docutils.utils import Reporter
+from docutils.statemachine import ViewList
 from docutils.parsers.rst import directives, roles, convert_directive_function
+from docutils.utils import Reporter
 
 from sphinx.errors import ExtensionError
 from sphinx.locale import __
@@ -33,6 +34,7 @@ if False:
     from typing import Any, Callable, Iterator, List, Tuple  # NOQA
     from docutils import nodes  # NOQA
     from sphinx.environment import BuildEnvironment  # NOQA
+    from sphinx.io import SphinxFileInput  # NOQA
 
 
 __version_info__ = tuple(LooseVersion(docutils.__version__).version)
@@ -180,6 +182,21 @@ class LoggingReporter(Reporter):
         stream = WarningStream()
         Reporter.__init__(self, source, report_level, halt_level,
                           stream, debug, error_handler=error_handler)
+        self.source_and_line = None
+
+    def set_source(self, source):
+        # type: (SphinxFileInput) -> None
+        self.source_and_line = source
+
+    def system_message(self, *args, **kwargs):
+        # type: (Any, Any) -> Any
+        if kwargs.get('line') and isinstance(self.source_and_line, ViewList):
+            # replace source parameter if source is set
+            source, lineno = self.source_and_line.info(kwargs.get('line'))
+            kwargs['source'] = source
+            kwargs['line'] = lineno
+
+        return Reporter.system_message(self, *args, **kwargs)
 
 
 def is_html5_writer_available():
diff --git a/tests/test_io.py b/tests/test_io.py
new file mode 100644
index 000000000..a017a2cc0
--- /dev/null
+++ b/tests/test_io.py
@@ -0,0 +1,107 @@
+# -*- coding: utf-8 -*-
+"""
+    test_sphinx_io
+    ~~~~~~~~~~~~~~
+
+    Tests io modules.
+
+    :copyright: Copyright 2007-2017 by the Sphinx team, see AUTHORS.
+    :license: BSD, see LICENSE for details.
+"""
+
+import pytest
+from six import StringIO
+
+from sphinx.io import SphinxRSTFileInput
+
+
+@pytest.mark.sphinx(testroot='basic')
+def test_SphinxRSTFileInput(app):
+    app.env.temp_data['docname'] = 'index'
+
+    # normal case
+    text = ('hello Sphinx world\n'
+            'Sphinx is a document generator')
+    source = SphinxRSTFileInput(app, app.env, source=StringIO(text),
+                                source_path='dummy.rst', encoding='utf-8')
+    result = source.read()
+    assert result.data == ['hello Sphinx world',
+                           'Sphinx is a document generator']
+    assert result.info(0) == ('dummy.rst', 0)
+    assert result.info(1) == ('dummy.rst', 1)
+    assert result.info(2) == ('dummy.rst', None)  # out of range
+
+    # having rst_prolog ends without CR
+    app.env.config.rst_prolog = 'this is rst_prolog\nhello reST!'
+    source = SphinxRSTFileInput(app, app.env, source=StringIO(text),
+                                source_path='dummy.rst', encoding='utf-8')
+    result = source.read()
+    assert result.data == ['this is rst_prolog',
+                           'hello reST!',
+                           '',
+                           'hello Sphinx world',
+                           'Sphinx is a document generator']
+    assert result.info(0) == ('<rst_prolog>', 0)
+    assert result.info(1) == ('<rst_prolog>', 1)
+    assert result.info(2) == ('<generated>', 0)
+    assert result.info(3) == ('dummy.rst', 0)
+    assert result.info(4) == ('dummy.rst', 1)
+
+    # having rst_prolog ends with CR
+    app.env.config.rst_prolog = 'this is rst_prolog\nhello reST!\n'
+    source = SphinxRSTFileInput(app, app.env, source=StringIO(text),
+                                source_path='dummy.rst', encoding='utf-8')
+    result = source.read()
+    assert result.data == ['this is rst_prolog',
+                           'hello reST!',
+                           '',
+                           'hello Sphinx world',
+                           'Sphinx is a document generator']
+
+    # having docinfo and rst_prolog
+    docinfo_text = (':title: test of SphinxFileInput\n'
+                    ':author: Sphinx team\n'
+                    '\n'
+                    'hello Sphinx world\n'
+                    'Sphinx is a document generator\n')
+    app.env.config.rst_prolog = 'this is rst_prolog\nhello reST!'
+    source = SphinxRSTFileInput(app, app.env, source=StringIO(docinfo_text),
+                                source_path='dummy.rst', encoding='utf-8')
+    result = source.read()
+    assert result.data == [':title: test of SphinxFileInput',
+                           ':author: Sphinx team',
+                           '',
+                           'this is rst_prolog',
+                           'hello reST!',
+                           '',
+                           '',
+                           'hello Sphinx world',
+                           'Sphinx is a document generator']
+    assert result.info(0) == ('dummy.rst', 0)
+    assert result.info(1) == ('dummy.rst', 1)
+    assert result.info(2) == ('<generated>', 0)
+    assert result.info(3) == ('<rst_prolog>', 0)
+    assert result.info(4) == ('<rst_prolog>', 1)
+    assert result.info(5) == ('<generated>', 0)
+    assert result.info(6) == ('dummy.rst', 2)
+    assert result.info(7) == ('dummy.rst', 3)
+    assert result.info(8) == ('dummy.rst', 4)
+    assert result.info(9) == ('dummy.rst', None)  # out of range
+
+    # having rst_epilog
+    app.env.config.rst_prolog = None
+    app.env.config.rst_epilog = 'this is rst_epilog\ngood-bye reST!'
+    source = SphinxRSTFileInput(app, app.env, source=StringIO(text),
+                                source_path='dummy.rst', encoding='utf-8')
+    result = source.read()
+    assert result.data == ['hello Sphinx world',
+                           'Sphinx is a document generator',
+                           '',
+                           'this is rst_epilog',
+                           'good-bye reST!']
+    assert result.info(0) == ('dummy.rst', 0)
+    assert result.info(1) == ('dummy.rst', 1)
+    assert result.info(2) == ('<generated>', 0)
+    assert result.info(3) == ('<rst_epilog>', 0)
+    assert result.info(4) == ('<rst_epilog>', 1)
+    assert result.info(5) == ('<rst_epilog>', None)  # out of range
diff --git a/tests/test_util.py b/tests/test_util.py
index 84ce44007..aae54eaf0 100644
--- a/tests/test_util.py
+++ b/tests/test_util.py
@@ -14,8 +14,7 @@ from mock import patch
 
 from sphinx.util import logging
 from sphinx.util import (
-    display_chunk, encode_uri, parselinenos, split_docinfo, status_iterator,
-    xmlname_checker
+    display_chunk, encode_uri, parselinenos, status_iterator, xmlname_checker
 )
 
 from sphinx.testing.util import strip_escseq
@@ -36,28 +35,6 @@ def test_encode_uri():
     assert expected, encode_uri(uri)
 
 
-def test_splitdocinfo():
-    source = "Hello world.\n"
-    docinfo, content = split_docinfo(source)
-    assert docinfo == ''
-    assert content == 'Hello world.\n'
-
-    source = ":orphan:\n\nHello world.\n"
-    docinfo, content = split_docinfo(source)
-    assert docinfo == ':orphan:\n'
-    assert content == '\nHello world.\n'
-
-    source = ":author: Georg Brandl\n:title: Manual of Sphinx\n\nHello world.\n"
-    docinfo, content = split_docinfo(source)
-    assert docinfo == ':author: Georg Brandl\n:title: Manual of Sphinx\n'
-    assert content == '\nHello world.\n'
-
-    source = ":multiline: one\n\ttwo\n\tthree\n\nHello world.\n"
-    docinfo, content = split_docinfo(source)
-    assert docinfo == ":multiline: one\n\ttwo\n\tthree\n"
-    assert content == '\nHello world.\n'
-
-
 def test_display_chunk():
     assert display_chunk('hello') == 'hello'
     assert display_chunk(['hello']) == 'hello'

From 6dae5db9af55807de1db4067203e57dabb6ed774 Mon Sep 17 00:00:00 2001
From: Takeshi KOMIYA <i.tkomiya@gmail.com>
Date: Wed, 13 Dec 2017 20:49:50 +0900
Subject: [PATCH 08/10] Fix SphinxRSTFileInput should expand tabs

---
 sphinx/io.py     |  7 ++++---
 tests/test_io.py | 11 +++++++++++
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/sphinx/io.py b/sphinx/io.py
index 056c763b1..5f34b74dd 100644
--- a/sphinx/io.py
+++ b/sphinx/io.py
@@ -14,7 +14,7 @@ import codecs
 from docutils.io import FileInput, NullOutput
 from docutils.core import Publisher
 from docutils.readers import standalone
-from docutils.statemachine import StringList
+from docutils.statemachine import StringList, string2lines
 from docutils.writers import UnfilteredWriter
 from six import text_type
 from typing import Any, Union  # NOQA
@@ -195,9 +195,10 @@ class SphinxRSTFileInput(SphinxBaseFileInput):
 
     def read(self):
         # type: () -> StringList
-        data = SphinxBaseFileInput.read(self)
+        inputstring = SphinxBaseFileInput.read(self)
+        lines = string2lines(inputstring, convert_whitespace=True)
         content = StringList()
-        for lineno, line in enumerate(data.splitlines()):
+        for lineno, line in enumerate(lines):
             content.append(line, self.source_path, lineno)
 
         if self.env.config.rst_prolog:
diff --git a/tests/test_io.py b/tests/test_io.py
index a017a2cc0..ecd4a1009 100644
--- a/tests/test_io.py
+++ b/tests/test_io.py
@@ -105,3 +105,14 @@ def test_SphinxRSTFileInput(app):
     assert result.info(3) == ('<rst_epilog>', 0)
     assert result.info(4) == ('<rst_epilog>', 1)
     assert result.info(5) == ('<rst_epilog>', None)  # out of range
+
+    # expandtabs / convert whitespaces
+    app.env.config.rst_prolog = None
+    app.env.config.rst_epilog = None
+    text = ('\thello Sphinx world\n'
+            '\v\fSphinx is a document generator')
+    source = SphinxRSTFileInput(app, app.env, source=StringIO(text),
+                                source_path='dummy.rst', encoding='utf-8')
+    result = source.read()
+    assert result.data == ['        hello Sphinx world',
+                           '  Sphinx is a document generator']

From 0dfa88ec4bd6d6443d6d63b8b1f1d87cdb308191 Mon Sep 17 00:00:00 2001
From: Takeshi KOMIYA <i.tkomiya@gmail.com>
Date: Thu, 14 Dec 2017 21:15:36 +0900
Subject: [PATCH 09/10] Fix mypy violation

---
 sphinx/parsers.py       | 1 +
 sphinx/util/docutils.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/sphinx/parsers.py b/sphinx/parsers.py
index 085e45070..5d19ae67c 100644
--- a/sphinx/parsers.py
+++ b/sphinx/parsers.py
@@ -20,6 +20,7 @@ from sphinx.transforms import SphinxSmartQuotes
 if False:
     # For type annotation
     from typing import Any, Dict, List, Type  # NOQA
+    from docutils import nodes  # NOQA
     from docutils.transforms import Transform  # NOQA
     from sphinx.application import Sphinx  # NOQA
 
diff --git a/sphinx/util/docutils.py b/sphinx/util/docutils.py
index d1b7ac431..00ea5919e 100644
--- a/sphinx/util/docutils.py
+++ b/sphinx/util/docutils.py
@@ -182,7 +182,7 @@ class LoggingReporter(Reporter):
         stream = WarningStream()
         Reporter.__init__(self, source, report_level, halt_level,
                           stream, debug, error_handler=error_handler)
-        self.source_and_line = None
+        self.source_and_line = None  # type: SphinxFileInput
 
     def set_source(self, source):
         # type: (SphinxFileInput) -> None

From 51580fabb4be6d596f10dbc5de4757e99c076628 Mon Sep 17 00:00:00 2001
From: Takeshi KOMIYA <i.tkomiya@gmail.com>
Date: Thu, 14 Dec 2017 21:15:36 +0900
Subject: [PATCH 10/10] Update docstrings

---
 sphinx/io.py      | 48 +++++++++++++++++++++++++++++++++++++++++------
 sphinx/parsers.py |  6 +++---
 2 files changed, 45 insertions(+), 9 deletions(-)

diff --git a/sphinx/io.py b/sphinx/io.py
index 5f34b74dd..8a41069db 100644
--- a/sphinx/io.py
+++ b/sphinx/io.py
@@ -51,14 +51,20 @@ logger = logging.getLogger(__name__)
 
 class SphinxBaseReader(standalone.Reader):
     """
-    Add our source parsers
+    A base class of readers for Sphinx.
+
+    This replaces the reporter with Sphinx's own when generating a document.
     """
+
     def get_transforms(self):
         # type: () -> List[Transform]
         return standalone.Reader.get_transforms(self) + self.transforms
 
     def new_document(self):
         # type: () -> nodes.document
+        """Creates a new document object which has a special reporter object
+        suitable for logging.
+        """
         document = standalone.Reader.new_document(self)
         reporter = document.reporter
         document.reporter = LoggingReporter.from_reporter(reporter)
@@ -68,7 +74,7 @@ class SphinxBaseReader(standalone.Reader):
 
 class SphinxStandaloneReader(SphinxBaseReader):
     """
-    Add our own transforms.
+    A basic document reader for Sphinx.
     """
     transforms = [ApplySourceWorkaround, ExtraTranslatableNodes, PreserveTranslatableMessages,
                   Locale, CitationReferences, DefaultSubstitutions, MoveModuleTargets,
@@ -79,10 +85,11 @@ class SphinxStandaloneReader(SphinxBaseReader):
 
 class SphinxI18nReader(SphinxBaseReader):
     """
-    Replacer for document.reporter.get_source_and_line method.
+    A document reader for i18n.
 
-    reST text lines for translation do not have the original source line number.
-    This class provides the correct line numbers when reporting.
+    This returns the source line number of the original text as the current source
+    line number to let users know where the error happened, because the translated
+    texts are partial and do not have correct line numbers.
     """
 
     lineno = None  # type: int
@@ -94,10 +101,14 @@ class SphinxI18nReader(SphinxBaseReader):
 
     def set_lineno_for_reporter(self, lineno):
         # type: (int) -> None
+        """Stores the source line number of original text."""
         self.lineno = lineno
 
     def new_document(self):
         # type: () -> nodes.document
+        """Creates a new document object which has a special reporter object for
+        translation.
+        """
         document = SphinxBaseReader.new_document(self)
         reporter = document.reporter
 
@@ -110,6 +121,8 @@ class SphinxI18nReader(SphinxBaseReader):
 
 
 class SphinxDummyWriter(UnfilteredWriter):
+    """Dummy writer module used for generating doctree."""
+
     supported = ('html',)  # needed to keep "meta" nodes
 
     def translate(self):
@@ -123,7 +136,11 @@ def SphinxDummySourceClass(source, *args, **kwargs):
 
 
 class SphinxBaseFileInput(FileInput):
-    """A base class of SphinxFileInput."""
+    """A base class of SphinxFileInput.
+
+    It supports replacing unknown Unicode characters with '?'. It also emits
+    the Sphinx event ``source-read`` on reading.
+    """
 
     def __init__(self, app, env, *args, **kwds):
         # type: (Sphinx, BuildEnvironment, Any, Any) -> None
@@ -144,6 +161,10 @@ class SphinxBaseFileInput(FileInput):
 
     def read(self):
         # type: () -> unicode
+        """Reads the contents from file.
+
+        After reading, it emits Sphinx event ``source-read``.
+        """
         data = FileInput.read(self)
 
         # emit source-read event
@@ -168,10 +189,25 @@ class SphinxBaseFileInput(FileInput):
 
 
 class SphinxFileInput(SphinxBaseFileInput):
+    """A basic FileInput for Sphinx."""
     pass
 
 
 class SphinxRSTFileInput(SphinxBaseFileInput):
+    """A reST FileInput for Sphinx.
+
+    This FileInput automatically prepends and appends the text given by
+    :confval:`rst_prolog` and :confval:`rst_epilog`.
+
+    .. important::
+
+       This FileInput returns an instance of ``StringList`` from its ``read()`` method
+       to indicate the original source filename and line numbers after prepending and
+       appending.
+       For that reason, ``sphinx.parsers.RSTParser`` should be used with this to parse
+       the content correctly.
+    """
+
     def prepend_prolog(self, text, prolog):
         # type: (StringList, unicode) -> None
         docinfo = self.count_docinfo_lines(text)
diff --git a/sphinx/parsers.py b/sphinx/parsers.py
index 5d19ae67c..92bea9461 100644
--- a/sphinx/parsers.py
+++ b/sphinx/parsers.py
@@ -59,7 +59,7 @@ class Parser(docutils.parsers.Parser):
 
 
 class RSTParser(docutils.parsers.rst.Parser):
-    """A reST parser customized for Sphinx."""
+    """A reST parser for Sphinx."""
 
     def get_transforms(self):
         # type: () -> List[Type[Transform]]
@@ -73,8 +73,8 @@ class RSTParser(docutils.parsers.rst.Parser):
         # type: (Any, nodes.document) -> None
         """Parse text and generate a document tree.
 
-        This derived method accepts StringList as a inputstring parameter.
-        It enables to handle mixed contents (cf. rst_prolog) correctly.
+        This accepts a StringList as the inputstring parameter.
+        This allows mixed contents (cf. :confval:`rst_prolog`) to be handled correctly.
         """
         if isinstance(inputstring, StringList):
             self.setup_parse(inputstring, document)