merge with parallel repo

This commit is contained in:
Georg Brandl
2013-03-29 13:01:21 +01:00
8 changed files with 130 additions and 12 deletions

View File

@@ -136,6 +136,8 @@ Features added
- Speed up building the search index by caching the results of the word
stemming routines. Saves about 20 seconds when building the Python
documentation.
- PR#108: Add experimental support for parallel building with a new
:option:`-j` option.
Documentation
-------------

View File

@@ -96,6 +96,15 @@ The :program:`sphinx-build` script has several options:
the build directory; with this option you can select a different cache
directory (the doctrees can be shared between all builders).
.. option:: -j N
Distribute the build over *N* processes in parallel, to make building on
multiprocessor machines more effective. Note that not all parts and not all
builders of Sphinx can be parallelized.
.. versionadded:: 1.2
This option should be considered *experimental*.
.. option:: -c path
Don't look for the :file:`conf.py` in the source directory, but use the given

View File

@@ -61,7 +61,8 @@ class Sphinx(object):
def __init__(self, srcdir, confdir, outdir, doctreedir, buildername,
confoverrides=None, status=sys.stdout, warning=sys.stderr,
freshenv=False, warningiserror=False, tags=None, verbosity=0):
freshenv=False, warningiserror=False, tags=None, verbosity=0,
parallel=0):
self.verbosity = verbosity
self.next_listener_id = 0
self._extensions = {}
@@ -76,6 +77,8 @@ class Sphinx(object):
self.outdir = outdir
self.doctreedir = doctreedir
self.parallel = parallel
if status is None:
self._status = StringIO()
self.quiet = True

View File

@@ -12,6 +12,12 @@
import os
from os import path
try:
import multiprocessing
import threading
except ImportError:
multiprocessing = threading = None
from docutils import nodes
from sphinx.util.osutil import SEP, relative_uri
@@ -33,6 +39,8 @@ class Builder(object):
format = ''
# doctree versioning method
versioning_method = 'none'
# allow parallel write_doc() calls
allow_parallel = False
def __init__(self, app):
self.env = app.env
@@ -98,19 +106,21 @@ class Builder(object):
"""
raise NotImplementedError
def old_status_iterator(self, iterable, summary, colorfunc=darkgreen):
def old_status_iterator(self, iterable, summary, colorfunc=darkgreen,
stringify_func=str):
l = 0
for item in iterable:
if l == 0:
self.info(bold(summary), nonl=1)
l = 1
self.info(colorfunc(item) + ' ', nonl=1)
self.info(colorfunc(stringify_func(item)) + ' ', nonl=1)
yield item
if l == 1:
self.info()
# new version with progress info
def status_iterator(self, iterable, summary, colorfunc=darkgreen, length=0):
def status_iterator(self, iterable, summary, colorfunc=darkgreen, length=0,
stringify_func=str):
if length == 0:
for item in self.old_status_iterator(iterable, summary, colorfunc):
yield item
@@ -120,7 +130,7 @@ class Builder(object):
for item in iterable:
l += 1
s = '%s[%3d%%] %s' % (summary, 100*l/length,
colorfunc(item))
colorfunc(stringify_func(item)))
if self.app.verbosity:
s += '\n'
else:
@@ -287,16 +297,87 @@ class Builder(object):
self.prepare_writing(docnames)
self.info('done')
# write target files
warnings = []
self.env.set_warnfunc(lambda *args: warnings.append(args))
# check for prerequisites to parallel build
# (parallel only works on POSIX, because the forking impl of
# multiprocessing is required)
if not (multiprocessing and
self.app.parallel > 1 and
self.allow_parallel and
os.name == 'posix'):
self._write_serial(sorted(docnames), warnings)
else:
# number of subprocesses is parallel-1 because the main process
# is busy loading doctrees and doing write_doc_serialized()
self._write_parallel(sorted(docnames), warnings,
nproc=self.app.parallel - 1)
self.env.set_warnfunc(self.warn)
def _write_serial(self, docnames, warnings):
for docname in self.status_iterator(
sorted(docnames), 'writing output... ', darkgreen, len(docnames)):
docnames, 'writing output... ', darkgreen, len(docnames)):
doctree = self.env.get_and_resolve_doctree(docname, self)
self.write_doc_serialized(docname, doctree)
self.write_doc(docname, doctree)
for warning in warnings:
self.warn(*warning)
self.env.set_warnfunc(self.warn)
def _write_parallel(self, docnames, warnings, nproc):
    """Write *docnames* using up to *nproc* forked worker processes.

    The first document is written in the main process to warm up caches
    and compile templates; the remaining documents are partitioned into
    chunks, each handed to one worker process.  Doctree resolution and
    ``write_doc_serialized()`` always run in the main process; only
    ``write_doc()`` runs in the workers.  *warnings* collected so far are
    re-emitted when each worker finishes.
    """
    def write_process(docs):
        # runs in the forked child process
        try:
            for docname, doctree in docs:
                self.write_doc(docname, doctree)
        except KeyboardInterrupt:
            pass  # do not print a traceback on Ctrl-C
        finally:
            for warning in warnings:
                self.warn(*warning)

    def process_thread(docs):
        # oversee one worker process; release the worker slot when done
        p = multiprocessing.Process(target=write_process, args=(docs,))
        p.start()
        p.join()
        semaphore.release()

    # nothing to do for an empty document list (avoids IndexError below)
    if not docnames:
        return

    # allow only "nproc" worker processes at once
    semaphore = threading.Semaphore(nproc)
    # list of threads to join when waiting for completion
    threads = []

    # warm up caches/compile templates using the first document
    firstname, docnames = docnames[0], docnames[1:]
    doctree = self.env.get_and_resolve_doctree(firstname, self)
    self.write_doc_serialized(firstname, doctree)
    self.write_doc(firstname, doctree)

    # for the rest, determine how many documents to write in one go
    # (BUG FIX: the original sliced ``docnames[1:]`` a second time here,
    # silently skipping the second document of every parallel build)
    ndocs = len(docnames)
    # at least one document per chunk, at most 10 -- clamping to >= 1
    # prevents divmod() dividing by zero when ndocs < nproc
    chunksize = max(min(ndocs // nproc, 10), 1)
    nchunks, rest = divmod(ndocs, chunksize)
    if rest:
        nchunks += 1
    # partition documents in "chunks" that will be written by one Process
    chunks = [docnames[i*chunksize:(i+1)*chunksize] for i in range(nchunks)]
    for docnames in self.status_iterator(
            chunks, 'writing output... ', darkgreen, len(chunks),
            lambda chk: '%s .. %s' % (chk[0], chk[-1])):
        docs = []
        for docname in docnames:
            # resolve and serialize in the main process only
            doctree = self.env.get_and_resolve_doctree(docname, self)
            self.write_doc_serialized(docname, doctree)
            docs.append((docname, doctree))
        # start a new thread to oversee the completion of this chunk
        semaphore.acquire()
        t = threading.Thread(target=process_thread, args=(docs,))
        t.setDaemon(True)
        t.start()
        threads.append(t)

    # make sure all threads have finished
    self.info(bold('waiting for workers...'))
    for t in threads:
        t.join()
def prepare_writing(self, docnames):
raise NotImplementedError
@@ -304,6 +385,12 @@ class Builder(object):
def write_doc(self, docname, doctree):
raise NotImplementedError
def write_doc_serialized(self, docname, doctree):
"""Handle parts of write_doc that must be called in the main process
if parallel build is active.
"""
pass
def finish(self):
"""Finish the building process.

View File

@@ -73,6 +73,7 @@ class StandaloneHTMLBuilder(Builder):
name = 'html'
format = 'html'
copysource = True
allow_parallel = True
out_suffix = '.html'
link_suffix = '.html' # defaults to matching out_suffix
indexer_format = js_index
@@ -426,7 +427,6 @@ class StandaloneHTMLBuilder(Builder):
self.secnumbers = self.env.toc_secnumbers.get(docname, {})
self.imgpath = relative_uri(self.get_target_uri(docname), '_images')
self.post_process_images(doctree)
self.dlpath = relative_uri(self.get_target_uri(docname), '_downloads')
self.current_docname = docname
self.docwriter.write(doctree, destination)
@@ -435,9 +435,15 @@ class StandaloneHTMLBuilder(Builder):
metatags = self.docwriter.clean_meta
ctx = self.get_doc_context(docname, body, metatags)
self.index_page(docname, doctree, ctx.get('title', ''))
self.handle_page(docname, ctx, event_arg=doctree)
def write_doc_serialized(self, docname, doctree):
self.imgpath = relative_uri(self.get_target_uri(docname), '_images')
self.post_process_images(doctree)
title = self.env.longtitles.get(docname)
title = title and self.render_partial(title)['title'] or ''
self.index_page(docname, doctree, title)
def finish(self):
self.info(bold('writing additional files...'), nonl=1)

View File

@@ -23,6 +23,7 @@ class TextBuilder(Builder):
name = 'text'
format = 'text'
out_suffix = '.txt'
allow_parallel = True
def init(self):
pass

View File

@@ -26,6 +26,7 @@ class XMLBuilder(Builder):
name = 'xml'
format = 'xml'
out_suffix = '.xml'
allow_parallel = True
_writer_class = XMLWriter

View File

@@ -48,6 +48,7 @@ General options
-E don't use a saved environment, always read all files
-d <path> path for the cached environment and doctree files
(default: outdir/.doctrees)
-j <N> build in parallel with N processes where possible
Build configuration options
^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -89,7 +90,7 @@ def main(argv):
nocolor()
try:
opts, args = getopt.getopt(argv[1:], 'ab:t:d:c:CD:A:nNEqQWw:PThv',
opts, args = getopt.getopt(argv[1:], 'ab:t:d:c:CD:A:nNEqQWw:PThvj:',
['help', 'version'])
allopts = set(opt[0] for opt in opts)
if '-h' in allopts or '--help' in allopts:
@@ -139,6 +140,7 @@ def main(argv):
force_all = freshenv = warningiserror = use_pdb = False
show_traceback = False
verbosity = 0
parallel = 0
status = sys.stdout
warning = sys.stderr
error = sys.stderr
@@ -220,6 +222,13 @@ def main(argv):
elif opt == '-v':
verbosity += 1
show_traceback = True
elif opt == '-j':
try:
parallel = int(val)
except ValueError:
print >>sys.stderr, ('Error: -j option argument must be an '
'integer.')
return 1
if warning and warnfile:
warnfp = open(warnfile, 'w')
@@ -234,7 +243,7 @@ def main(argv):
try:
app = Sphinx(srcdir, confdir, outdir, doctreedir, buildername,
confoverrides, status, warning, freshenv,
warningiserror, tags, verbosity)
warningiserror, tags, verbosity, parallel)
app.build(force_all, filenames)
return app.statuscode
except (Exception, KeyboardInterrupt), err: