merge with parallel repo

This commit is contained in:
Georg Brandl
2013-03-29 13:01:21 +01:00
8 changed files with 130 additions and 12 deletions

View File

@@ -136,6 +136,8 @@ Features added
- Speed up building the search index by caching the results of the word
stemming routines. Saves about 20 seconds when building the Python
documentation.
- PR#108: Add experimental support for parallel building with a new
:option:`-j` option.
Documentation
-------------

View File

@@ -96,6 +96,15 @@ The :program:`sphinx-build` script has several options:
the build directory; with this option you can select a different cache
directory (the doctrees can be shared between all builders).
.. option:: -j N
Distribute the build over *N* processes in parallel, to make building on
multiprocessor machines more effective. Note that not all parts and not all
builders of Sphinx can be parallelized.
.. versionadded:: 1.2
This option should be considered *experimental*.
.. option:: -c path
Don't look for the :file:`conf.py` in the source directory, but use the given

View File

@@ -61,7 +61,8 @@ class Sphinx(object):
def __init__(self, srcdir, confdir, outdir, doctreedir, buildername,
confoverrides=None, status=sys.stdout, warning=sys.stderr,
freshenv=False, warningiserror=False, tags=None, verbosity=0):
freshenv=False, warningiserror=False, tags=None, verbosity=0,
parallel=0):
self.verbosity = verbosity
self.next_listener_id = 0
self._extensions = {}
@@ -76,6 +77,8 @@ class Sphinx(object):
self.outdir = outdir
self.doctreedir = doctreedir
self.parallel = parallel
if status is None:
self._status = StringIO()
self.quiet = True

View File

@@ -12,6 +12,12 @@
import os
from os import path
try:
import multiprocessing
import threading
except ImportError:
multiprocessing = threading = None
from docutils import nodes
from sphinx.util.osutil import SEP, relative_uri
@@ -33,6 +39,8 @@ class Builder(object):
format = ''
# doctree versioning method
versioning_method = 'none'
# allow parallel write_doc() calls
allow_parallel = False
def __init__(self, app):
self.env = app.env
@@ -98,19 +106,21 @@ class Builder(object):
"""
raise NotImplementedError
def old_status_iterator(self, iterable, summary, colorfunc=darkgreen):
def old_status_iterator(self, iterable, summary, colorfunc=darkgreen,
stringify_func=str):
l = 0
for item in iterable:
if l == 0:
self.info(bold(summary), nonl=1)
l = 1
self.info(colorfunc(item) + ' ', nonl=1)
self.info(colorfunc(stringify_func(item)) + ' ', nonl=1)
yield item
if l == 1:
self.info()
# new version with progress info
def status_iterator(self, iterable, summary, colorfunc=darkgreen, length=0):
def status_iterator(self, iterable, summary, colorfunc=darkgreen, length=0,
stringify_func=str):
if length == 0:
for item in self.old_status_iterator(iterable, summary, colorfunc):
yield item
@@ -120,7 +130,7 @@ class Builder(object):
for item in iterable:
l += 1
s = '%s[%3d%%] %s' % (summary, 100*l/length,
colorfunc(item))
colorfunc(stringify_func(item)))
if self.app.verbosity:
s += '\n'
else:
@@ -287,16 +297,87 @@ class Builder(object):
self.prepare_writing(docnames)
self.info('done')
# write target files
warnings = []
self.env.set_warnfunc(lambda *args: warnings.append(args))
# check for prerequisites to parallel build
# (parallel only works on POSIX, because the forking impl of
# multiprocessing is required)
if not (multiprocessing and
self.app.parallel > 1 and
self.allow_parallel and
os.name == 'posix'):
self._write_serial(sorted(docnames), warnings)
else:
# number of subprocesses is parallel-1 because the main process
# is busy loading doctrees and doing write_doc_serialized()
self._write_parallel(sorted(docnames), warnings,
nproc=self.app.parallel - 1)
self.env.set_warnfunc(self.warn)
def _write_serial(self, docnames, warnings):
for docname in self.status_iterator(
sorted(docnames), 'writing output... ', darkgreen, len(docnames)):
docnames, 'writing output... ', darkgreen, len(docnames)):
doctree = self.env.get_and_resolve_doctree(docname, self)
self.write_doc_serialized(docname, doctree)
self.write_doc(docname, doctree)
for warning in warnings:
self.warn(*warning)
self.env.set_warnfunc(self.warn)
def _write_parallel(self, docnames, warnings, nproc):
    """Write *docnames* using up to *nproc* forked worker processes.

    The first document is written in the main process to warm up caches
    and compile templates; the remaining documents are partitioned into
    chunks, each handed to one worker process.  Doctree resolution and
    ``write_doc_serialized()`` always run in the main process; only
    ``write_doc()`` runs in the workers.  *warnings* collected so far are
    re-emitted when each worker finishes.
    """
    def write_process(docs):
        # runs in the forked child process
        try:
            for docname, doctree in docs:
                self.write_doc(docname, doctree)
        except KeyboardInterrupt:
            pass  # do not print a traceback on Ctrl-C
        finally:
            for warning in warnings:
                self.warn(*warning)

    def process_thread(docs):
        # oversee one worker process; release the worker slot when done
        p = multiprocessing.Process(target=write_process, args=(docs,))
        p.start()
        p.join()
        semaphore.release()

    # nothing to do for an empty document list (avoids IndexError below)
    if not docnames:
        return

    # allow only "nproc" worker processes at once
    semaphore = threading.Semaphore(nproc)
    # list of threads to join when waiting for completion
    threads = []

    # warm up caches/compile templates using the first document
    firstname, docnames = docnames[0], docnames[1:]
    doctree = self.env.get_and_resolve_doctree(firstname, self)
    self.write_doc_serialized(firstname, doctree)
    self.write_doc(firstname, doctree)

    # for the rest, determine how many documents to write in one go
    # (BUG FIX: the original sliced ``docnames[1:]`` a second time here,
    # silently skipping the second document of every parallel build)
    ndocs = len(docnames)
    # at least one document per chunk, at most 10 -- clamping to >= 1
    # prevents divmod() dividing by zero when ndocs < nproc
    chunksize = max(min(ndocs // nproc, 10), 1)
    nchunks, rest = divmod(ndocs, chunksize)
    if rest:
        nchunks += 1
    # partition documents in "chunks" that will be written by one Process
    chunks = [docnames[i*chunksize:(i+1)*chunksize] for i in range(nchunks)]
    for docnames in self.status_iterator(
            chunks, 'writing output... ', darkgreen, len(chunks),
            lambda chk: '%s .. %s' % (chk[0], chk[-1])):
        docs = []
        for docname in docnames:
            # resolve and serialize in the main process only
            doctree = self.env.get_and_resolve_doctree(docname, self)
            self.write_doc_serialized(docname, doctree)
            docs.append((docname, doctree))
        # start a new thread to oversee the completion of this chunk
        semaphore.acquire()
        t = threading.Thread(target=process_thread, args=(docs,))
        t.setDaemon(True)
        t.start()
        threads.append(t)

    # make sure all threads have finished
    self.info(bold('waiting for workers...'))
    for t in threads:
        t.join()
def prepare_writing(self, docnames):
raise NotImplementedError
@@ -304,6 +385,12 @@ class Builder(object):
def write_doc(self, docname, doctree):
raise NotImplementedError
def write_doc_serialized(self, docname, doctree):
"""Handle parts of write_doc that must be called in the main process
if parallel build is active.
"""
pass
def finish(self):
"""Finish the building process.

View File

@@ -73,6 +73,7 @@ class StandaloneHTMLBuilder(Builder):
name = 'html'
format = 'html'
copysource = True
allow_parallel = True
out_suffix = '.html'
link_suffix = '.html' # defaults to matching out_suffix
indexer_format = js_index
@@ -426,7 +427,6 @@ class StandaloneHTMLBuilder(Builder):
self.secnumbers = self.env.toc_secnumbers.get(docname, {})
self.imgpath = relative_uri(self.get_target_uri(docname), '_images')
self.post_process_images(doctree)
self.dlpath = relative_uri(self.get_target_uri(docname), '_downloads')
self.current_docname = docname
self.docwriter.write(doctree, destination)
@@ -435,9 +435,15 @@ class StandaloneHTMLBuilder(Builder):
metatags = self.docwriter.clean_meta
ctx = self.get_doc_context(docname, body, metatags)
self.index_page(docname, doctree, ctx.get('title', ''))
self.handle_page(docname, ctx, event_arg=doctree)
def write_doc_serialized(self, docname, doctree):
self.imgpath = relative_uri(self.get_target_uri(docname), '_images')
self.post_process_images(doctree)
title = self.env.longtitles.get(docname)
title = title and self.render_partial(title)['title'] or ''
self.index_page(docname, doctree, title)
def finish(self):
self.info(bold('writing additional files...'), nonl=1)

View File

@@ -23,6 +23,7 @@ class TextBuilder(Builder):
name = 'text'
format = 'text'
out_suffix = '.txt'
allow_parallel = True
def init(self):
pass

View File

@@ -26,6 +26,7 @@ class XMLBuilder(Builder):
name = 'xml'
format = 'xml'
out_suffix = '.xml'
allow_parallel = True
_writer_class = XMLWriter

View File

@@ -48,6 +48,7 @@ General options
-E don't use a saved environment, always read all files
-d <path> path for the cached environment and doctree files
(default: outdir/.doctrees)
-j <N> build in parallel with N processes where possible
Build configuration options
^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -89,7 +90,7 @@ def main(argv):
nocolor()
try:
opts, args = getopt.getopt(argv[1:], 'ab:t:d:c:CD:A:nNEqQWw:PThv',
opts, args = getopt.getopt(argv[1:], 'ab:t:d:c:CD:A:nNEqQWw:PThvj:',
['help', 'version'])
allopts = set(opt[0] for opt in opts)
if '-h' in allopts or '--help' in allopts:
@@ -139,6 +140,7 @@ def main(argv):
force_all = freshenv = warningiserror = use_pdb = False
show_traceback = False
verbosity = 0
parallel = 0
status = sys.stdout
warning = sys.stderr
error = sys.stderr
@@ -220,6 +222,13 @@ def main(argv):
elif opt == '-v':
verbosity += 1
show_traceback = True
elif opt == '-j':
try:
parallel = int(val)
except ValueError:
print >>sys.stderr, ('Error: -j option argument must be an '
'integer.')
return 1
if warning and warnfile:
warnfp = open(warnfile, 'w')
@@ -234,7 +243,7 @@ def main(argv):
try:
app = Sphinx(srcdir, confdir, outdir, doctreedir, buildername,
confoverrides, status, warning, freshenv,
warningiserror, tags, verbosity)
warningiserror, tags, verbosity, parallel)
app.build(force_all, filenames)
return app.statuscode
except (Exception, KeyboardInterrupt), err: