From 5cd0841e5f041f3ef03840fafac425654a48b40d Mon Sep 17 00:00:00 2001
From: Georg Brandl
Date: Sun, 13 Jan 2013 19:46:34 +0100
Subject: [PATCH] builder: fix parallel build globals problems by splitting
 write_doc in two stages: write_doc() and write_doc_serialized(), the latter
 of which is not called in the parallel processes. This costs speedup, of
 course: from about 50% we are down to about 30% improvement on my 4-core
 machine.

---
 sphinx/builders/__init__.py | 39 +++++++++++++++++++++++++------------
 sphinx/builders/html.py     |  9 +++++++--
 2 files changed, 34 insertions(+), 14 deletions(-)

diff --git a/sphinx/builders/__init__.py b/sphinx/builders/__init__.py
index f217c2c00..bbf7fdd3d 100644
--- a/sphinx/builders/__init__.py
+++ b/sphinx/builders/__init__.py
@@ -315,23 +315,24 @@ class Builder(object):
         for docname in self.status_iterator(
             docnames, 'writing output... ', darkgreen, len(docnames)):
             doctree = self.env.get_and_resolve_doctree(docname, self)
+            self.write_doc_serialized(docname, doctree)
             self.write_doc(docname, doctree)
         for warning in warnings:
             self.warn(*warning)
 
     def _write_parallel(self, docnames, warnings, nproc):
-        def write_process(docnames):
+        def write_process(docs):
             try:
-                for docname in docnames:
-                    doctree = self.env.get_and_resolve_doctree(docname, self)
+                for docname, doctree in docs:
                     self.write_doc(docname, doctree)
-                for warning in warnings:
-                    self.warn(*warning)
             except KeyboardInterrupt:
                 pass  # do not print a traceback on Ctrl-C
+            finally:
+                for warning in warnings:
+                    self.warn(*warning)
 
-        def process_thread(docnames):
-            p = multiprocessing.Process(target=write_process, args=(docnames,))
+        def process_thread(docs):
+            p = multiprocessing.Process(target=write_process, args=(docs,))
             p.start()
             p.join()
             semaphore.release()
@@ -341,11 +342,14 @@ class Builder(object):
         # list of threads to join when waiting for completion
         threads = []
 
-        # warm up caches/compile templates using the first docname
-        write_process([docnames[0]])
+        # warm up caches/compile templates using the first document
+        firstname, docnames = docnames[0], docnames[1:]
+        doctree = self.env.get_and_resolve_doctree(firstname, self)
+        self.write_doc_serialized(firstname, doctree)
+        self.write_doc(firstname, doctree)
+        # for the rest, determine how many documents to write in one go
         docnames = docnames[1:]
         ndocs = len(docnames)
-        # determine how many documents to write in one go
         chunksize = min(ndocs // nproc, 10)
         nchunks, rest = divmod(ndocs, chunksize)
         if rest:
@@ -355,9 +359,14 @@ class Builder(object):
         for docnames in self.status_iterator(
             chunks, 'writing output... ', darkgreen, len(chunks),
             lambda chk: '%s .. %s' % (chk[0], chk[-1])):
-            semaphore.acquire()
+            docs = []
+            for docname in docnames:
+                doctree = self.env.get_and_resolve_doctree(docname, self)
+                self.write_doc_serialized(docname, doctree)
+                docs.append((docname, doctree))
             # start a new thread to oversee the completion of this chunk
-            t = threading.Thread(target=process_thread, args=(docnames,))
+            semaphore.acquire()
+            t = threading.Thread(target=process_thread, args=(docs,))
             t.setDaemon(True)
             t.start()
             threads.append(t)
@@ -373,6 +382,12 @@ class Builder(object):
     def write_doc(self, docname, doctree):
         raise NotImplementedError
 
+    def write_doc_serialized(self, docname, doctree):
+        """Handle parts of write_doc that must be called in the main process
+        if parallel build is active.
+        """
+        pass
+
     def finish(self):
         """Finish the building process.
 
diff --git a/sphinx/builders/html.py b/sphinx/builders/html.py
index 4c700c8ab..323763339 100644
--- a/sphinx/builders/html.py
+++ b/sphinx/builders/html.py
@@ -415,7 +415,6 @@ class StandaloneHTMLBuilder(Builder):
 
         self.secnumbers = self.env.toc_secnumbers.get(docname, {})
         self.imgpath = relative_uri(self.get_target_uri(docname), '_images')
-        self.post_process_images(doctree)
         self.dlpath = relative_uri(self.get_target_uri(docname), '_downloads')
         self.current_docname = docname
         self.docwriter.write(doctree, destination)
@@ -424,9 +423,15 @@ class StandaloneHTMLBuilder(Builder):
         metatags = self.docwriter.clean_meta
 
         ctx = self.get_doc_context(docname, body, metatags)
-        self.index_page(docname, doctree, ctx.get('title', ''))
         self.handle_page(docname, ctx, event_arg=doctree)
 
+    def write_doc_serialized(self, docname, doctree):
+        self.imgpath = relative_uri(self.get_target_uri(docname), '_images')
+        self.post_process_images(doctree)
+        title = self.env.longtitles.get(docname)
+        title = title and self.render_partial(title)['title'] or ''
+        self.index_page(docname, doctree, title)
+
     def finish(self):
         self.info(bold('writing additional files...'), nonl=1)
 