Remove doctree cache from pickled BuildEnvironment (#12431)

This commit removes `_write_doc_doctree_cache` from the pickled `BuildEnvironment`,
greatly reducing its on-disk size and the memory consumed when forking for parallel builds,
and ensuring outdated doctrees are not used for re-builds.
This commit is contained in:
Chris Sewell 2024-06-17 12:36:20 +02:00 committed by GitHub
parent 1e77e6bc9a
commit 9c834ff8c5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -184,11 +184,21 @@ class BuildEnvironment:
# docnames to re-read unconditionally on next build
self.reread_always: set[str] = set()
# docname -> pickled doctree
self._pickled_doctree_cache: dict[str, bytes] = {}
"""In-memory cache for reading pickled doctrees from disk.
docname -> pickled doctree
This cache is used in the ``get_doctree`` method to avoid reading the
doctree from disk multiple times.
"""
# docname -> doctree
self._write_doc_doctree_cache: dict[str, nodes.document] = {}
"""In-memory cache for unpickling doctrees from disk.
docname -> doctree
Items are added in ``Builder.write_doctree``, during the read phase,
then used only in the ``get_and_resolve_doctree`` method.
"""
# File metadata
# docname -> dict of metadata items
@ -265,10 +275,12 @@ class BuildEnvironment:
def __getstate__(self) -> dict:
    """Obtains serializable data for pickling.

    Returns a shallow copy of the instance ``__dict__`` in which the
    attributes that cannot be pickled and the in-memory doctree caches
    have been replaced by empty placeholders.  The instance itself is
    left untouched.
    """
    state = self.__dict__.copy()
    # clear unpicklable attributes
    state.update(app=None, domains={}, events=None)
    # clear in-memory doctree caches, to reduce memory consumption and
    # ensure that, upon restoring the state, the most recent pickled files
    # on the disk are used instead of those from a possibly outdated state
    state.update(_pickled_doctree_cache={}, _write_doc_doctree_cache={})
    return state
def __setstate__(self, state: dict) -> None: