Remove non-determinism

To enable packages using Sphinx to build reproducibly, its output
needs to be the same from one build to another.

Its output now strips memory references such as:

  <__main__.A at 0x7f68cb685710>

In addition, various generated files (objects.inv, searchindex.js,
translations) are now written with their keys in a deterministic order.

Based on a patch by Chris Lamb <lamby@debian.org>.
This commit is contained in:
Dmitry Shachnev 2015-01-28 19:28:53 +03:00
parent 81ffb36772
commit d24bd73d0c
5 changed files with 32 additions and 17 deletions

View File

@ -17,7 +17,7 @@ import posixpath
from os import path
from hashlib import md5
from six import iteritems, itervalues, text_type, string_types
from six import iteritems, text_type, string_types
from six.moves import cPickle as pickle
from docutils import nodes
from docutils.io import DocTreeInput, StringOutput
@ -268,7 +268,8 @@ class StandaloneHTMLBuilder(Builder):
# html_domain_indices can be False/True or a list of index names
indices_config = self.config.html_domain_indices
if indices_config:
for domain in itervalues(self.env.domains):
for domain_name in sorted(self.env.domains):
domain = self.env.domains[domain_name]
for indexcls in domain.indices:
indexname = '%s-%s' % (domain.name, indexcls.name)
if isinstance(indices_config, list):
@ -817,7 +818,7 @@ class StandaloneHTMLBuilder(Builder):
compressor = zlib.compressobj(9)
for domainname, domain in iteritems(self.env.domains):
for name, dispname, type, docname, anchor, prio in \
domain.get_objects():
sorted(domain.get_objects()):
if anchor.endswith(name):
# this can shorten the inventory by as much as 25%
anchor = anchor[:-len(name)] + '$'

View File

@ -30,7 +30,7 @@ from sphinx.application import ExtensionError
from sphinx.util.nodes import nested_parse_with_titles
from sphinx.util.compat import Directive
from sphinx.util.inspect import getargspec, isdescriptor, safe_getmembers, \
safe_getattr, safe_repr, is_builtin_class_method
safe_getattr, object_description, is_builtin_class_method
from sphinx.util.docstrings import prepare_docstring
@ -243,6 +243,11 @@ def between(marker, what=None, keepempty=False, exclude=False):
return process
def formatargspec(*argspec):
    """Format an argument spec, rendering defaults with object_description().

    All positional arguments are forwarded unchanged to
    ``inspect.formatargspec``; only the ``formatvalue`` callback is
    overridden, so that every default value is emitted as ``'='`` followed
    by ``object_description(value)``.
    """
    def _format_default(value):
        # Mirror the '=<value>' layout while delegating the textual form of
        # the value to object_description().
        return '=' + object_description(value)

    return inspect.formatargspec(*argspec, formatvalue=_format_default)
class Documenter(object):
"""
A Documenter knows how to autodocument a single object type. When
@ -1054,7 +1059,7 @@ class FunctionDocumenter(DocstringSignatureMixin, ModuleLevelDocumenter):
argspec = getargspec(self.object.__init__)
if argspec[0]:
del argspec[0][0]
args = inspect.formatargspec(*argspec)
args = formatargspec(*argspec)
# escape backslashes for reST
args = args.replace('\\', '\\\\')
return args
@ -1109,7 +1114,7 @@ class ClassDocumenter(DocstringSignatureMixin, ModuleLevelDocumenter):
return None
if argspec[0] and argspec[0][0] in ('cls', 'self'):
del argspec[0][0]
return inspect.formatargspec(*argspec)
return formatargspec(*argspec)
def format_signature(self):
if self.doc_as_attr:
@ -1220,7 +1225,7 @@ class DataDocumenter(ModuleLevelDocumenter):
sourcename = self.get_sourcename()
if not self.options.annotation:
try:
objrepr = safe_repr(self.object)
objrepr = object_description(self.object)
except ValueError:
pass
else:
@ -1276,7 +1281,7 @@ class MethodDocumenter(DocstringSignatureMixin, ClassLevelDocumenter):
argspec = getargspec(self.object)
if argspec[0] and argspec[0][0] in ('cls', 'self'):
del argspec[0][0]
args = inspect.formatargspec(*argspec)
args = formatargspec(*argspec)
# escape backslashes for reST
args = args.replace('\\', '\\\\')
return args
@ -1333,7 +1338,7 @@ class AttributeDocumenter(DocstringStripSignatureMixin, ClassLevelDocumenter):
if not self.options.annotation:
if not self._datadescriptor:
try:
objrepr = safe_repr(self.object)
objrepr = object_description(self.object)
except ValueError:
pass
else:

View File

@ -313,13 +313,13 @@ class IndexBuilder(object):
if fn in fn2index:
rv[k] = fn2index[fn]
else:
rv[k] = [fn2index[fn] for fn in v if fn in fn2index]
rv[k] = sorted([fn2index[fn] for fn in v if fn in fn2index])
return rvs
def freeze(self):
"""Create a usable data structure for serializing."""
filenames = list(self._titles.keys())
titles = list(self._titles.values())
filenames = sorted(self._titles.keys())
titles = sorted(self._titles.values())
fn2index = dict((f, i) for (i, f) in enumerate(filenames))
terms, title_terms = self.get_terms(fn2index)

View File

@ -9,6 +9,8 @@
:license: BSD, see LICENSE for details.
"""
import re
# this imports the standard library inspect module without resorting to
# a relative import of this module
inspect = __import__('inspect')
@ -18,6 +20,8 @@ from six.moves import builtins
from sphinx.util import force_decode
# Matches the non-deterministic `` at 0x...`` memory address that default
# reprs embed just before a closing ``>``.  The lookahead is deliberately
# not anchored to the end of the string, so addresses inside nested reprs
# (e.g. ``<bound method A.f of <A object at 0x...>>`` or a list of objects)
# are matched too.  Matching is case-insensitive so that upper-case hex
# digits are also recognised.
memory_address_re = re.compile(r' at 0x[0-9a-f]{8,16}(?=>)', re.IGNORECASE)
if PY3:
from functools import partial
@ -123,14 +127,17 @@ def safe_getmembers(object, predicate=None, attr_getter=safe_getattr):
return results
def safe_repr(object):
def object_description(object):
"""A repr() implementation that returns text safe to use in reST context."""
try:
s = repr(object)
except Exception:
raise ValueError
if isinstance(s, binary_type):
return force_decode(s, None).replace('\n', ' ')
s = force_decode(s, None)
# Strip non-deterministic memory addresses such as
# ``<__main__.A at 0x7f68cb685710>``
s = memory_address_re.sub('', s)
return s.replace('\n', ' ')

View File

@ -89,11 +89,13 @@ def dumps(obj, key=False):
elif isinstance(obj, integer_types + (float,)):
return str(obj)
elif isinstance(obj, dict):
return '{%s}' % ','.join('%s:%s' % (
return '{%s}' % ','.join(sorted('%s:%s' % (
dumps(key, True),
dumps(value)
) for key, value in iteritems(obj))
elif isinstance(obj, (tuple, list, set)):
) for key, value in iteritems(obj)))
elif isinstance(obj, set):
return '[%s]' % ','.join(sorted(dumps(x) for x in obj))
elif isinstance(obj, (tuple, list)):
return '[%s]' % ','.join(dumps(x) for x in obj)
elif isinstance(obj, string_types):
return encode_string(obj)