Make `searchindex.js deterministic (#11665)

This commit is contained in:
Pietro Albini 2023-09-16 05:36:51 +02:00 committed by GitHub
parent f0c25a0ada
commit 8e768e6c23
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 40 additions and 1 deletions

View File

@ -18,6 +18,8 @@ Bugs fixed
* #11668: Raise a useful error when ``theme.conf`` is missing.
Patch by Vinay Sajip.
* #11622: Ensure that the order of keys in ``searchindex.js`` is deterministic.
Patch by Pietro Albini.
Testing
-------

View File

@ -162,7 +162,7 @@ class _JavaScriptIndex:
SUFFIX = ')'
def dumps(self, data: Any) -> str:
return self.PREFIX + json.dumps(data) + self.SUFFIX
return self.PREFIX + json.dumps(data, sort_keys=True) + self.SUFFIX
def loads(self, s: str) -> Any:
data = s[len(self.PREFIX):-len(self.SUFFIX)]

View File

@ -304,3 +304,40 @@ def test_parallel(app):
app.build()
index = load_searchindex(app.outdir / 'searchindex.js')
assert index['docnames'] == ['index', 'nosearch', 'tocitem']
@pytest.mark.sphinx(testroot='search')
def test_search_index_is_deterministic(app):
lists_not_to_sort = {
# Each element of .titles is related to the element of .docnames in the same position.
# The ordering is deterministic because .docnames is sorted.
'.titles',
# Each element of .filenames is related to the element of .docnames in the same position.
# The ordering is deterministic because .docnames is sorted.
'.filenames',
}
# In the search index, titles inside .alltitles are stored as a tuple of
# (document_idx, title_anchor). Tuples are represented as lists in JSON,
# but their contents must not be sorted. We cannot sort them anyway, as
# document_idx is an int and title_anchor is a str.
def is_title_tuple_type(item):
return len(item) == 2 and isinstance(item[0], int) and isinstance(item[1], str)
def assert_is_sorted(item, path):
err_path = path if path else '<root>'
if isinstance(item, dict):
assert list(item.keys()) == sorted(item.keys()), f'{err_path} is not sorted'
for key, value in item.items():
assert_is_sorted(value, f'{path}.{key}')
elif isinstance(item, list):
if not is_title_tuple_type(item) and path not in lists_not_to_sort:
assert item == sorted(item), f'{err_path} is not sorted'
for i, child in enumerate(item):
assert_is_sorted(child, f'{path}[{i}]')
app.builder.build_all()
index = load_searchindex(app.outdir / 'searchindex.js')
# Pretty print the index. Only shown by pytest on failure.
print(f'searchindex.js contents:\n\n{json.dumps(index, indent=2)}')
assert_is_sorted(index, '')