Mirror of https://github.com/sphinx-doc/sphinx.git, synced 2025-02-25 18:55:22 -06:00
intersphinx: Fix cache handling (#12087)
Co-authored-by: Adam Turner <9087854+aa-turner@users.noreply.github.com>
Co-authored-by: James Addison <55152140+jayaddison@users.noreply.github.com>
sphinx/ext/intersphinx.py
@@ -6,6 +6,7 @@ import concurrent.futures
 import functools
 import posixpath
 import time
+from operator import itemgetter
 from os import path
 from typing import TYPE_CHECKING
 from urllib.parse import urlsplit, urlunsplit
@@ -138,36 +139,41 @@ def load_mappings(app: Sphinx) -> None:
     intersphinx_cache: dict[InventoryURI, InventoryCacheEntry] = inventories.cache
+    intersphinx_mapping: IntersphinxMapping = app.config.intersphinx_mapping
+
+    expected_uris = {uri for _name, (uri, _invs) in intersphinx_mapping.values()}
+
+    for uri in frozenset(intersphinx_cache):
+        if intersphinx_cache[uri][0] not in intersphinx_mapping:
+            # Remove all cached entries that are no longer in `intersphinx_mapping`.
+            del intersphinx_cache[uri]
+        elif uri not in expected_uris:
+            # Remove cached entries with a different target URI
+            # than the one in `intersphinx_mapping`.
+            # This happens when the URI in `intersphinx_mapping` is changed.
+            del intersphinx_cache[uri]

     with concurrent.futures.ThreadPoolExecutor() as pool:
+        futures = []
+        for name, (uri, invs) in intersphinx_mapping.values():
+            futures.append(pool.submit(
+                fetch_inventory_group, name, uri, invs, intersphinx_cache, app, now,
+            ))
-        futures = [
-            pool.submit(fetch_inventory_group, name, uri, invs, intersphinx_cache, app, now)
-            for name, (uri, invs) in app.config.intersphinx_mapping.values()
-        ]
         updated = [f.result() for f in concurrent.futures.as_completed(futures)]

     if any(updated):
         # clear the local inventories
         inventories.clear()

         # Duplicate values in different inventories will shadow each
-        # other; which one will override which can vary between builds
-        # since they are specified using an unordered dict. To make
-        # it more consistent, we sort the named inventories and then
-        # add the unnamed inventories last. This means that the
-        # unnamed inventories will shadow the named ones but the named
-        # ones can still be accessed when the name is specified.
-        named_vals = []
-        unnamed_vals = []
-        for name, _expiry, invdata in intersphinx_cache.values():
-            if name:
-                named_vals.append((name, invdata))
-            else:
-                unnamed_vals.append((name, invdata))
-        for name, invdata in sorted(named_vals) + unnamed_vals:
-            if name:
-                inventories.named_inventory[name] = invdata
-            for type, objects in invdata.items():
-                inventories.main_inventory.setdefault(type, {}).update(objects)
+        # other; which one will override which can vary between builds.
+        #
+        # In an attempt to make this more consistent,
+        # we sort the named inventories in the cache
+        # by their name and expiry time ``(NAME, EXPIRY)``.
+        by_name_and_time = itemgetter(0, 1)  # 0: name, 1: expiry
+        cache_values = sorted(intersphinx_cache.values(), key=by_name_and_time)
+        for name, _expiry, invdata in cache_values:
+            inventories.named_inventory[name] = invdata
+            for objtype, objects in invdata.items():
+                inventories.main_inventory.setdefault(objtype, {}).update(objects)


 def fetch_inventory_group(
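The pruning rule above is the heart of the fix. Below is a minimal, self-contained sketch of how it behaves, using plain dicts in place of Sphinx's cache and mapping structures; every name and URL in it is illustrative, not part of the commit.

from operator import itemgetter

# cache: target URI -> (project name, expiry time, inventory data)
cache = {
    'https://old.example/objects': ('spam', 100, {'py:module': {}}),
    'https://eggs.example': ('eggs', 200, {'py:module': {}}),
}
# mapping values mirror intersphinx_mapping: name -> (name, (URI, fallbacks))
mapping = {
    'spam': ('spam', ('https://new.example/objects', (None,))),  # URI changed
    'eggs': ('eggs', ('https://eggs.example', (None,))),         # unchanged
}

expected_uris = {uri for _name, (uri, _invs) in mapping.values()}
for uri in frozenset(cache):
    if cache[uri][0] not in mapping:
        del cache[uri]  # project dropped from the mapping entirely
    elif uri not in expected_uris:
        del cache[uri]  # project kept, but it now points at a different URI

assert list(cache) == ['https://eggs.example']  # stale 'spam' entry evicted

# deterministic merge order: sort surviving entries by (name, expiry)
for name, _expiry, invdata in sorted(cache.values(), key=itemgetter(0, 1)):
    print(name, sorted(invdata))  # -> eggs ['py:module']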
@@ -179,39 +185,43 @@ def fetch_inventory_group(
     now: int,
 ) -> bool:
     cache_time = now - app.config.intersphinx_cache_limit * 86400
+
+    updated = False
     failures = []
-    try:
-        for inv in invs:
-            if not inv:
-                inv = posixpath.join(uri, INVENTORY_FILENAME)
-            # decide whether the inventory must be read: always read local
-            # files; remote ones only if the cache time is expired
-            if '://' not in inv or uri not in cache or cache[uri][1] < cache_time:
-                safe_inv_url = _get_safe_url(inv)
-                inv_descriptor = name or 'main_inventory'
-                LOGGER.info(__("loading intersphinx inventory '%s' from %s..."),
-                            inv_descriptor, safe_inv_url)
-                try:
-                    invdata = fetch_inventory(app, uri, inv)
-                except Exception as err:
-                    failures.append(err.args)
-                    continue
-                if invdata:
-                    cache[uri] = name, now, invdata
-                    return True
-        return False
-    finally:
-        if not failures:
-            pass
-        elif len(failures) < len(invs):
-            LOGGER.info(__('encountered some issues with some of the inventories,'
-                           ' but they had working alternatives:'))
-            for fail in failures:
-                LOGGER.info(*fail)
-        else:
-            issues = '\n'.join(f[0] % f[1:] for f in failures)
-            LOGGER.warning(__('failed to reach any of the inventories '
-                              'with the following issues:') + '\n' + issues)
+
+    for location in invs:
+        # location is either None or a non-empty string
+        inv = f'{uri}/{INVENTORY_FILENAME}' if location is None else location
+
+        # decide whether the inventory must be read: always read local
+        # files; remote ones only if the cache time is expired
+        if '://' not in inv or uri not in cache or cache[uri][1] < cache_time:
+            LOGGER.info(__("loading intersphinx inventory '%s' from %s ..."),
+                        name, _get_safe_url(inv))
+
+            try:
+                invdata = fetch_inventory(app, uri, inv)
+            except Exception as err:
+                failures.append(err.args)
+                continue
+
+            if invdata:
+                cache[uri] = name, now, invdata
+                updated = True
+                break
+
+    if not failures:
+        pass
+    elif len(failures) < len(invs):
+        LOGGER.info(__('encountered some issues with some of the inventories,'
+                       ' but they had working alternatives:'))
+        for fail in failures:
+            LOGGER.info(*fail)
+    else:
+        issues = '\n'.join(f[0] % f[1:] for f in failures)
+        LOGGER.warning(__('failed to reach any of the inventories '
+                          'with the following issues:') + '\n' + issues)
+    return updated


 def fetch_inventory(app: Sphinx, uri: InventoryURI, inv: str) -> Inventory:
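The refresh condition in the new loop is dense; an equivalent spelled-out form is below. The helper name is hypothetical (it does not exist in the commit), and it assumes cache values are (name, expiry, invdata) tuples, as above.

def needs_refresh(inv: str, uri: str, cache: dict, cache_time: int) -> bool:
    # local files (no '://' in the location) are always re-read
    if '://' not in inv:
        return True
    # remote inventories are fetched when the URI was never cached ...
    if uri not in cache:
        return True
    # ... or when the cached entry is older than the cache window
    return cache[uri][1] < cache_time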
tests/test_extensions/test_ext_intersphinx_cache.py (new file, 298 lines)
@@ -0,0 +1,298 @@
"""Test the intersphinx extension."""

from __future__ import annotations

import posixpath
import re
import zlib
from http.server import BaseHTTPRequestHandler
from io import BytesIO
from typing import TYPE_CHECKING

from sphinx.ext.intersphinx import InventoryAdapter
from sphinx.testing.util import SphinxTestApp

from tests.utils import http_server

if TYPE_CHECKING:
    from collections.abc import Iterable
    from typing import BinaryIO

    from sphinx.util.typing import InventoryItem

BASE_CONFIG = {
    'extensions': ['sphinx.ext.intersphinx'],
    'intersphinx_timeout': 0.1,
}


class InventoryEntry:
    """Entry in the Intersphinx inventory."""

    __slots__ = (
        'name', 'display_name', 'domain_name',
        'object_type', 'uri', 'anchor', 'priority',
    )

    def __init__(
        self,
        name: str = 'this',
        *,
        display_name: str | None = None,
        domain_name: str = 'py',
        object_type: str = 'obj',
        uri: str = 'index.html',
        anchor: str = '',
        priority: int = 0,
    ):
        if anchor.endswith(name):
            anchor = anchor[:-len(name)] + '$'

        if anchor:
            uri += '#' + anchor

        if display_name is None or display_name == name:
            display_name = '-'

        self.name = name
        self.display_name = display_name
        self.domain_name = domain_name
        self.object_type = object_type
        self.uri = uri
        self.anchor = anchor
        self.priority = priority

    def format(self) -> str:
        """Format the entry as it appears in the inventory file."""
        return (f'{self.name} {self.domain_name}:{self.object_type} '
                f'{self.priority} {self.uri} {self.display_name}\n')
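For illustration, the default entry renders to a single objects.inv record; the expected output is shown as a comment (this snippet is not part of the commit).

entry = InventoryEntry()
print(entry.format(), end='')  # -> this py:obj 0 index.html -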


class IntersphinxProject:
    def __init__(
        self,
        *,
        name: str = 'spam',
        version: str | int = 1,
        baseurl: str = '',
        baseuri: str = '',
        file: str | None = None,
    ) -> None:
        #: The project name.
        self.name = name
        #: The escaped project name.
        self.safe_name = re.sub(r'\s+', ' ', name)

        #: The project version as a string.
        self.version = version = str(version)
        #: The escaped project version.
        self.safe_version = re.sub(r'\s+', ' ', version)

        #: The project base URL (e.g., http://localhost:9341).
        self.baseurl = baseurl
        #: The project base URI, relative to *baseurl* (e.g., 'spam').
        self.uri = baseuri
        #: The project URL, as specified in :confval:`intersphinx_mapping`.
        self.url = posixpath.join(baseurl, baseuri)
        #: The project local file, if any.
        self.file = file

    @property
    def record(self) -> dict[str, tuple[str | None, str | None]]:
        """The :confval:`intersphinx_mapping` record for this project."""
        return {self.name: (self.url, self.file)}

    def normalise(self, entry: InventoryEntry) -> tuple[str, InventoryItem]:
        """Format an inventory entry as if it were part of this project."""
        url = posixpath.join(self.url, entry.uri)
        return entry.name, (self.safe_name, self.safe_version, url, entry.display_name)
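A quick usage sketch for the fixture above; the base URL and route are made-up values, and the snippet is illustrative rather than part of the test file.

project = IntersphinxProject(name='spam', version=2,
                             baseurl='http://localhost:7777', baseuri='eggs')
# the intersphinx_mapping record and a normalised inventory item
assert project.record == {'spam': ('http://localhost:7777/eggs', None)}
assert project.normalise(InventoryEntry()) == (
    'this', ('spam', '2', 'http://localhost:7777/eggs/index.html', '-'),
)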


class FakeInventory:
    protocol_version: int

    def __init__(self, project: IntersphinxProject | None = None) -> None:
        self.project = project or IntersphinxProject()

    def serialise(self, entries: Iterable[InventoryEntry] | None = None) -> bytes:
        buffer = BytesIO()
        self._write_headers(buffer)
        entries = entries or [InventoryEntry()]
        self._write_body(buffer, (item.format().encode() for item in entries))
        return buffer.getvalue()

    def _write_headers(self, buffer: BinaryIO) -> None:
        buffer.write((f'# Sphinx inventory version {self.protocol_version}\n'
                      f'# Project: {self.project.safe_name}\n'
                      f'# Version: {self.project.safe_version}\n').encode())

    def _write_body(self, buffer: BinaryIO, lines: Iterable[bytes]) -> None:
        raise NotImplementedError


class FakeInventoryV2(FakeInventory):
    protocol_version = 2

    def _write_headers(self, buffer: BinaryIO) -> None:
        super()._write_headers(buffer)
        buffer.write(b'# The remainder of this file is compressed using zlib.\n')

    def _write_body(self, buffer: BinaryIO, lines: Iterable[bytes]) -> None:
        compressor = zlib.compressobj(9)
        buffer.writelines(map(compressor.compress, lines))
        buffer.write(compressor.flush())
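To sanity-check the fake inventory by hand, one can split the payload on the delimiter line written in `_write_headers` and decompress the rest (an illustrative sketch, not part of the commit).

raw = FakeInventoryV2().serialise()
marker = b'# The remainder of this file is compressed using zlib.\n'
_headers, _sep, payload = raw.partition(marker)
assert zlib.decompress(payload) == b'this py:obj 0 index.html -\n'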


class SingleEntryProject(IntersphinxProject):
    name = 'spam'
    port = 9341  # needed since otherwise it's an automatic port

    def __init__(
        self,
        version: int,
        route: str,
        *,
        item_name: str = 'ham',
        domain_name: str = 'py',
        object_type: str = 'module',
    ) -> None:
        super().__init__(
            name=self.name,
            version=version,
            baseurl=f'http://localhost:{self.port}',
            baseuri=route,
        )
        self.item_name = item_name
        self.domain_name = domain_name
        self.object_type = object_type
        self.reftype = f'{domain_name}:{object_type}'

    def make_entry(self) -> InventoryEntry:
        """Get an inventory entry for this project."""
        name = f'{self.item_name}_{self.version}'
        return InventoryEntry(name, domain_name=self.domain_name, object_type=self.object_type)


def make_inventory_handler(*projects: SingleEntryProject) -> type[BaseHTTPRequestHandler]:
    name, port = projects[0].name, projects[0].port
    assert all(p.name == name for p in projects)
    assert all(p.port == port for p in projects)

    class InventoryHandler(BaseHTTPRequestHandler):
        def do_GET(self):
            self.send_response(200, 'OK')

            data = b''
            for project in projects:
                # create the data to return depending on the endpoint
                if self.path.startswith(f'/{project.uri}/'):
                    entry = project.make_entry()
                    data = FakeInventoryV2(project).serialise([entry])
                    break

            self.send_header('Content-Length', str(len(data)))
            self.end_headers()
            self.wfile.write(data)

        def log_message(*args, **kwargs):
            pass

    return InventoryHandler
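The handler is meant to be mounted via the `http_server` test utility, exactly as the tests below do; a condensed manual fetch might look like this (a sketch; the route 'a' is arbitrary).

from urllib.request import urlopen

project = SingleEntryProject(1, 'a')
InventoryHandler = make_inventory_handler(project)
with http_server(InventoryHandler, port=project.port):
    raw = urlopen('http://localhost:9341/a/objects.inv', timeout=5).read()
assert raw.startswith(b'# Sphinx inventory version 2\n')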


def test_intersphinx_project_fixture():
    # check that our fixture class is correct
    project = SingleEntryProject(1, 'route')
    assert project.url == 'http://localhost:9341/route'


def test_load_mappings_cache(tmp_path):
    tmp_path.joinpath('conf.py').touch()
    tmp_path.joinpath('index.rst').touch()
    project = SingleEntryProject(1, 'a')

    InventoryHandler = make_inventory_handler(project)
    with http_server(InventoryHandler, port=project.port):
        # clean build
        confoverrides = BASE_CONFIG | {'intersphinx_mapping': project.record}
        app = SphinxTestApp('dummy', srcdir=tmp_path, confoverrides=confoverrides)
        app.build()
        app.cleanup()

    # the inventory when querying the 'old' URL
    entry = project.make_entry()
    item = dict((project.normalise(entry),))
    inventories = InventoryAdapter(app.env)
    assert list(inventories.cache) == ['http://localhost:9341/a']
    e_name, e_time, e_inv = inventories.cache['http://localhost:9341/a']
    assert e_name == 'spam'
    assert e_inv == {'py:module': item}
    assert inventories.named_inventory == {'spam': {'py:module': item}}


def test_load_mappings_cache_update(tmp_path):
    tmp_path.joinpath('conf.py').touch()
    tmp_path.joinpath('index.rst').touch()
    old_project = SingleEntryProject(1337, 'old')
    new_project = SingleEntryProject(1701, 'new')

    InventoryHandler = make_inventory_handler(old_project, new_project)
    with http_server(InventoryHandler, port=SingleEntryProject.port):
        # build normally to create an initial cache
        confoverrides1 = BASE_CONFIG | {'intersphinx_mapping': old_project.record}
        app1 = SphinxTestApp('dummy', srcdir=tmp_path, confoverrides=confoverrides1)
        app1.build()
        app1.cleanup()

        # switch to the new URL and assert that the old URL is no longer stored
        confoverrides2 = BASE_CONFIG | {'intersphinx_mapping': new_project.record}
        app2 = SphinxTestApp('dummy', srcdir=tmp_path, confoverrides=confoverrides2)
        app2.build()
        app2.cleanup()

    entry = new_project.make_entry()
    item = dict((new_project.normalise(entry),))
    inventories = InventoryAdapter(app2.env)
    # check that the URLs were changed accordingly
    assert list(inventories.cache) == ['http://localhost:9341/new']
    e_name, e_time, e_inv = inventories.cache['http://localhost:9341/new']
    assert e_name == 'spam'
    assert e_inv == {'py:module': item}
    assert inventories.named_inventory == {'spam': {'py:module': item}}


def test_load_mappings_cache_revert_update(tmp_path):
    tmp_path.joinpath('conf.py').touch()
    tmp_path.joinpath('index.rst').touch()
    old_project = SingleEntryProject(1337, 'old')
    new_project = SingleEntryProject(1701, 'new')

    InventoryHandler = make_inventory_handler(old_project, new_project)
    with http_server(InventoryHandler, port=SingleEntryProject.port):
        # build normally to create an initial cache
        confoverrides1 = BASE_CONFIG | {'intersphinx_mapping': old_project.record}
        app1 = SphinxTestApp('dummy', srcdir=tmp_path, confoverrides=confoverrides1)
        app1.build()
        app1.cleanup()

        # switch to the new URL and build
        confoverrides2 = BASE_CONFIG | {'intersphinx_mapping': new_project.record}
        app2 = SphinxTestApp('dummy', srcdir=tmp_path, confoverrides=confoverrides2)
        app2.build()
        app2.cleanup()

        # switch back to the old URL (re-use 'old_item')
        confoverrides3 = BASE_CONFIG | {'intersphinx_mapping': old_project.record}
        app3 = SphinxTestApp('dummy', srcdir=tmp_path, confoverrides=confoverrides3)
        app3.build()
        app3.cleanup()

    entry = old_project.make_entry()
    item = dict((old_project.normalise(entry),))
    inventories = InventoryAdapter(app3.env)
    # check that the URLs were changed accordingly
    assert list(inventories.cache) == ['http://localhost:9341/old']
    e_name, e_time, e_inv = inventories.cache['http://localhost:9341/old']
    assert e_name == 'spam'
    assert e_inv == {'py:module': item}
    assert inventories.named_inventory == {'spam': {'py:module': item}}