mirror of
https://github.com/sphinx-doc/sphinx.git
synced 2025-02-25 18:55:22 -06:00
intersphinx: Simplify `_fetch_inventory()` (#13209)
The streams-based interfaces in intersphinx and ``sphinx.util.inventory`` are clever, but also complex and prevent using compression methods that don't support incremental decoding. This change refactors ``_fetch_inventory()`` to read all inventory content from disk or an HTTP request at once.
This commit is contained in:
parent
e17ed74fe0
commit
619a10efa7
@ -3,6 +3,7 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
from sphinx.ext.intersphinx._load import _fetch_inventory
|
from sphinx.ext.intersphinx._load import _fetch_inventory
|
||||||
|
|
||||||
@ -29,7 +30,7 @@ def inspect_main(argv: list[str], /) -> int:
|
|||||||
target_uri='',
|
target_uri='',
|
||||||
inv_location=filename,
|
inv_location=filename,
|
||||||
config=MockConfig(), # type: ignore[arg-type]
|
config=MockConfig(), # type: ignore[arg-type]
|
||||||
srcdir='', # type: ignore[arg-type]
|
srcdir=Path(''),
|
||||||
)
|
)
|
||||||
for key in sorted(inv_data or {}):
|
for key in sorted(inv_data or {}):
|
||||||
print(key)
|
print(key)
|
||||||
|
@ -3,6 +3,7 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import concurrent.futures
|
import concurrent.futures
|
||||||
|
import io
|
||||||
import os.path
|
import os.path
|
||||||
import posixpath
|
import posixpath
|
||||||
import time
|
import time
|
||||||
@ -20,8 +21,6 @@ from sphinx.util.inventory import InventoryFile
|
|||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from urllib3.response import HTTPResponse
|
|
||||||
|
|
||||||
from sphinx.application import Sphinx
|
from sphinx.application import Sphinx
|
||||||
from sphinx.config import Config
|
from sphinx.config import Config
|
||||||
from sphinx.ext.intersphinx._shared import (
|
from sphinx.ext.intersphinx._shared import (
|
||||||
@ -31,7 +30,7 @@ if TYPE_CHECKING:
|
|||||||
InventoryName,
|
InventoryName,
|
||||||
InventoryURI,
|
InventoryURI,
|
||||||
)
|
)
|
||||||
from sphinx.util.typing import Inventory, _ReadableStream
|
from sphinx.util.typing import Inventory
|
||||||
|
|
||||||
|
|
||||||
def validate_intersphinx_mapping(app: Sphinx, config: Config) -> None:
|
def validate_intersphinx_mapping(app: Sphinx, config: Config) -> None:
|
||||||
@ -297,13 +296,38 @@ def _fetch_inventory(
|
|||||||
# and *inv_location* (actual location of the inventory file)
|
# and *inv_location* (actual location of the inventory file)
|
||||||
# can be local or remote URIs
|
# can be local or remote URIs
|
||||||
if '://' in target_uri:
|
if '://' in target_uri:
|
||||||
# case: inv URI points to remote resource; strip any existing auth
|
# inv URI points to remote resource; strip any existing auth
|
||||||
target_uri = _strip_basic_auth(target_uri)
|
target_uri = _strip_basic_auth(target_uri)
|
||||||
|
if '://' in inv_location:
|
||||||
|
raw_data, target_uri = _fetch_inventory_url(
|
||||||
|
target_uri=target_uri, inv_location=inv_location, config=config
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
raw_data = _fetch_inventory_file(inv_location=inv_location, srcdir=srcdir)
|
||||||
|
|
||||||
|
stream = io.BytesIO(raw_data)
|
||||||
try:
|
try:
|
||||||
if '://' in inv_location:
|
invdata = InventoryFile.load(stream, target_uri, posixpath.join)
|
||||||
f: _ReadableStream[bytes] = _read_from_url(inv_location, config=config)
|
except ValueError as exc:
|
||||||
else:
|
msg = f'unknown or unsupported inventory version: {exc!r}'
|
||||||
f = open(os.path.join(srcdir, inv_location), 'rb') # NoQA: SIM115
|
raise ValueError(msg) from exc
|
||||||
|
return invdata
|
||||||
|
|
||||||
|
|
||||||
|
def _fetch_inventory_url(
|
||||||
|
*, target_uri: InventoryURI, inv_location: str, config: Config
|
||||||
|
) -> tuple[bytes, str]:
|
||||||
|
try:
|
||||||
|
with requests.get(
|
||||||
|
inv_location,
|
||||||
|
stream=True,
|
||||||
|
timeout=config.intersphinx_timeout,
|
||||||
|
_user_agent=config.user_agent,
|
||||||
|
_tls_info=(config.tls_verify, config.tls_cacerts),
|
||||||
|
) as r:
|
||||||
|
r.raise_for_status()
|
||||||
|
raw_data = r.content
|
||||||
|
new_inv_location = r.url
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
err.args = (
|
err.args = (
|
||||||
'intersphinx inventory %r not fetchable due to %s: %s',
|
'intersphinx inventory %r not fetchable due to %s: %s',
|
||||||
@ -312,25 +336,25 @@ def _fetch_inventory(
|
|||||||
str(err),
|
str(err),
|
||||||
)
|
)
|
||||||
raise
|
raise
|
||||||
try:
|
|
||||||
if hasattr(f, 'url'):
|
|
||||||
new_inv_location = f.url
|
|
||||||
if inv_location != new_inv_location:
|
|
||||||
msg = __('intersphinx inventory has moved: %s -> %s')
|
|
||||||
LOGGER.info(msg, inv_location, new_inv_location)
|
|
||||||
|
|
||||||
if target_uri in {
|
if inv_location != new_inv_location:
|
||||||
inv_location,
|
msg = __('intersphinx inventory has moved: %s -> %s')
|
||||||
os.path.dirname(inv_location),
|
LOGGER.info(msg, inv_location, new_inv_location)
|
||||||
os.path.dirname(inv_location) + '/',
|
|
||||||
}:
|
if target_uri in {
|
||||||
target_uri = os.path.dirname(new_inv_location)
|
inv_location,
|
||||||
with f:
|
os.path.dirname(inv_location),
|
||||||
try:
|
os.path.dirname(inv_location) + '/',
|
||||||
invdata = InventoryFile.load(f, target_uri, posixpath.join)
|
}:
|
||||||
except ValueError as exc:
|
target_uri = os.path.dirname(new_inv_location)
|
||||||
msg = f'unknown or unsupported inventory version: {exc!r}'
|
|
||||||
raise ValueError(msg) from exc
|
return raw_data, target_uri
|
||||||
|
|
||||||
|
|
||||||
|
def _fetch_inventory_file(*, inv_location: str, srcdir: Path) -> bytes:
|
||||||
|
try:
|
||||||
|
with open(srcdir / inv_location, 'rb') as f:
|
||||||
|
raw_data = f.read()
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
err.args = (
|
err.args = (
|
||||||
'intersphinx inventory %r not readable due to %s: %s',
|
'intersphinx inventory %r not readable due to %s: %s',
|
||||||
@ -339,8 +363,7 @@ def _fetch_inventory(
|
|||||||
str(err),
|
str(err),
|
||||||
)
|
)
|
||||||
raise
|
raise
|
||||||
else:
|
return raw_data
|
||||||
return invdata
|
|
||||||
|
|
||||||
|
|
||||||
def _get_safe_url(url: str) -> str:
|
def _get_safe_url(url: str) -> str:
|
||||||
@ -387,37 +410,3 @@ def _strip_basic_auth(url: str) -> str:
|
|||||||
if '@' in frags[1]:
|
if '@' in frags[1]:
|
||||||
frags[1] = frags[1].split('@')[1]
|
frags[1] = frags[1].split('@')[1]
|
||||||
return urlunsplit(frags)
|
return urlunsplit(frags)
|
||||||
|
|
||||||
|
|
||||||
def _read_from_url(url: str, *, config: Config) -> HTTPResponse:
|
|
||||||
"""Reads data from *url* with an HTTP *GET*.
|
|
||||||
|
|
||||||
This function supports fetching from resources which use basic HTTP auth as
|
|
||||||
laid out by RFC1738 § 3.1. See § 5 for grammar definitions for URLs.
|
|
||||||
|
|
||||||
.. seealso:
|
|
||||||
|
|
||||||
https://www.ietf.org/rfc/rfc1738.txt
|
|
||||||
|
|
||||||
:param url: URL of an HTTP resource
|
|
||||||
:type url: ``str``
|
|
||||||
|
|
||||||
:return: data read from resource described by *url*
|
|
||||||
:rtype: ``file``-like object
|
|
||||||
"""
|
|
||||||
r = requests.get(
|
|
||||||
url,
|
|
||||||
stream=True,
|
|
||||||
timeout=config.intersphinx_timeout,
|
|
||||||
_user_agent=config.user_agent,
|
|
||||||
_tls_info=(config.tls_verify, config.tls_cacerts),
|
|
||||||
)
|
|
||||||
r.raise_for_status()
|
|
||||||
|
|
||||||
# For inv_location / new_inv_location
|
|
||||||
r.raw.url = r.url # type: ignore[union-attr]
|
|
||||||
|
|
||||||
# Decode content-body based on the header.
|
|
||||||
# xref: https://github.com/psf/requests/issues/2155
|
|
||||||
r.raw.decode_content = True
|
|
||||||
return r.raw
|
|
||||||
|
@ -70,14 +70,15 @@ def set_config(app, mapping):
|
|||||||
|
|
||||||
|
|
||||||
@mock.patch('sphinx.ext.intersphinx._load.InventoryFile')
|
@mock.patch('sphinx.ext.intersphinx._load.InventoryFile')
|
||||||
@mock.patch('sphinx.ext.intersphinx._load._read_from_url')
|
@mock.patch('sphinx.ext.intersphinx._load.requests.get')
|
||||||
@pytest.mark.sphinx('html', testroot='root')
|
@pytest.mark.sphinx('html', testroot='root')
|
||||||
def test_fetch_inventory_redirection(_read_from_url, InventoryFile, app): # NoQA: PT019
|
def test_fetch_inventory_redirection(get_request, InventoryFile, app):
|
||||||
|
mocked_get = get_request.return_value.__enter__.return_value
|
||||||
intersphinx_setup(app)
|
intersphinx_setup(app)
|
||||||
_read_from_url().readline.return_value = b'# Sphinx inventory version 2'
|
mocked_get.content = b'# Sphinx inventory version 2'
|
||||||
|
|
||||||
# same uri and inv, not redirected
|
# same uri and inv, not redirected
|
||||||
_read_from_url().url = 'https://hostname/' + INVENTORY_FILENAME
|
mocked_get.url = 'https://hostname/' + INVENTORY_FILENAME
|
||||||
_fetch_inventory(
|
_fetch_inventory(
|
||||||
target_uri='https://hostname/',
|
target_uri='https://hostname/',
|
||||||
inv_location='https://hostname/' + INVENTORY_FILENAME,
|
inv_location='https://hostname/' + INVENTORY_FILENAME,
|
||||||
@ -90,7 +91,7 @@ def test_fetch_inventory_redirection(_read_from_url, InventoryFile, app): # NoQ
|
|||||||
# same uri and inv, redirected
|
# same uri and inv, redirected
|
||||||
app.status.seek(0)
|
app.status.seek(0)
|
||||||
app.status.truncate(0)
|
app.status.truncate(0)
|
||||||
_read_from_url().url = 'https://hostname/new/' + INVENTORY_FILENAME
|
mocked_get.url = 'https://hostname/new/' + INVENTORY_FILENAME
|
||||||
|
|
||||||
_fetch_inventory(
|
_fetch_inventory(
|
||||||
target_uri='https://hostname/',
|
target_uri='https://hostname/',
|
||||||
@ -108,7 +109,7 @@ def test_fetch_inventory_redirection(_read_from_url, InventoryFile, app): # NoQ
|
|||||||
# different uri and inv, not redirected
|
# different uri and inv, not redirected
|
||||||
app.status.seek(0)
|
app.status.seek(0)
|
||||||
app.status.truncate(0)
|
app.status.truncate(0)
|
||||||
_read_from_url().url = 'https://hostname/new/' + INVENTORY_FILENAME
|
mocked_get.url = 'https://hostname/new/' + INVENTORY_FILENAME
|
||||||
|
|
||||||
_fetch_inventory(
|
_fetch_inventory(
|
||||||
target_uri='https://hostname/',
|
target_uri='https://hostname/',
|
||||||
@ -122,7 +123,7 @@ def test_fetch_inventory_redirection(_read_from_url, InventoryFile, app): # NoQ
|
|||||||
# different uri and inv, redirected
|
# different uri and inv, redirected
|
||||||
app.status.seek(0)
|
app.status.seek(0)
|
||||||
app.status.truncate(0)
|
app.status.truncate(0)
|
||||||
_read_from_url().url = 'https://hostname/other/' + INVENTORY_FILENAME
|
mocked_get.url = 'https://hostname/other/' + INVENTORY_FILENAME
|
||||||
|
|
||||||
_fetch_inventory(
|
_fetch_inventory(
|
||||||
target_uri='https://hostname/',
|
target_uri='https://hostname/',
|
||||||
|
Loading…
Reference in New Issue
Block a user