mirror of
https://github.com/sphinx-doc/sphinx.git
synced 2025-02-25 18:55:22 -06:00
intersphinx: Simplify `_fetch_inventory()
` (#13209)
The streams-based interfaces in intersphinx and ``sphinx.util.inventory`` are clever, but also complex and prevent using compression methods that don't support incrememntal decoding. This change refactors ``_fetch_inventory()`` to read all inventory content from disk or an HTTP request at once.
This commit is contained in:
parent
e17ed74fe0
commit
619a10efa7
@ -3,6 +3,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from sphinx.ext.intersphinx._load import _fetch_inventory
|
||||
|
||||
@ -29,7 +30,7 @@ def inspect_main(argv: list[str], /) -> int:
|
||||
target_uri='',
|
||||
inv_location=filename,
|
||||
config=MockConfig(), # type: ignore[arg-type]
|
||||
srcdir='', # type: ignore[arg-type]
|
||||
srcdir=Path(''),
|
||||
)
|
||||
for key in sorted(inv_data or {}):
|
||||
print(key)
|
||||
|
@ -3,6 +3,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import concurrent.futures
|
||||
import io
|
||||
import os.path
|
||||
import posixpath
|
||||
import time
|
||||
@ -20,8 +21,6 @@ from sphinx.util.inventory import InventoryFile
|
||||
if TYPE_CHECKING:
|
||||
from pathlib import Path
|
||||
|
||||
from urllib3.response import HTTPResponse
|
||||
|
||||
from sphinx.application import Sphinx
|
||||
from sphinx.config import Config
|
||||
from sphinx.ext.intersphinx._shared import (
|
||||
@ -31,7 +30,7 @@ if TYPE_CHECKING:
|
||||
InventoryName,
|
||||
InventoryURI,
|
||||
)
|
||||
from sphinx.util.typing import Inventory, _ReadableStream
|
||||
from sphinx.util.typing import Inventory
|
||||
|
||||
|
||||
def validate_intersphinx_mapping(app: Sphinx, config: Config) -> None:
|
||||
@ -297,13 +296,38 @@ def _fetch_inventory(
|
||||
# and *inv_location* (actual location of the inventory file)
|
||||
# can be local or remote URIs
|
||||
if '://' in target_uri:
|
||||
# case: inv URI points to remote resource; strip any existing auth
|
||||
# inv URI points to remote resource; strip any existing auth
|
||||
target_uri = _strip_basic_auth(target_uri)
|
||||
try:
|
||||
if '://' in inv_location:
|
||||
f: _ReadableStream[bytes] = _read_from_url(inv_location, config=config)
|
||||
raw_data, target_uri = _fetch_inventory_url(
|
||||
target_uri=target_uri, inv_location=inv_location, config=config
|
||||
)
|
||||
else:
|
||||
f = open(os.path.join(srcdir, inv_location), 'rb') # NoQA: SIM115
|
||||
raw_data = _fetch_inventory_file(inv_location=inv_location, srcdir=srcdir)
|
||||
|
||||
stream = io.BytesIO(raw_data)
|
||||
try:
|
||||
invdata = InventoryFile.load(stream, target_uri, posixpath.join)
|
||||
except ValueError as exc:
|
||||
msg = f'unknown or unsupported inventory version: {exc!r}'
|
||||
raise ValueError(msg) from exc
|
||||
return invdata
|
||||
|
||||
|
||||
def _fetch_inventory_url(
|
||||
*, target_uri: InventoryURI, inv_location: str, config: Config
|
||||
) -> tuple[bytes, str]:
|
||||
try:
|
||||
with requests.get(
|
||||
inv_location,
|
||||
stream=True,
|
||||
timeout=config.intersphinx_timeout,
|
||||
_user_agent=config.user_agent,
|
||||
_tls_info=(config.tls_verify, config.tls_cacerts),
|
||||
) as r:
|
||||
r.raise_for_status()
|
||||
raw_data = r.content
|
||||
new_inv_location = r.url
|
||||
except Exception as err:
|
||||
err.args = (
|
||||
'intersphinx inventory %r not fetchable due to %s: %s',
|
||||
@ -312,9 +336,7 @@ def _fetch_inventory(
|
||||
str(err),
|
||||
)
|
||||
raise
|
||||
try:
|
||||
if hasattr(f, 'url'):
|
||||
new_inv_location = f.url
|
||||
|
||||
if inv_location != new_inv_location:
|
||||
msg = __('intersphinx inventory has moved: %s -> %s')
|
||||
LOGGER.info(msg, inv_location, new_inv_location)
|
||||
@ -325,12 +347,14 @@ def _fetch_inventory(
|
||||
os.path.dirname(inv_location) + '/',
|
||||
}:
|
||||
target_uri = os.path.dirname(new_inv_location)
|
||||
with f:
|
||||
|
||||
return raw_data, target_uri
|
||||
|
||||
|
||||
def _fetch_inventory_file(*, inv_location: str, srcdir: Path) -> bytes:
|
||||
try:
|
||||
invdata = InventoryFile.load(f, target_uri, posixpath.join)
|
||||
except ValueError as exc:
|
||||
msg = f'unknown or unsupported inventory version: {exc!r}'
|
||||
raise ValueError(msg) from exc
|
||||
with open(srcdir / inv_location, 'rb') as f:
|
||||
raw_data = f.read()
|
||||
except Exception as err:
|
||||
err.args = (
|
||||
'intersphinx inventory %r not readable due to %s: %s',
|
||||
@ -339,8 +363,7 @@ def _fetch_inventory(
|
||||
str(err),
|
||||
)
|
||||
raise
|
||||
else:
|
||||
return invdata
|
||||
return raw_data
|
||||
|
||||
|
||||
def _get_safe_url(url: str) -> str:
|
||||
@ -387,37 +410,3 @@ def _strip_basic_auth(url: str) -> str:
|
||||
if '@' in frags[1]:
|
||||
frags[1] = frags[1].split('@')[1]
|
||||
return urlunsplit(frags)
|
||||
|
||||
|
||||
def _read_from_url(url: str, *, config: Config) -> HTTPResponse:
|
||||
"""Reads data from *url* with an HTTP *GET*.
|
||||
|
||||
This function supports fetching from resources which use basic HTTP auth as
|
||||
laid out by RFC1738 § 3.1. See § 5 for grammar definitions for URLs.
|
||||
|
||||
.. seealso:
|
||||
|
||||
https://www.ietf.org/rfc/rfc1738.txt
|
||||
|
||||
:param url: URL of an HTTP resource
|
||||
:type url: ``str``
|
||||
|
||||
:return: data read from resource described by *url*
|
||||
:rtype: ``file``-like object
|
||||
"""
|
||||
r = requests.get(
|
||||
url,
|
||||
stream=True,
|
||||
timeout=config.intersphinx_timeout,
|
||||
_user_agent=config.user_agent,
|
||||
_tls_info=(config.tls_verify, config.tls_cacerts),
|
||||
)
|
||||
r.raise_for_status()
|
||||
|
||||
# For inv_location / new_inv_location
|
||||
r.raw.url = r.url # type: ignore[union-attr]
|
||||
|
||||
# Decode content-body based on the header.
|
||||
# xref: https://github.com/psf/requests/issues/2155
|
||||
r.raw.decode_content = True
|
||||
return r.raw
|
||||
|
@ -70,14 +70,15 @@ def set_config(app, mapping):
|
||||
|
||||
|
||||
@mock.patch('sphinx.ext.intersphinx._load.InventoryFile')
|
||||
@mock.patch('sphinx.ext.intersphinx._load._read_from_url')
|
||||
@mock.patch('sphinx.ext.intersphinx._load.requests.get')
|
||||
@pytest.mark.sphinx('html', testroot='root')
|
||||
def test_fetch_inventory_redirection(_read_from_url, InventoryFile, app): # NoQA: PT019
|
||||
def test_fetch_inventory_redirection(get_request, InventoryFile, app):
|
||||
mocked_get = get_request.return_value.__enter__.return_value
|
||||
intersphinx_setup(app)
|
||||
_read_from_url().readline.return_value = b'# Sphinx inventory version 2'
|
||||
mocked_get.content = b'# Sphinx inventory version 2'
|
||||
|
||||
# same uri and inv, not redirected
|
||||
_read_from_url().url = 'https://hostname/' + INVENTORY_FILENAME
|
||||
mocked_get.url = 'https://hostname/' + INVENTORY_FILENAME
|
||||
_fetch_inventory(
|
||||
target_uri='https://hostname/',
|
||||
inv_location='https://hostname/' + INVENTORY_FILENAME,
|
||||
@ -90,7 +91,7 @@ def test_fetch_inventory_redirection(_read_from_url, InventoryFile, app): # NoQ
|
||||
# same uri and inv, redirected
|
||||
app.status.seek(0)
|
||||
app.status.truncate(0)
|
||||
_read_from_url().url = 'https://hostname/new/' + INVENTORY_FILENAME
|
||||
mocked_get.url = 'https://hostname/new/' + INVENTORY_FILENAME
|
||||
|
||||
_fetch_inventory(
|
||||
target_uri='https://hostname/',
|
||||
@ -108,7 +109,7 @@ def test_fetch_inventory_redirection(_read_from_url, InventoryFile, app): # NoQ
|
||||
# different uri and inv, not redirected
|
||||
app.status.seek(0)
|
||||
app.status.truncate(0)
|
||||
_read_from_url().url = 'https://hostname/new/' + INVENTORY_FILENAME
|
||||
mocked_get.url = 'https://hostname/new/' + INVENTORY_FILENAME
|
||||
|
||||
_fetch_inventory(
|
||||
target_uri='https://hostname/',
|
||||
@ -122,7 +123,7 @@ def test_fetch_inventory_redirection(_read_from_url, InventoryFile, app): # NoQ
|
||||
# different uri and inv, redirected
|
||||
app.status.seek(0)
|
||||
app.status.truncate(0)
|
||||
_read_from_url().url = 'https://hostname/other/' + INVENTORY_FILENAME
|
||||
mocked_get.url = 'https://hostname/other/' + INVENTORY_FILENAME
|
||||
|
||||
_fetch_inventory(
|
||||
target_uri='https://hostname/',
|
||||
|
Loading…
Reference in New Issue
Block a user