Add `InventoryFile.loads()` (#13215)

This commit is contained in:
Adam Turner 2025-01-07 00:40:57 +00:00 committed by GitHub
parent b3035c7d22
commit df06e6d628
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 153 additions and 138 deletions

View File

@ -4,7 +4,6 @@ from __future__ import annotations
import concurrent.futures import concurrent.futures
import dataclasses import dataclasses
import io
import os.path import os.path
import posixpath import posixpath
import time import time
@ -327,9 +326,8 @@ def _fetch_inventory(
else: else:
raw_data = _fetch_inventory_file(inv_location=inv_location, srcdir=srcdir) raw_data = _fetch_inventory_file(inv_location=inv_location, srcdir=srcdir)
stream = io.BytesIO(raw_data)
try: try:
invdata = InventoryFile.load(stream, target_uri, posixpath.join) invdata = InventoryFile.loads(raw_data, uri=target_uri)
except ValueError as exc: except ValueError as exc:
msg = f'unknown or unsupported inventory version: {exc!r}' msg = f'unknown or unsupported inventory version: {exc!r}'
raise ValueError(msg) from exc raise ValueError(msg) from exc

View File

@ -0,0 +1,76 @@
from __future__ import annotations
import zlib
from typing import TYPE_CHECKING
from sphinx.util import logging
BUFSIZE = 16 * 1024
logger = logging.getLogger(__name__)
if TYPE_CHECKING:
    # These names are only needed by annotations, so they are imported
    # under the TYPE_CHECKING guard rather than at module import time.
    from collections.abc import Iterator
    from typing import Protocol

    # Readable file stream for inventory loading
    class _SupportsRead(Protocol):
        """Structural type for objects with a ``read`` method returning bytes."""

        # ``size`` is optional; the reader in this module passes a chunk size.
        def read(self, size: int = ...) -> bytes: ...
__all__ = ('InventoryFileReader',)
class InventoryFileReader:
    """A file reader for an inventory file.

    This reader supports mixture of texts and compressed texts: plain-text
    header lines can be consumed with :meth:`readline`, after which the
    remainder of the stream can be decoded with
    :meth:`read_compressed_lines`.
    """

    def __init__(self, stream: _SupportsRead) -> None:
        """Wrap *stream*, an object whose ``read(size)`` returns bytes."""
        self.stream = stream
        # Bytes read from the stream but not yet handed out to the caller.
        self.buffer = b''
        # Set once ``stream.read()`` has returned an empty bytes object.
        self.eof = False

    def read_buffer(self) -> None:
        """Read up to ``BUFSIZE`` bytes from the stream into the buffer.

        An empty read marks the reader as exhausted (``self.eof``).
        """
        chunk = self.stream.read(BUFSIZE)
        if chunk == b'':
            self.eof = True
        self.buffer += chunk

    def readline(self) -> str:
        """Return the next line, decoded, without its trailing newline.

        At end of stream the remaining buffered bytes are returned as the
        last line (an empty string once nothing is left).
        """
        # Iterate instead of recursing once per chunk: a long line without
        # a newline would otherwise exhaust the interpreter's stack.
        search_from = 0
        while True:
            pos = self.buffer.find(b'\n', search_from)
            if pos != -1:
                line = self.buffer[:pos].decode()
                self.buffer = self.buffer[pos + 1 :]
                return line
            if self.eof:
                line = self.buffer.decode()
                self.buffer = b''
                return line
            # Bytes scanned so far hold no newline; only look at new data.
            search_from = len(self.buffer)
            self.read_buffer()

    def readlines(self) -> Iterator[str]:
        """Yield the remaining non-empty lines until the stream is exhausted."""
        while not self.eof:
            line = self.readline()
            if line:
                yield line

    def read_compressed_chunks(self) -> Iterator[bytes]:
        """Yield decompressed chunks of the remaining zlib-compressed data.

        Any bytes already buffered (for example, left over after reading
        header lines) are fed to the decompressor together with the next
        chunk read from the stream.
        """
        decompressor = zlib.decompressobj()
        while not self.eof:
            self.read_buffer()
            yield decompressor.decompress(self.buffer)
            self.buffer = b''
        yield decompressor.flush()

    def read_compressed_lines(self) -> Iterator[str]:
        """Yield decoded lines from the remaining zlib-compressed data.

        Lines are yielded without their trailing newline.  A final line
        that is not newline-terminated is yielded as well rather than
        being silently dropped.
        """
        pending = b''
        for chunk in self.read_compressed_chunks():
            # Everything before the last newline is complete; the tail is
            # carried over until more data (or the end of stream) arrives.
            *complete, pending = (pending + chunk).split(b'\n')
            for raw_line in complete:
                yield raw_line.decode()
        if pending:
            yield pending.decode()

View File

@ -2,6 +2,7 @@
from __future__ import annotations from __future__ import annotations
import posixpath
import re import re
import zlib import zlib
from typing import TYPE_CHECKING from typing import TYPE_CHECKING
@ -14,127 +15,96 @@ logger = logging.getLogger(__name__)
if TYPE_CHECKING: if TYPE_CHECKING:
import os import os
from collections.abc import Callable, Iterator from collections.abc import Callable, Sequence
from typing import Protocol
from sphinx.builders import Builder from sphinx.builders import Builder
from sphinx.environment import BuildEnvironment from sphinx.environment import BuildEnvironment
from sphinx.util.typing import Inventory, InventoryItem, _ReadableStream from sphinx.util.typing import Inventory, InventoryItem
# Readable file stream for inventory loading
class _SupportsRead(Protocol):
def read(self, size: int = ...) -> bytes: ...
_JoinFunc = Callable[[str, str], str]
class InventoryFileReader: def __getattr__(name: str) -> object:
"""A file reader for an inventory file. if name == 'InventoryFileReader':
from sphinx.util._inventory_file_reader import InventoryFileReader
This reader supports mixture of texts and compressed texts. return InventoryFileReader
""" msg = f'module {__name__!r} has no attribute {name!r}'
raise AttributeError(msg)
def __init__(self, stream: _ReadableStream[bytes]) -> None:
self.stream = stream
self.buffer = b''
self.eof = False
def read_buffer(self) -> None:
chunk = self.stream.read(BUFSIZE)
if chunk == b'':
self.eof = True
self.buffer += chunk
def readline(self) -> str:
pos = self.buffer.find(b'\n')
if pos != -1:
line = self.buffer[:pos].decode()
self.buffer = self.buffer[pos + 1 :]
elif self.eof:
line = self.buffer.decode()
self.buffer = b''
else:
self.read_buffer()
line = self.readline()
return line
def readlines(self) -> Iterator[str]:
while not self.eof:
line = self.readline()
if line:
yield line
def read_compressed_chunks(self) -> Iterator[bytes]:
decompressor = zlib.decompressobj()
while not self.eof:
self.read_buffer()
yield decompressor.decompress(self.buffer)
self.buffer = b''
yield decompressor.flush()
def read_compressed_lines(self) -> Iterator[str]:
buf = b''
for chunk in self.read_compressed_chunks():
buf += chunk
pos = buf.find(b'\n')
while pos != -1:
yield buf[:pos].decode()
buf = buf[pos + 1 :]
pos = buf.find(b'\n')
class InventoryFile: class InventoryFile:
@classmethod @classmethod
def load( def loads(
cls: type[InventoryFile], cls,
stream: _ReadableStream[bytes], content: bytes,
*,
uri: str, uri: str,
joinfunc: Callable[[str, str], str],
) -> Inventory: ) -> Inventory:
reader = InventoryFileReader(stream) format_line, _, content = content.partition(b'\n')
line = reader.readline().rstrip() format_line = format_line.rstrip() # remove trailing \r or spaces
if line == '# Sphinx inventory version 1': if format_line == b'# Sphinx inventory version 2':
return cls.load_v1(reader, uri, joinfunc) return cls._loads_v2(content, uri=uri)
elif line == '# Sphinx inventory version 2': if format_line == b'# Sphinx inventory version 1':
return cls.load_v2(reader, uri, joinfunc) lines = content.decode().splitlines()
else: return cls._loads_v1(lines, uri=uri)
raise ValueError('invalid inventory header: %s' % line) if format_line.startswith(b'# Sphinx inventory version '):
unknown_version = format_line[27:].decode()
msg = f'unknown or unsupported inventory version: {unknown_version!r}'
raise ValueError(msg)
msg = f'invalid inventory header: {format_line.decode()}'
raise ValueError(msg)
@classmethod @classmethod
def load_v1( def load(cls, stream: _SupportsRead, uri: str, joinfunc: _JoinFunc) -> Inventory:
cls: type[InventoryFile], return cls.loads(stream.read(), uri=uri)
stream: InventoryFileReader,
uri: str, @classmethod
join: Callable[[str, str], str], def _loads_v1(cls, lines: Sequence[str], *, uri: str) -> Inventory:
) -> Inventory: if len(lines) < 2:
msg = 'invalid inventory header: missing project name or version'
raise ValueError(msg)
invdata: Inventory = {} invdata: Inventory = {}
projname = stream.readline().rstrip()[11:] projname = lines[0].rstrip()[11:] # Project name
version = stream.readline().rstrip()[11:] version = lines[1].rstrip()[11:] # Project version
for line in stream.readlines(): for line in lines[2:]:
name, type, location = line.rstrip().split(None, 2) name, item_type, location = line.rstrip().split(None, 2)
location = join(uri, location) location = posixpath.join(uri, location)
# version 1 did not add anchors to the location # version 1 did not add anchors to the location
if type == 'mod': if item_type == 'mod':
type = 'py:module' item_type = 'py:module'
location += '#module-' + name location += f'#module-{name}'
else: else:
type = 'py:' + type item_type = f'py:{item_type}'
location += '#' + name location += f'#{name}'
invdata.setdefault(type, {})[name] = (projname, version, location, '-') inv_item: InventoryItem = projname, version, location, '-'
invdata.setdefault(item_type, {})[name] = inv_item
return invdata return invdata
@classmethod @classmethod
def load_v2( def _loads_v2(cls, inv_data: bytes, *, uri: str) -> Inventory:
cls: type[InventoryFile], try:
stream: InventoryFileReader, line_1, line_2, check_line, compressed = inv_data.split(b'\n', maxsplit=3)
uri: str, except ValueError:
join: Callable[[str, str], str], msg = 'invalid inventory header: missing project name or version'
) -> Inventory: raise ValueError(msg) from None
invdata: Inventory = {} invdata: Inventory = {}
projname = stream.readline().rstrip()[11:] projname = line_1.rstrip()[11:].decode() # Project name
version = stream.readline().rstrip()[11:] version = line_2.rstrip()[11:].decode() # Project version
# definition -> priority, location, display name # definition -> priority, location, display name
potential_ambiguities: dict[str, tuple[str, str, str]] = {} potential_ambiguities: dict[str, tuple[str, str, str]] = {}
actual_ambiguities = set() actual_ambiguities = set()
line = stream.readline() if b'zlib' not in check_line: # '... compressed using zlib'
if 'zlib' not in line: msg = f'invalid inventory header (not compressed): {check_line.decode()}'
raise ValueError('invalid inventory header (not compressed): %s' % line) raise ValueError(msg)
for line in stream.read_compressed_lines(): decompressed_content = zlib.decompress(compressed)
for line in decompressed_content.decode().splitlines():
# be careful to handle names with embedded spaces correctly # be careful to handle names with embedded spaces correctly
m = re.match( m = re.match(
r'(.+?)\s+(\S+)\s+(-?\d+)\s+?(\S*)\s+(.*)', r'(.+?)\s+(\S+)\s+(-?\d+)\s+?(\S*)\s+(.*)',
@ -177,7 +147,7 @@ class InventoryFile:
potential_ambiguities[lowercase_definition] = content potential_ambiguities[lowercase_definition] = content
if location.endswith('$'): if location.endswith('$'):
location = location[:-1] + name location = location[:-1] + name
location = join(uri, location) location = posixpath.join(uri, location)
inv_item: InventoryItem = projname, version, location, dispname inv_item: InventoryItem = projname, version, location, dispname
invdata.setdefault(type, {})[name] = inv_item invdata.setdefault(type, {})[name] = inv_item
for ambiguity in actual_ambiguities: for ambiguity in actual_ambiguities:
@ -192,10 +162,7 @@ class InventoryFile:
@classmethod @classmethod
def dump( def dump(
cls: type[InventoryFile], cls, filename: str | os.PathLike[str], env: BuildEnvironment, builder: Builder
filename: str | os.PathLike[str],
env: BuildEnvironment,
builder: Builder,
) -> None: ) -> None:
def escape(string: str) -> str: def escape(string: str) -> str:
return re.sub('\\s+', ' ', string) return re.sub('\\s+', ' ', string)

View File

@ -119,26 +119,6 @@ OptionSpec: TypeAlias = dict[str, Callable[[str], Any]]
# title getter functions for enumerable nodes (see sphinx.domains.std) # title getter functions for enumerable nodes (see sphinx.domains.std)
TitleGetter: TypeAlias = Callable[[nodes.Node], str] TitleGetter: TypeAlias = Callable[[nodes.Node], str]
# Readable file stream for inventory loading
if TYPE_CHECKING:
from types import TracebackType
from typing import Self
_T_co = TypeVar('_T_co', str, bytes, covariant=True)
class _ReadableStream(Protocol[_T_co]): # NoQA: PYI046 (false positive)
def read(self, size: int = ...) -> _T_co: ...
def __enter__(self) -> Self: ...
def __exit__(
self,
exc_type: type[BaseException] | None,
exc_val: BaseException | None,
exc_tb: TracebackType | None,
) -> None: ...
# inventory data on memory # inventory data on memory
InventoryItem: TypeAlias = tuple[ InventoryItem: TypeAlias = tuple[
str, # project name str, # project name

View File

@ -88,7 +88,7 @@ def test_fetch_inventory_redirection(get_request, InventoryFile, app):
srcdir=app.srcdir, srcdir=app.srcdir,
) )
assert 'intersphinx inventory has moved' not in app.status.getvalue() assert 'intersphinx inventory has moved' not in app.status.getvalue()
assert InventoryFile.load.call_args[0][1] == 'https://hostname/' assert InventoryFile.loads.call_args[1]['uri'] == 'https://hostname/'
# same uri and inv, redirected # same uri and inv, redirected
app.status.seek(0) app.status.seek(0)
@ -106,7 +106,7 @@ def test_fetch_inventory_redirection(get_request, InventoryFile, app):
'https://hostname/%s -> https://hostname/new/%s\n' 'https://hostname/%s -> https://hostname/new/%s\n'
% (INVENTORY_FILENAME, INVENTORY_FILENAME) % (INVENTORY_FILENAME, INVENTORY_FILENAME)
) )
assert InventoryFile.load.call_args[0][1] == 'https://hostname/new' assert InventoryFile.loads.call_args[1]['uri'] == 'https://hostname/new'
# different uri and inv, not redirected # different uri and inv, not redirected
app.status.seek(0) app.status.seek(0)
@ -120,7 +120,7 @@ def test_fetch_inventory_redirection(get_request, InventoryFile, app):
srcdir=app.srcdir, srcdir=app.srcdir,
) )
assert 'intersphinx inventory has moved' not in app.status.getvalue() assert 'intersphinx inventory has moved' not in app.status.getvalue()
assert InventoryFile.load.call_args[0][1] == 'https://hostname/' assert InventoryFile.loads.call_args[1]['uri'] == 'https://hostname/'
# different uri and inv, redirected # different uri and inv, redirected
app.status.seek(0) app.status.seek(0)
@ -138,7 +138,7 @@ def test_fetch_inventory_redirection(get_request, InventoryFile, app):
'https://hostname/new/%s -> https://hostname/other/%s\n' 'https://hostname/new/%s -> https://hostname/other/%s\n'
% (INVENTORY_FILENAME, INVENTORY_FILENAME) % (INVENTORY_FILENAME, INVENTORY_FILENAME)
) )
assert InventoryFile.load.call_args[0][1] == 'https://hostname/' assert InventoryFile.loads.call_args[1]['uri'] == 'https://hostname/'
@pytest.mark.sphinx('html', testroot='root') @pytest.mark.sphinx('html', testroot='root')

View File

@ -3,8 +3,6 @@
from __future__ import annotations from __future__ import annotations
import os import os
import posixpath
from io import BytesIO
from typing import TYPE_CHECKING from typing import TYPE_CHECKING
import pytest import pytest
@ -25,8 +23,7 @@ if TYPE_CHECKING:
def test_read_inventory_v1(): def test_read_inventory_v1():
f = BytesIO(INVENTORY_V1) invdata = InventoryFile.loads(INVENTORY_V1, uri='/util')
invdata = InventoryFile.load(f, '/util', posixpath.join)
assert invdata['py:module']['module'] == ( assert invdata['py:module']['module'] == (
'foo', 'foo',
'1.0', '1.0',
@ -42,8 +39,7 @@ def test_read_inventory_v1():
def test_read_inventory_v2(): def test_read_inventory_v2():
f = BytesIO(INVENTORY_V2) invdata = InventoryFile.loads(INVENTORY_V2, uri='/util')
invdata = InventoryFile.load(f, '/util', posixpath.join)
assert len(invdata['py:module']) == 2 assert len(invdata['py:module']) == 2
assert invdata['py:module']['module1'] == ( assert invdata['py:module']['module1'] == (
@ -69,8 +65,7 @@ def test_read_inventory_v2():
def test_read_inventory_v2_not_having_version(): def test_read_inventory_v2_not_having_version():
f = BytesIO(INVENTORY_V2_NO_VERSION) invdata = InventoryFile.loads(INVENTORY_V2_NO_VERSION, uri='/util')
invdata = InventoryFile.load(f, '/util', posixpath.join)
assert invdata['py:module']['module1'] == ( assert invdata['py:module']['module1'] == (
'foo', 'foo',
'', '',
@ -81,8 +76,7 @@ def test_read_inventory_v2_not_having_version():
@pytest.mark.sphinx('html', testroot='root') @pytest.mark.sphinx('html', testroot='root')
def test_ambiguous_definition_warning(app): def test_ambiguous_definition_warning(app):
f = BytesIO(INVENTORY_V2_AMBIGUOUS_TERMS) InventoryFile.loads(INVENTORY_V2_AMBIGUOUS_TERMS, uri='/util')
InventoryFile.load(f, '/util', posixpath.join)
def _multiple_defs_notice_for(entity: str) -> str: def _multiple_defs_notice_for(entity: str) -> str:
return f'contains multiple definitions for {entity}' return f'contains multiple definitions for {entity}'