mirror of
https://github.com/sphinx-doc/sphinx.git
synced 2025-02-25 18:55:22 -06:00
linkcheck: support ignored-URIs for redirects (#13127)
Co-authored-by: Adam Turner <9087854+aa-turner@users.noreply.github.com>
This commit is contained in:
parent
182f621cad
commit
872d270f10
@ -43,6 +43,8 @@ Bugs fixed
|
||||
Patch by Jean-François B.
|
||||
* #13096: HTML Search: check that query terms exist as properties in
|
||||
term indices before accessing them.
|
||||
* #11233: linkcheck: match redirect URIs against :confval:`linkcheck_ignore` by
|
||||
overriding ``requests.Session.get_redirect_target`` on Sphinx's session class.
|
||||
|
||||
Testing
|
||||
-------
|
||||
|
@ -3709,6 +3709,9 @@ and which failures and redirects it ignores.
|
||||
A list of regular expressions that match URIs that should not be checked
|
||||
when doing a ``linkcheck`` build.
|
||||
|
||||
Server-issued redirects that match :confval:`ignored URIs <linkcheck_ignore>`
|
||||
will not be followed.
|
||||
|
||||
Example:
|
||||
|
||||
.. code-block:: python
|
||||
|
@ -398,7 +398,9 @@ class HyperlinkAvailabilityCheckWorker(Thread):
|
||||
self.tls_verify = config.tls_verify
|
||||
self.tls_cacerts = config.tls_cacerts
|
||||
|
||||
self._session = requests._Session()
|
||||
self._session = requests._Session(
|
||||
_ignored_redirects=tuple(map(re.compile, config.linkcheck_ignore))
|
||||
)
|
||||
|
||||
super().__init__(daemon=True)
|
||||
|
||||
@ -570,6 +572,14 @@ class HyperlinkAvailabilityCheckWorker(Thread):
|
||||
error_message = str(err)
|
||||
continue
|
||||
|
||||
except requests._IgnoredRedirection as err:
|
||||
# A redirection to an ignored URI was attempted; report it appropriately
|
||||
return (
|
||||
_Status.IGNORED,
|
||||
f'ignored redirect: {err.destination}',
|
||||
err.status_code,
|
||||
)
|
||||
|
||||
except HTTPError as err:
|
||||
error_message = str(err)
|
||||
|
||||
|
@ -3,20 +3,34 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import warnings
|
||||
from typing import Any
|
||||
from urllib.parse import urlsplit
|
||||
from typing import TYPE_CHECKING
|
||||
from urllib.parse import urljoin, urlsplit
|
||||
|
||||
import requests
|
||||
from urllib3.exceptions import InsecureRequestWarning
|
||||
|
||||
import sphinx
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import re
|
||||
from collections.abc import Sequence
|
||||
from typing import Any
|
||||
|
||||
|
||||
# Browser-like identification string with the running Sphinx version appended.
# NOTE(review): presumably sent as the default User-Agent header -- confirm
# against the request helpers in this module.
_USER_AGENT = (
    'Mozilla/5.0 (X11; Linux x86_64; rv:100.0) Gecko/20100101 Firefox/100.0 '
    f'Sphinx/{sphinx.__version__}'
)
|
||||
|
||||
|
||||
class _IgnoredRedirection(Exception):
    """Sphinx-internal exception raised when an HTTP redirect is ignored.

    :param destination: The URI that the server attempted to redirect to.
    :param status_code: The HTTP status code of the redirecting response.
    """

    def __init__(self, destination: str, status_code: int) -> None:
        # Forward both values to ``Exception`` so that ``args`` is populated:
        # this gives the exception an informative ``repr()``/``str()`` and
        # lets ``copy``/``pickle`` rebuild it by calling the class with
        # ``*args`` (with empty ``args`` that reconstruction raises TypeError).
        super().__init__(destination, status_code)
        self.destination = destination
        self.status_code = status_code
|
||||
|
||||
|
||||
def _get_tls_cacert(url: str, certs: str | dict[str, str] | None) -> str | bool:
|
||||
"""Get additional CA cert for a specific URL."""
|
||||
if not certs:
|
||||
@ -50,6 +64,23 @@ def head(url: str, **kwargs: Any) -> requests.Response:
|
||||
|
||||
|
||||
class _Session(requests.Session):
|
||||
_ignored_redirects: Sequence[re.Pattern[str]]
|
||||
|
||||
def __init__(
    self,
    *args: Any,
    _ignored_redirects: Sequence[re.Pattern[str]] = (),
    **kwargs: Any,
) -> None:
    """Initialise the session and remember the ignored-redirect patterns.

    :param _ignored_redirects: Compiled patterns that redirect destinations
        are matched against; keyword-only, defaults to no patterns.

    Every other positional and keyword argument is passed through
    unchanged to the parent class initialiser.
    """
    # Declared as an explicit keyword-only parameter (rather than popped
    # from ``kwargs``) so the option is visible in the signature and to
    # type checkers; callers passing it by keyword are unaffected.
    self._ignored_redirects = _ignored_redirects
    super().__init__(*args, **kwargs)
|
||||
|
||||
def get_redirect_target(self, resp: requests.Response) -> str | None:
    """Return the redirect target of *resp*, refusing ignored destinations.

    For a redirect response, the ``location`` header is resolved against
    the response URL and matched against each ignored-redirect pattern.

    :raises _IgnoredRedirection: If the resolved destination matches one
        of the session's ignored-redirect patterns.
    :returns: Whatever the parent implementation returns otherwise.
    """
    if resp.is_redirect:
        # Resolve relative ``Location`` values so patterns see a full URI.
        target = urljoin(resp.url, resp.headers['location'])
        for pattern in self._ignored_redirects:
            if pattern.match(target):
                # Abort instead of following the redirect.
                raise _IgnoredRedirection(
                    destination=target, status_code=resp.status_code
                )
    return super().get_redirect_target(resp)
|
||||
|
||||
def request( # type: ignore[override]
|
||||
self,
|
||||
method: str,
|
||||
|
@ -926,7 +926,7 @@ class InfiniteRedirectOnHeadHandler(BaseHTTPRequestHandler):
|
||||
|
||||
def do_HEAD(self):
|
||||
self.send_response(302, 'Found')
|
||||
self.send_header('Location', '/')
|
||||
self.send_header('Location', '/redirected')
|
||||
self.send_header('Content-Length', '0')
|
||||
self.end_headers()
|
||||
|
||||
@ -966,6 +966,55 @@ def test_TooManyRedirects_on_HEAD(app, monkeypatch):
|
||||
}
|
||||
|
||||
|
||||
@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver')
def test_ignore_local_redirection(app):
    """A redirect to an ignored URI on the same server is reported as ignored."""
    with serve_application(app, InfiniteRedirectOnHeadHandler) as address:
        # Ignore exactly the URI that the handler's HEAD response points at.
        app.config.linkcheck_ignore = [f'http://{address}/redirected']
        app.build()

    report_path = app.outdir / 'output.json'
    with open(report_path, encoding='utf-8') as report:
        content = json.load(report)

    expected = {
        'code': 302,
        'status': 'ignored',
        'filename': 'index.rst',
        'lineno': 1,
        'uri': f'http://{address}/',
        'info': f'ignored redirect: http://{address}/redirected',
    }
    assert content == expected
|
||||
|
||||
|
||||
class RemoteDomainRedirectHandler(InfiniteRedirectOnHeadHandler):
    """Test handler whose GET responses redirect, in two hops, to another domain.

    ``GET /`` redirects to ``/local`` and ``GET /local`` redirects to
    ``http://example.test/migrated``.  HEAD handling is inherited from
    ``InfiniteRedirectOnHeadHandler``.
    """

    protocol_version = 'HTTP/1.1'

    def do_GET(self):
        # The standard reason phrase for 301 is "Moved Permanently";
        # "Found" is the phrase for 302.
        self.send_response(301, 'Moved Permanently')
        if self.path == '/':
            self.send_header('Location', '/local')
        elif self.path == '/local':
            self.send_header('Location', 'http://example.test/migrated')
        # Any other path receives the 301 without a Location header.
        self.send_header('Content-Length', '0')
        self.end_headers()
|
||||
|
||||
|
||||
@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver')
def test_ignore_remote_redirection(app):
    """A redirect chain ending at an ignored remote URI is reported as ignored."""
    with serve_application(app, RemoteDomainRedirectHandler) as address:
        # The pattern matches the final hop of the handler's redirect chain.
        app.config.linkcheck_ignore = ['http://example.test']
        app.build()

    report_path = app.outdir / 'output.json'
    with open(report_path, encoding='utf-8') as report:
        content = json.load(report)

    expected = {
        'code': 301,
        'status': 'ignored',
        'filename': 'index.rst',
        'lineno': 1,
        'uri': f'http://{address}/',
        'info': 'ignored redirect: http://example.test/migrated',
    }
    assert content == expected
|
||||
|
||||
|
||||
def make_retry_after_handler(
|
||||
responses: list[tuple[int, str | None]],
|
||||
) -> type[BaseHTTPRequestHandler]:
|
||||
|
Loading…
Reference in New Issue
Block a user