mirror of
https://github.com/sphinx-doc/sphinx.git
synced 2025-02-25 18:55:22 -06:00
Enable reporting HTTP 401 hyperlinks as broken in linkcheck (#11684)
Co-authored-by: picnixz <10796600+picnixz@users.noreply.github.com>
Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com>
This commit is contained in:
@@ -53,6 +53,10 @@ Bugs fixed
|
|||||||
* #11715: Apply ``tls_verify`` and ``tls_cacerts`` config to
|
* #11715: Apply ``tls_verify`` and ``tls_cacerts`` config to
|
||||||
``ImageDownloader``.
|
``ImageDownloader``.
|
||||||
Patch by Nick Touran.
|
Patch by Nick Touran.
|
||||||
|
* #11433: Added the ``linkcheck_allow_unauthorized`` configuration option.
|
||||||
|
Set this option to ``False`` to report HTTP 401 (unauthorized) server
|
||||||
|
responses as broken.
|
||||||
|
Patch by James Addison.
|
||||||
|
|
||||||
Testing
|
Testing
|
||||||
-------
|
-------
|
||||||
|
|||||||
@@ -2915,6 +2915,18 @@ Options for the linkcheck builder
|
|||||||
|
|
||||||
.. versionadded:: 4.4
|
.. versionadded:: 4.4
|
||||||
|
|
||||||
|
.. confval:: linkcheck_allow_unauthorized
|
||||||
|
|
||||||
|
When a webserver responds with an HTTP 401 (unauthorized) response, the
|
||||||
|
current default behaviour of Sphinx is to treat the link as "working". To
|
||||||
|
change that behaviour, set this option to ``False`` (the default is ``True``).
|
||||||
|
|
||||||
|
The default value for this option will be changed in Sphinx 8.0; from that
|
||||||
|
version onwards, HTTP 401 responses to checked hyperlinks will be treated
|
||||||
|
as "broken" by default.
|
||||||
|
|
||||||
|
.. versionadded:: 7.3
|
||||||
|
|
||||||
|
|
||||||
Options for the XML builder
|
Options for the XML builder
|
||||||
---------------------------
|
---------------------------
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ import json
|
|||||||
import re
|
import re
|
||||||
import socket
|
import socket
|
||||||
import time
|
import time
|
||||||
|
import warnings
|
||||||
from html.parser import HTMLParser
|
from html.parser import HTMLParser
|
||||||
from os import path
|
from os import path
|
||||||
from queue import PriorityQueue, Queue
|
from queue import PriorityQueue, Queue
|
||||||
@@ -18,6 +19,7 @@ from docutils import nodes
|
|||||||
from requests.exceptions import ConnectionError, HTTPError, SSLError, TooManyRedirects
|
from requests.exceptions import ConnectionError, HTTPError, SSLError, TooManyRedirects
|
||||||
|
|
||||||
from sphinx.builders.dummy import DummyBuilder
|
from sphinx.builders.dummy import DummyBuilder
|
||||||
|
from sphinx.deprecation import RemovedInSphinx80Warning
|
||||||
from sphinx.locale import __
|
from sphinx.locale import __
|
||||||
from sphinx.transforms.post_transforms import SphinxPostTransform
|
from sphinx.transforms.post_transforms import SphinxPostTransform
|
||||||
from sphinx.util import encode_uri, logging, requests
|
from sphinx.util import encode_uri, logging, requests
|
||||||
@@ -66,6 +68,15 @@ class CheckExternalLinksBuilder(DummyBuilder):
|
|||||||
# set a timeout for non-responding servers
|
# set a timeout for non-responding servers
|
||||||
socket.setdefaulttimeout(5.0)
|
socket.setdefaulttimeout(5.0)
|
||||||
|
|
||||||
|
if not self.config.linkcheck_allow_unauthorized:
|
||||||
|
deprecation_msg = (
|
||||||
|
"The default value for 'linkcheck_allow_unauthorized' will change "
|
||||||
|
"from `True` in Sphinx 7.3+ to `False`, meaning that HTTP 401 "
|
||||||
|
"unauthorized responses will be reported as broken by default. "
|
||||||
|
"See https://github.com/sphinx-doc/sphinx/issues/11433 for details."
|
||||||
|
)
|
||||||
|
warnings.warn(deprecation_msg, RemovedInSphinx80Warning, stacklevel=1)
|
||||||
|
|
||||||
def finish(self) -> None:
|
def finish(self) -> None:
|
||||||
checker = HyperlinkAvailabilityChecker(self.config)
|
checker = HyperlinkAvailabilityChecker(self.config)
|
||||||
logger.info('')
|
logger.info('')
|
||||||
@@ -283,6 +294,7 @@ class HyperlinkAvailabilityCheckWorker(Thread):
|
|||||||
self.allowed_redirects = config.linkcheck_allowed_redirects
|
self.allowed_redirects = config.linkcheck_allowed_redirects
|
||||||
self.retries: int = config.linkcheck_retries
|
self.retries: int = config.linkcheck_retries
|
||||||
self.rate_limit_timeout = config.linkcheck_rate_limit_timeout
|
self.rate_limit_timeout = config.linkcheck_rate_limit_timeout
|
||||||
|
self._allow_unauthorized = config.linkcheck_allow_unauthorized
|
||||||
|
|
||||||
self.user_agent = config.user_agent
|
self.user_agent = config.user_agent
|
||||||
self.tls_verify = config.tls_verify
|
self.tls_verify = config.tls_verify
|
||||||
@@ -437,9 +449,30 @@ class HyperlinkAvailabilityCheckWorker(Thread):
|
|||||||
except HTTPError as err:
|
except HTTPError as err:
|
||||||
error_message = str(err)
|
error_message = str(err)
|
||||||
|
|
||||||
# Unauthorised: the reference probably exists
|
# Unauthorized: the client did not provide required credentials
|
||||||
if status_code == 401:
|
if status_code == 401:
|
||||||
return 'working', 'unauthorized', 0
|
if self._allow_unauthorized:
|
||||||
|
deprecation_msg = (
|
||||||
|
"\n---\n"
|
||||||
|
"The linkcheck builder encountered an HTTP 401 "
|
||||||
|
"(unauthorized) response, and will report it as "
|
||||||
|
"'working' in this version of Sphinx to maintain "
|
||||||
|
"backwards-compatibility."
|
||||||
|
"\n"
|
||||||
|
"This logic will change in Sphinx 8.0 which will "
|
||||||
|
"report the hyperlink as 'broken'."
|
||||||
|
"\n"
|
||||||
|
"To explicitly continue treating unauthorized "
|
||||||
|
"hyperlink responses as 'working', set the "
|
||||||
|
"'linkcheck_allow_unauthorized' config option to "
|
||||||
|
"``True``."
|
||||||
|
"\n"
|
||||||
|
"See sphinx-doc/sphinx#11433 for details."
|
||||||
|
"\n---"
|
||||||
|
)
|
||||||
|
warnings.warn(deprecation_msg, RemovedInSphinx80Warning, stacklevel=1)
|
||||||
|
status = 'working' if self._allow_unauthorized else 'broken'
|
||||||
|
return status, 'unauthorized', 0
|
||||||
|
|
||||||
# Rate limiting; back-off if allowed, or report failure otherwise
|
# Rate limiting; back-off if allowed, or report failure otherwise
|
||||||
if status_code == 429:
|
if status_code == 429:
|
||||||
@@ -625,6 +658,7 @@ def setup(app: Sphinx) -> dict[str, Any]:
|
|||||||
app.add_config_value('linkcheck_anchors_ignore', ['^!'], '')
|
app.add_config_value('linkcheck_anchors_ignore', ['^!'], '')
|
||||||
app.add_config_value('linkcheck_anchors_ignore_for_url', (), '', (tuple, list))
|
app.add_config_value('linkcheck_anchors_ignore_for_url', (), '', (tuple, list))
|
||||||
app.add_config_value('linkcheck_rate_limit_timeout', 300.0, '')
|
app.add_config_value('linkcheck_rate_limit_timeout', 300.0, '')
|
||||||
|
app.add_config_value('linkcheck_allow_unauthorized', True, '')
|
||||||
|
|
||||||
app.add_event('linkcheck-process-uri')
|
app.add_event('linkcheck-process-uri')
|
||||||
|
|
||||||
|
|||||||
@@ -348,8 +348,12 @@ def custom_handler(valid_credentials=(), success_criteria=lambda _: True):
|
|||||||
|
|
||||||
def authenticated(method):
|
def authenticated(method):
|
||||||
def method_if_authenticated(self):
|
def method_if_authenticated(self):
|
||||||
if (expected_token is None
|
if expected_token is None:
|
||||||
or self.headers["Authorization"] == f"Basic {expected_token}"):
|
return method(self)
|
||||||
|
elif not self.headers["Authorization"]:
|
||||||
|
self.send_response(401, "Unauthorized")
|
||||||
|
self.end_headers()
|
||||||
|
elif self.headers["Authorization"] == f"Basic {expected_token}":
|
||||||
return method(self)
|
return method(self)
|
||||||
else:
|
else:
|
||||||
self.send_response(403, "Forbidden")
|
self.send_response(403, "Forbidden")
|
||||||
@@ -392,6 +396,21 @@ def test_auth_header_uses_first_match(app):
|
|||||||
assert content["status"] == "working"
|
assert content["status"] == "working"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.filterwarnings('ignore::sphinx.deprecation.RemovedInSphinx80Warning')
|
||||||
|
@pytest.mark.sphinx(
|
||||||
|
'linkcheck', testroot='linkcheck-localserver', freshenv=True,
|
||||||
|
confoverrides={'linkcheck_allow_unauthorized': False})
|
||||||
|
def test_unauthorized_broken(app):
|
||||||
|
with http_server(custom_handler(valid_credentials=("user1", "password"))):
|
||||||
|
app.build()
|
||||||
|
|
||||||
|
with open(app.outdir / "output.json", encoding="utf-8") as fp:
|
||||||
|
content = json.load(fp)
|
||||||
|
|
||||||
|
assert content["info"] == "unauthorized"
|
||||||
|
assert content["status"] == "broken"
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.sphinx(
|
@pytest.mark.sphinx(
|
||||||
'linkcheck', testroot='linkcheck-localserver', freshenv=True,
|
'linkcheck', testroot='linkcheck-localserver', freshenv=True,
|
||||||
confoverrides={'linkcheck_auth': [(r'^$', ('user1', 'password'))]})
|
confoverrides={'linkcheck_auth': [(r'^$', ('user1', 'password'))]})
|
||||||
@@ -402,10 +421,9 @@ def test_auth_header_no_match(app):
|
|||||||
with open(app.outdir / "output.json", encoding="utf-8") as fp:
|
with open(app.outdir / "output.json", encoding="utf-8") as fp:
|
||||||
content = json.load(fp)
|
content = json.load(fp)
|
||||||
|
|
||||||
# TODO: should this test's webserver return HTTP 401 here?
|
# This link is considered working because linkcheck_allow_unauthorized defaults to True
|
||||||
# https://github.com/sphinx-doc/sphinx/issues/11433
|
assert content["info"] == "unauthorized"
|
||||||
assert content["info"] == "403 Client Error: Forbidden for url: http://localhost:7777/"
|
assert content["status"] == "working"
|
||||||
assert content["status"] == "broken"
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.sphinx(
|
@pytest.mark.sphinx(
|
||||||
|
|||||||
Reference in New Issue
Block a user