Enable reporting HTTP 401 hyperlinks as broken in linkcheck (#11684)

Co-authored-by: picnixz <10796600+picnixz@users.noreply.github.com>
Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com>
This commit is contained in:
James Addison 2024-01-09 12:33:40 +00:00 committed by GitHub
parent 6ac7cdbb02
commit 5211c01646
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 76 additions and 8 deletions

View File

@ -53,6 +53,10 @@ Bugs fixed
* #11715: Apply ``tls_verify`` and ``tls_cacerts`` config to
``ImageDownloader``.
Patch by Nick Touran.
* #11433: Added the ``linkcheck_allow_unauthorized`` configuration option.
Set this option to ``False`` to report HTTP 401 (unauthorized) server
responses as broken.
Patch by James Addison.
Testing
-------

View File

@ -2915,6 +2915,18 @@ Options for the linkcheck builder
.. versionadded:: 4.4
.. confval:: linkcheck_allow_unauthorized
When a webserver responds with an HTTP 401 (unauthorized) response, the
current default behaviour of Sphinx is to treat the link as "working". To
change that behaviour, set this option to ``False``.
The default value for this option will be changed in Sphinx 8.0; from that
version onwards, HTTP 401 responses to checked hyperlinks will be treated
as "broken" by default.
.. versionadded:: 7.3
Options for the XML builder
---------------------------

View File

@ -7,6 +7,7 @@ import json
import re
import socket
import time
import warnings
from html.parser import HTMLParser
from os import path
from queue import PriorityQueue, Queue
@ -18,6 +19,7 @@ from docutils import nodes
from requests.exceptions import ConnectionError, HTTPError, SSLError, TooManyRedirects
from sphinx.builders.dummy import DummyBuilder
from sphinx.deprecation import RemovedInSphinx80Warning
from sphinx.locale import __
from sphinx.transforms.post_transforms import SphinxPostTransform
from sphinx.util import encode_uri, logging, requests
@ -66,6 +68,15 @@ class CheckExternalLinksBuilder(DummyBuilder):
# set a timeout for non-responding servers
socket.setdefaulttimeout(5.0)
if not self.config.linkcheck_allow_unauthorized:
deprecation_msg = (
"The default value for 'linkcheck_allow_unauthorized' will change "
"from `True` in Sphinx 7.3+ to `False`, meaning that HTTP 401 "
"unauthorized responses will be reported as broken by default. "
"See https://github.com/sphinx-doc/sphinx/issues/11433 for details."
)
warnings.warn(deprecation_msg, RemovedInSphinx80Warning, stacklevel=1)
def finish(self) -> None:
checker = HyperlinkAvailabilityChecker(self.config)
logger.info('')
@ -283,6 +294,7 @@ class HyperlinkAvailabilityCheckWorker(Thread):
self.allowed_redirects = config.linkcheck_allowed_redirects
self.retries: int = config.linkcheck_retries
self.rate_limit_timeout = config.linkcheck_rate_limit_timeout
self._allow_unauthorized = config.linkcheck_allow_unauthorized
self.user_agent = config.user_agent
self.tls_verify = config.tls_verify
@ -437,9 +449,30 @@ class HyperlinkAvailabilityCheckWorker(Thread):
except HTTPError as err:
error_message = str(err)
# Unauthorised: the reference probably exists
# Unauthorized: the client did not provide required credentials
if status_code == 401:
return 'working', 'unauthorized', 0
if self._allow_unauthorized:
deprecation_msg = (
"\n---\n"
"The linkcheck builder encountered an HTTP 401 "
"(unauthorized) response, and will report it as "
"'working' in this version of Sphinx to maintain "
"backwards-compatibility."
"\n"
"This logic will change in Sphinx 8.0 which will "
"report the hyperlink as 'broken'."
"\n"
"To explicitly continue treating unauthorized "
"hyperlink responses as 'working', set the "
"'linkcheck_allow_unauthorized' config option to "
"``True``."
"\n"
"See sphinx-doc/sphinx#11433 for details."
"\n---"
)
warnings.warn(deprecation_msg, RemovedInSphinx80Warning, stacklevel=1)
status = 'working' if self._allow_unauthorized else 'broken'
return status, 'unauthorized', 0
# Rate limiting; back-off if allowed, or report failure otherwise
if status_code == 429:
@ -625,6 +658,7 @@ def setup(app: Sphinx) -> dict[str, Any]:
app.add_config_value('linkcheck_anchors_ignore', ['^!'], '')
app.add_config_value('linkcheck_anchors_ignore_for_url', (), '', (tuple, list))
app.add_config_value('linkcheck_rate_limit_timeout', 300.0, '')
app.add_config_value('linkcheck_allow_unauthorized', True, '')
app.add_event('linkcheck-process-uri')

View File

@ -348,8 +348,12 @@ def custom_handler(valid_credentials=(), success_criteria=lambda _: True):
def authenticated(method):
def method_if_authenticated(self):
if (expected_token is None
or self.headers["Authorization"] == f"Basic {expected_token}"):
if expected_token is None:
return method(self)
elif not self.headers["Authorization"]:
self.send_response(401, "Unauthorized")
self.end_headers()
elif self.headers["Authorization"] == f"Basic {expected_token}":
return method(self)
else:
self.send_response(403, "Forbidden")
@ -392,6 +396,21 @@ def test_auth_header_uses_first_match(app):
assert content["status"] == "working"
@pytest.mark.filterwarnings('ignore::sphinx.deprecation.RemovedInSphinx80Warning')
@pytest.mark.sphinx(
    'linkcheck', testroot='linkcheck-localserver', freshenv=True,
    confoverrides={'linkcheck_allow_unauthorized': False})
def test_unauthorized_broken(app):
    """Check that the link is reported as broken/unauthorized when
    ``linkcheck_allow_unauthorized`` is overridden to ``False``.
    """
    # The local server is configured with one set of valid credentials;
    # the config overrides used for this build do not set any.
    handler = custom_handler(valid_credentials=("user1", "password"))
    with http_server(handler):
        app.build()

    # The linkcheck build writes its result to output.json in the outdir.
    output_path = app.outdir / "output.json"
    with open(output_path, encoding="utf-8") as output_file:
        report = json.load(output_file)

    assert report["info"] == "unauthorized"
    assert report["status"] == "broken"
@pytest.mark.sphinx(
'linkcheck', testroot='linkcheck-localserver', freshenv=True,
confoverrides={'linkcheck_auth': [(r'^$', ('user1', 'password'))]})
@ -402,10 +421,9 @@ def test_auth_header_no_match(app):
with open(app.outdir / "output.json", encoding="utf-8") as fp:
content = json.load(fp)
# TODO: should this test's webserver return HTTP 401 here?
# https://github.com/sphinx-doc/sphinx/issues/11433
assert content["info"] == "403 Client Error: Forbidden for url: http://localhost:7777/"
assert content["status"] == "broken"
# This link is considered working because linkcheck_allow_unauthorized defaults to True
assert content["info"] == "unauthorized"
assert content["status"] == "working"
@pytest.mark.sphinx(