Enable reporting HTTP 401 hyperlinks as broken in linkcheck (#11684)

Co-authored-by: picnixz <10796600+picnixz@users.noreply.github.com>
Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com>
This commit is contained in:
James Addison
2024-01-09 12:33:40 +00:00
committed by GitHub
parent 6ac7cdbb02
commit 5211c01646
4 changed files with 76 additions and 8 deletions

View File

@@ -53,6 +53,10 @@ Bugs fixed
* #11715: Apply ``tls_verify`` and ``tls_cacerts`` config to
``ImageDownloader``.
Patch by Nick Touran.
* #11433: Added the ``linkcheck_allow_unauthorized`` configuration option.
Set this option to ``False`` to report HTTP 401 (unauthorized) server
responses as broken.
Patch by James Addison.
Testing
-------

View File

@@ -2915,6 +2915,18 @@ Options for the linkcheck builder
.. versionadded:: 4.4
.. confval:: linkcheck_allow_unauthorized
When a webserver responds with an HTTP 401 (unauthorized) response, the
current default behaviour of Sphinx is to treat the link as "working". To
report such links as "broken" instead, set this option to ``False``.
The default is ``True``.
The default value for this option will be changed in Sphinx 8.0; from that
version onwards, HTTP 401 responses to checked hyperlinks will be treated
as "broken" by default.
.. versionadded:: 7.3
Options for the XML builder
---------------------------

View File

@@ -7,6 +7,7 @@ import json
import re import re
import socket import socket
import time import time
import warnings
from html.parser import HTMLParser from html.parser import HTMLParser
from os import path from os import path
from queue import PriorityQueue, Queue from queue import PriorityQueue, Queue
@@ -18,6 +19,7 @@ from docutils import nodes
from requests.exceptions import ConnectionError, HTTPError, SSLError, TooManyRedirects from requests.exceptions import ConnectionError, HTTPError, SSLError, TooManyRedirects
from sphinx.builders.dummy import DummyBuilder from sphinx.builders.dummy import DummyBuilder
from sphinx.deprecation import RemovedInSphinx80Warning
from sphinx.locale import __ from sphinx.locale import __
from sphinx.transforms.post_transforms import SphinxPostTransform from sphinx.transforms.post_transforms import SphinxPostTransform
from sphinx.util import encode_uri, logging, requests from sphinx.util import encode_uri, logging, requests
@@ -66,6 +68,15 @@ class CheckExternalLinksBuilder(DummyBuilder):
# set a timeout for non-responding servers # set a timeout for non-responding servers
socket.setdefaulttimeout(5.0) socket.setdefaulttimeout(5.0)
if not self.config.linkcheck_allow_unauthorized:
deprecation_msg = (
"The default value for 'linkcheck_allow_unauthorized' will change "
"from `True` in Sphinx 7.3+ to `False`, meaning that HTTP 401 "
"unauthorized responses will be reported as broken by default. "
"See https://github.com/sphinx-doc/sphinx/issues/11433 for details."
)
warnings.warn(deprecation_msg, RemovedInSphinx80Warning, stacklevel=1)
def finish(self) -> None: def finish(self) -> None:
checker = HyperlinkAvailabilityChecker(self.config) checker = HyperlinkAvailabilityChecker(self.config)
logger.info('') logger.info('')
@@ -283,6 +294,7 @@ class HyperlinkAvailabilityCheckWorker(Thread):
self.allowed_redirects = config.linkcheck_allowed_redirects self.allowed_redirects = config.linkcheck_allowed_redirects
self.retries: int = config.linkcheck_retries self.retries: int = config.linkcheck_retries
self.rate_limit_timeout = config.linkcheck_rate_limit_timeout self.rate_limit_timeout = config.linkcheck_rate_limit_timeout
self._allow_unauthorized = config.linkcheck_allow_unauthorized
self.user_agent = config.user_agent self.user_agent = config.user_agent
self.tls_verify = config.tls_verify self.tls_verify = config.tls_verify
@@ -437,9 +449,30 @@ class HyperlinkAvailabilityCheckWorker(Thread):
except HTTPError as err: except HTTPError as err:
error_message = str(err) error_message = str(err)
# Unauthorised: the reference probably exists # Unauthorized: the client did not provide required credentials
if status_code == 401: if status_code == 401:
return 'working', 'unauthorized', 0 if self._allow_unauthorized:
deprecation_msg = (
"\n---\n"
"The linkcheck builder encountered an HTTP 401 "
"(unauthorized) response, and will report it as "
"'working' in this version of Sphinx to maintain "
"backwards-compatibility."
"\n"
"This logic will change in Sphinx 8.0 which will "
"report the hyperlink as 'broken'."
"\n"
"To explicitly continue treating unauthorized "
"hyperlink responses as 'working', set the "
"'linkcheck_allow_unauthorized' config option to "
"``True``."
"\n"
"See sphinx-doc/sphinx#11433 for details."
"\n---"
)
warnings.warn(deprecation_msg, RemovedInSphinx80Warning, stacklevel=1)
status = 'working' if self._allow_unauthorized else 'broken'
return status, 'unauthorized', 0
# Rate limiting; back-off if allowed, or report failure otherwise # Rate limiting; back-off if allowed, or report failure otherwise
if status_code == 429: if status_code == 429:
@@ -625,6 +658,7 @@ def setup(app: Sphinx) -> dict[str, Any]:
app.add_config_value('linkcheck_anchors_ignore', ['^!'], '') app.add_config_value('linkcheck_anchors_ignore', ['^!'], '')
app.add_config_value('linkcheck_anchors_ignore_for_url', (), '', (tuple, list)) app.add_config_value('linkcheck_anchors_ignore_for_url', (), '', (tuple, list))
app.add_config_value('linkcheck_rate_limit_timeout', 300.0, '') app.add_config_value('linkcheck_rate_limit_timeout', 300.0, '')
app.add_config_value('linkcheck_allow_unauthorized', True, '')
app.add_event('linkcheck-process-uri') app.add_event('linkcheck-process-uri')

View File

@@ -348,8 +348,12 @@ def custom_handler(valid_credentials=(), success_criteria=lambda _: True):
def authenticated(method): def authenticated(method):
def method_if_authenticated(self): def method_if_authenticated(self):
if (expected_token is None if expected_token is None:
or self.headers["Authorization"] == f"Basic {expected_token}"): return method(self)
elif not self.headers["Authorization"]:
self.send_response(401, "Unauthorized")
self.end_headers()
elif self.headers["Authorization"] == f"Basic {expected_token}":
return method(self) return method(self)
else: else:
self.send_response(403, "Forbidden") self.send_response(403, "Forbidden")
@@ -392,6 +396,21 @@ def test_auth_header_uses_first_match(app):
assert content["status"] == "working" assert content["status"] == "working"
@pytest.mark.filterwarnings('ignore::sphinx.deprecation.RemovedInSphinx80Warning')
@pytest.mark.sphinx(
    'linkcheck',
    testroot='linkcheck-localserver',
    freshenv=True,
    confoverrides={'linkcheck_allow_unauthorized': False},
)
def test_unauthorized_broken(app):
    """Expect an "unauthorized" result to be reported as "broken".

    The build runs with ``linkcheck_allow_unauthorized`` overridden to
    ``False`` against a local server whose handler is created with one set
    of valid credentials; this test adds no ``linkcheck_auth`` override.
    """
    # Build the handler separately so the server context line stays short.
    handler = custom_handler(valid_credentials=("user1", "password"))
    with http_server(handler):
        app.build()

    # The linkcheck builder writes its result to output.json in the outdir.
    report_path = app.outdir / "output.json"
    with open(report_path, encoding="utf-8") as report:
        result = json.load(report)

    assert result["info"] == "unauthorized"
    assert result["status"] == "broken"
@pytest.mark.sphinx( @pytest.mark.sphinx(
'linkcheck', testroot='linkcheck-localserver', freshenv=True, 'linkcheck', testroot='linkcheck-localserver', freshenv=True,
confoverrides={'linkcheck_auth': [(r'^$', ('user1', 'password'))]}) confoverrides={'linkcheck_auth': [(r'^$', ('user1', 'password'))]})
@@ -402,10 +421,9 @@ def test_auth_header_no_match(app):
with open(app.outdir / "output.json", encoding="utf-8") as fp: with open(app.outdir / "output.json", encoding="utf-8") as fp:
content = json.load(fp) content = json.load(fp)
# TODO: should this test's webserver return HTTP 401 here? # This link is considered working based on the default linkcheck_allow_unauthorized=true
# https://github.com/sphinx-doc/sphinx/issues/11433 assert content["info"] == "unauthorized"
assert content["info"] == "403 Client Error: Forbidden for url: http://localhost:7777/" assert content["status"] == "working"
assert content["status"] == "broken"
@pytest.mark.sphinx( @pytest.mark.sphinx(