From df2f80c7553714397d5ee09b43025e0df9eefa4a Mon Sep 17 00:00:00 2001 From: Jonas Haag Date: Wed, 13 Nov 2019 15:39:47 +0100 Subject: [PATCH] Add linkcheck_auth option --- CHANGES | 2 ++ doc/usage/configuration.rst | 30 +++++++++++++++++++++++++++++- sphinx/builders/linkcheck.py | 17 ++++++++++++++--- tests/test_build_linkcheck.py | 23 +++++++++++++++++++++++ 4 files changed, 68 insertions(+), 4 deletions(-) diff --git a/CHANGES b/CHANGES index cf7231b7d..8256a8a5d 100644 --- a/CHANGES +++ b/CHANGES @@ -39,6 +39,8 @@ Features added * #2546: apidoc: .so file support * #6798: autosummary: emit ``autodoc-skip-member`` event on generating stub file * #6483: i18n: make explicit titles in toctree translatable +* #6816: linkcheck: Add :confval:`linkcheck_auth` option to provide + authentication information when doing ``linkcheck`` builds Bugs fixed ---------- diff --git a/doc/usage/configuration.rst b/doc/usage/configuration.rst index bb1c639f9..8a253d8ce 100644 --- a/doc/usage/configuration.rst +++ b/doc/usage/configuration.rst @@ -538,7 +538,7 @@ General configuration directory pointed ``REQUESTS_CA_BUNDLE`` environment variable if ``tls_cacerts`` not set. - .. _requests: http://docs.python-requests.org/en/master/ + .. _requests: https://requests.readthedocs.io/en/master/ .. confval:: today today_fmt @@ -2369,6 +2369,34 @@ Options for the linkcheck builder .. versionadded:: 1.5 +.. confval:: linkcheck_auth + + Pass authentication information when doing a ``linkcheck`` build. + + A list of ``(regex_pattern, auth_info)`` tuples where the items are: + + *regex_pattern* + A regular expression that matches a URI. + *auth_info* + Authentication information to use for that URI. The value can be anything + that is understood by the ``requests`` library (see `requests + Authentication <requests-auth>`_ for details). + + ..
_requests-auth: https://requests.readthedocs.io/en/master/user/authentication/ + + The ``linkcheck`` builder will use the first matching ``auth_info`` value + it can find in the :confval:`linkcheck_auth` list, so values earlier in the + list have higher priority. + + Example:: + + linkcheck_auth = [ + ('https://foo\.yourcompany\.com/.+', ('johndoe', 'secret')), + ('https://.+\.yourcompany\.com/.+', HTTPDigestAuth(...)), + ] + + .. versionadded:: 2.3 + Options for the XML builder --------------------------- diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index 737079a02..acca8998a 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -81,6 +81,8 @@ class CheckExternalLinksBuilder(Builder): self.to_ignore = [re.compile(x) for x in self.app.config.linkcheck_ignore] self.anchors_ignore = [re.compile(x) for x in self.app.config.linkcheck_anchors_ignore] + self.auth = [(re.compile(pattern), auth_info) for pattern, auth_info + in self.app.config.linkcheck_auth] self.good = set() # type: Set[str] self.broken = {} # type: Dict[str, str] self.redirected = {} # type: Dict[str, Tuple[str, int]] @@ -127,11 +129,18 @@ class CheckExternalLinksBuilder(Builder): except UnicodeError: req_url = encode_uri(req_url) + # Get auth info, if any + for pattern, auth_info in self.auth: + if pattern.match(uri): + break + else: + auth_info = None + try: if anchor and self.app.config.linkcheck_anchors: # Read the whole document and see if #anchor exists response = requests.get(req_url, stream=True, config=self.app.config, - **kwargs) + auth=auth_info, **kwargs) found = check_anchor(response, unquote(anchor)) if not found: @@ -140,13 +149,14 @@ class CheckExternalLinksBuilder(Builder): try: # try a HEAD request first, which should be easier on # the server and the network - response = requests.head(req_url, config=self.app.config, **kwargs) + response = requests.head(req_url, config=self.app.config, + auth=auth_info, **kwargs) 
response.raise_for_status() except HTTPError: # retry with GET request if that fails, some servers # don't like HEAD requests. response = requests.get(req_url, stream=True, config=self.app.config, - **kwargs) + auth=auth_info, **kwargs) response.raise_for_status() except HTTPError as err: if err.response.status_code == 401: @@ -305,6 +315,7 @@ def setup(app: Sphinx) -> Dict[str, Any]: app.add_builder(CheckExternalLinksBuilder) app.add_config_value('linkcheck_ignore', [], None) + app.add_config_value('linkcheck_auth', [], None) app.add_config_value('linkcheck_retries', 1, None) app.add_config_value('linkcheck_timeout', None, None, [int]) app.add_config_value('linkcheck_workers', 5, None) diff --git a/tests/test_build_linkcheck.py b/tests/test_build_linkcheck.py index 4bf47a962..22866b27b 100644 --- a/tests/test_build_linkcheck.py +++ b/tests/test_build_linkcheck.py @@ -8,6 +8,7 @@ :license: BSD, see LICENSE for details. """ +from unittest import mock import pytest @@ -47,3 +48,25 @@ def test_anchors_ignored(app, status, warning): # expect all ok when excluding #top assert not content + + +@pytest.mark.sphinx( + 'linkcheck', testroot='linkcheck', freshenv=True, + confoverrides={'linkcheck_auth': [ + (r'.+google\.com/image.+', 'authinfo1'), + (r'.+google\.com.+', 'authinfo2'), + ] + }) +def test_auth(app, status, warning): + mock_req = mock.MagicMock() + mock_req.return_value = 'fake-response' + + with mock.patch.multiple('requests', get=mock_req, head=mock_req): + app.builder.build_all() + for c_args, c_kwargs in mock_req.call_args_list: + if 'google.com/image' in c_args[0]: + assert c_kwargs['auth'] == 'authinfo1' + elif 'google.com' in c_args[0]: + assert c_kwargs['auth'] == 'authinfo2' + else: + assert not c_kwargs['auth']