diff --git a/CHANGES b/CHANGES index ef1d36002..78dc38213 100644 --- a/CHANGES +++ b/CHANGES @@ -84,6 +84,8 @@ Features added of ``foo[=bar]`` * #7582: napoleon: a type for attribute are represented like type annotation * #7734: napoleon: overescaped trailing underscore on attribute +* #7247: linkcheck: Add :confval:`linkcheck_request_headers` to send custom HTTP + headers for specific hosts * #7683: Add ``allowed_exceptions`` parameter to ``Sphinx.emit()`` to allow handlers to raise specified exceptions * #7295: C++, parse (trailing) requires clauses. diff --git a/doc/usage/configuration.rst b/doc/usage/configuration.rst index bc483fa1c..cdcc2a561 100644 --- a/doc/usage/configuration.rst +++ b/doc/usage/configuration.rst @@ -2390,6 +2390,32 @@ Options for the linkcheck builder .. versionadded:: 1.1 +.. confval:: linkcheck_request_headers + + A dictionary that maps base URLs to HTTP request headers. + + The key is a URL base string like ``"https://sphinx-doc.org/"``. To specify + headers for all other hosts, use ``"*"`` as the key; it applies only when + the URL does not match any other key. + + The value is a dictionary that maps each header name to its value. + + Example: + + .. code-block:: python + + linkcheck_request_headers = { + "https://sphinx-doc.org/": { + "Accept": "text/html", + "Accept-Encoding": "utf-8", + }, + "*": { + "Accept": "text/html,application/xhtml+xml", + } + } + + .. versionadded:: 3.1 + .. 
# Headers sent with every link-check request unless overridden by the
# ``linkcheck_request_headers`` setting.
DEFAULT_REQUEST_HEADERS = {
    'Accept': 'text/html,application/xhtml+xml;q=0.9,*/*;q=0.8',
}


def get_request_headers() -> Dict:
    """Return the HTTP request headers to send for the current ``uri``.

    ``uri`` and ``self`` are free variables supplied by the enclosing scope.
    ``self.config.linkcheck_request_headers`` is searched with the following
    keys, in order: ``scheme://netloc``, ``scheme://netloc/``, the full URI
    and finally the ``"*"`` wildcard.  The first key found wins and its
    headers are layered on top of a copy of ``DEFAULT_REQUEST_HEADERS``.

    When no key matches, the defaults alone are returned (rather than an
    empty dict), so the default ``Accept`` header is still sent when the
    setting is empty or has no applicable entry.

    :return: a new dict of header names to values; mutating it does not
             affect ``DEFAULT_REQUEST_HEADERS``.
    """
    url = urlparse(uri)
    base = "%s://%s" % (url.scheme, url.netloc)
    candidates = [base, base + "/", uri, "*"]

    # Start from a fresh copy so the module-level defaults are never mutated.
    headers = dict(DEFAULT_REQUEST_HEADERS)
    configured = self.config.linkcheck_request_headers
    for key in candidates:
        if key in configured:
            headers.update(configured[key])
            break  # first (most preferred) matching key wins

    return headers
@pytest.mark.sphinx(
    'linkcheck', testroot='linkcheck', freshenv=True,
    confoverrides={'linkcheck_request_headers': {
        "https://localhost:7777/": {
            "Accept": "text/html",
        },
        "http://www.sphinx-doc.org": {  # no slash at the end
            "Accept": "application/json",
        },
        "*": {
            "X-Secret": "open sesami",
        }
    }})
def test_linkcheck_request_headers(app, status, warning):
    """Check which headers each mocked request receives.

    ``requests.get`` and ``requests.head`` are replaced by one shared mock,
    the build is run, and every recorded call is inspected: URLs on the two
    explicitly configured hosts must carry that host's ``Accept`` value, all
    other URLs must carry the default ``Accept`` value, and the google URL
    must additionally carry the wildcard ``X-Secret`` header.
    """
    default_accept = "text/html,application/xhtml+xml;q=0.9,*/*;q=0.8"
    fake_request = mock.MagicMock(return_value='fake-response')

    with mock.patch.multiple('requests', get=fake_request, head=fake_request):
        app.builder.build_all()
        for positional, keywords in fake_request.call_args_list:
            target = positional[0]
            sent = keywords.get('headers', {})

            # Explicitly configured hosts: only the Accept override is checked.
            if "https://localhost:7777" in target:
                assert sent["Accept"] == "text/html"
                continue
            if 'http://www.sphinx-doc.org' in target:
                assert sent["Accept"] == "application/json"
                continue

            # Everything else falls back to the default Accept header.
            assert sent["Accept"] == default_accept
            if 'https://www.google.com' in target:
                assert sent["X-Secret"] == "open sesami"