Merge pull request #7762 from tk0miya/7247_linkcheck_request_headers

Close #7247: linkcheck: Add linkcheck_request_headers
This commit is contained in:
Takeshi KOMIYA 2020-06-05 01:17:31 +09:00 committed by GitHub
commit dce45413e6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 87 additions and 5 deletions

View File

@ -84,6 +84,8 @@ Features added
of ``foo[=bar]``
* #7582: napoleon: a type for attribute are represented like type annotation
* #7734: napoleon: overescaped trailing underscore on attribute
* #7247: linkcheck: Add :confval:`linkcheck_request_headers` to send custom HTTP
headers for specific host
* #7683: Add ``allowed_exceptions`` parameter to ``Sphinx.emit()`` to allow
handlers to raise specified exceptions
* #7295: C++, parse (trailing) requires clauses.

View File

@ -2390,6 +2390,32 @@ Options for the linkcheck builder
.. versionadded:: 1.1
.. confval:: linkcheck_request_headers
A dictionary that maps baseurls to HTTP request headers.
The key is a URL base string like ``"https://sphinx-doc.org/"``. To specify
headers for other hosts, ``"*"`` can be used. It matches all hosts only when
the URL does not match other settings.
The value is a dictionary that maps header name to its value.
Example:
.. code-block:: python
linkcheck_request_headers = {
"https://sphinx-doc.org/": {
"Accept": "text/html",
"Accept-Encoding": "utf-8",
},
"*": {
"Accept": "text/html,application/xhtml+xml",
}
}
.. versionadded:: 3.1
.. confval:: linkcheck_retries
The number of times the linkcheck builder will attempt to check a URL before

View File

@ -16,7 +16,7 @@ import threading
from html.parser import HTMLParser
from os import path
from typing import Any, Dict, List, Set, Tuple
from urllib.parse import unquote
from urllib.parse import unquote, urlparse
from docutils import nodes
from docutils.nodes import Node
@ -36,6 +36,11 @@ from sphinx.util.requests import is_ssl_error
logger = logging.getLogger(__name__)
DEFAULT_REQUEST_HEADERS = {
'Accept': 'text/html,application/xhtml+xml;q=0.9,*/*;q=0.8',
}
class AnchorCheckParser(HTMLParser):
"""Specialized HTML parser that looks for a specific anchor."""
@ -107,13 +112,25 @@ class CheckExternalLinksBuilder(Builder):
def check_thread(self) -> None:
kwargs = {
'allow_redirects': True,
'headers': {
'Accept': 'text/html,application/xhtml+xml;q=0.9,*/*;q=0.8',
},
}
} # type: Dict
if self.app.config.linkcheck_timeout:
kwargs['timeout'] = self.app.config.linkcheck_timeout
def get_request_headers() -> Dict:
url = urlparse(uri)
candidates = ["%s://%s" % (url.scheme, url.netloc),
"%s://%s/" % (url.scheme, url.netloc),
uri,
"*"]
for u in candidates:
if u in self.config.linkcheck_request_headers:
headers = dict(DEFAULT_REQUEST_HEADERS)
headers.update(self.config.linkcheck_request_headers[u])
return headers
return {}
def check_uri() -> Tuple[str, str, int]:
# split off anchor
if '#' in uri:
@ -139,6 +156,9 @@ class CheckExternalLinksBuilder(Builder):
else:
auth_info = None
# update request headers for the URL
kwargs['headers'] = get_request_headers()
try:
if anchor and self.app.config.linkcheck_anchors:
# Read the whole document and see if #anchor exists
@ -337,6 +357,7 @@ def setup(app: Sphinx) -> Dict[str, Any]:
app.add_config_value('linkcheck_ignore', [], None)
app.add_config_value('linkcheck_auth', [], None)
app.add_config_value('linkcheck_request_headers', {}, None)
app.add_config_value('linkcheck_retries', 1, None)
app.add_config_value('linkcheck_timeout', None, None, [int])
app.add_config_value('linkcheck_workers', 5, None)

View File

@ -124,3 +124,36 @@ def test_auth(app, status, warning):
assert c_kwargs['auth'] == 'authinfo2'
else:
assert not c_kwargs['auth']
@pytest.mark.sphinx(
'linkcheck', testroot='linkcheck', freshenv=True,
confoverrides={'linkcheck_request_headers': {
"https://localhost:7777/": {
"Accept": "text/html",
},
"http://www.sphinx-doc.org": { # no slash at the end
"Accept": "application/json",
},
"*": {
"X-Secret": "open sesami",
}
}})
def test_linkcheck_request_headers(app, status, warning):
mock_req = mock.MagicMock()
mock_req.return_value = 'fake-response'
with mock.patch.multiple('requests', get=mock_req, head=mock_req):
app.builder.build_all()
for args, kwargs in mock_req.call_args_list:
url = args[0]
headers = kwargs.get('headers', {})
if "https://localhost:7777" in url:
assert headers["Accept"] == "text/html"
elif 'http://www.sphinx-doc.org' in url:
assert headers["Accept"] == "application/json"
elif 'https://www.google.com' in url:
assert headers["Accept"] == "text/html,application/xhtml+xml;q=0.9,*/*;q=0.8"
assert headers["X-Secret"] == "open sesami"
else:
assert headers["Accept"] == "text/html,application/xhtml+xml;q=0.9,*/*;q=0.8"