mirror of
https://github.com/sphinx-doc/sphinx.git
synced 2025-02-25 18:55:22 -06:00
Merge pull request #7762 from tk0miya/7247_linkcheck_request_headers
Close #7247: linkcheck: Add linkcheck_request_headers
This commit is contained in:
commit
dce45413e6
2
CHANGES
2
CHANGES
@ -84,6 +84,8 @@ Features added
|
||||
of ``foo[=bar]``
|
||||
* #7582: napoleon: the type of an attribute is represented like a type annotation
|
||||
* #7734: napoleon: overescaped trailing underscore on attribute
|
||||
* #7247: linkcheck: Add :confval:`linkcheck_request_headers` to send custom HTTP
|
||||
headers for specific hosts
|
||||
* #7683: Add ``allowed_exceptions`` parameter to ``Sphinx.emit()`` to allow
|
||||
handlers to raise specified exceptions
|
||||
* #7295: C++, parse (trailing) requires clauses.
|
||||
|
@ -2390,6 +2390,32 @@ Options for the linkcheck builder
|
||||
|
||||
.. versionadded:: 1.1
|
||||
|
||||
.. confval:: linkcheck_request_headers
|
||||
|
||||
A dictionary that maps base URLs to HTTP request headers.
|
||||
|
||||
The key is a URL base string like ``"https://sphinx-doc.org/"``. To specify
|
||||
headers for other hosts, ``"*"`` can be used. It is applied as a fallback when
|
||||
the URL does not match other settings.
|
||||
|
||||
Each value is a dictionary that maps header names to their values.
|
||||
|
||||
Example:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
linkcheck_request_headers = {
|
||||
"https://sphinx-doc.org/": {
|
||||
"Accept": "text/html",
|
||||
"Accept-Encoding": "utf-8",
|
||||
},
|
||||
"*": {
|
||||
"Accept": "text/html,application/xhtml+xml",
|
||||
}
|
||||
}
|
||||
|
||||
.. versionadded:: 3.1
|
||||
|
||||
.. confval:: linkcheck_retries
|
||||
|
||||
The number of times the linkcheck builder will attempt to check a URL before
|
||||
|
@ -16,7 +16,7 @@ import threading
|
||||
from html.parser import HTMLParser
|
||||
from os import path
|
||||
from typing import Any, Dict, List, Set, Tuple
|
||||
from urllib.parse import unquote
|
||||
from urllib.parse import unquote, urlparse
|
||||
|
||||
from docutils import nodes
|
||||
from docutils.nodes import Node
|
||||
@ -36,6 +36,11 @@ from sphinx.util.requests import is_ssl_error
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Baseline HTTP request headers: get_request_headers() copies this dict and
# overlays the matching ``linkcheck_request_headers`` entry on top of it.
DEFAULT_REQUEST_HEADERS = {
|
||||
'Accept': 'text/html,application/xhtml+xml;q=0.9,*/*;q=0.8',
|
||||
}
|
||||
|
||||
|
||||
class AnchorCheckParser(HTMLParser):
|
||||
"""Specialized HTML parser that looks for a specific anchor."""
|
||||
|
||||
@ -107,13 +112,25 @@ class CheckExternalLinksBuilder(Builder):
|
||||
def check_thread(self) -> None:
|
||||
kwargs = {
|
||||
'allow_redirects': True,
|
||||
'headers': {
|
||||
'Accept': 'text/html,application/xhtml+xml;q=0.9,*/*;q=0.8',
|
||||
},
|
||||
}
|
||||
} # type: Dict
|
||||
if self.app.config.linkcheck_timeout:
|
||||
kwargs['timeout'] = self.app.config.linkcheck_timeout
|
||||
|
||||
def get_request_headers() -> Dict:
    """Build the HTTP request headers for the URI currently being checked.

    ``linkcheck_request_headers`` is searched with these keys, in order:
    the URI's ``scheme://host`` base without a trailing slash, the same
    base with a trailing slash, the full URI, and finally the ``"*"``
    wildcard.  The first key found wins and its headers are layered on
    top of ``DEFAULT_REQUEST_HEADERS``.

    :return: a new dict of headers.  When no configured key matches, this
             is a plain copy of ``DEFAULT_REQUEST_HEADERS`` so that the
             default ``Accept`` header is still sent.
    """
    url = urlparse(uri)
    base = "%s://%s" % (url.scheme, url.netloc)
    candidates = [base, base + "/", uri, "*"]
    configured = self.config.linkcheck_request_headers

    # Always start from a copy of the defaults: returning an empty dict on
    # "no match" would silently drop the Accept header for every URL that
    # has no explicit (or wildcard) entry.
    headers = dict(DEFAULT_REQUEST_HEADERS)
    for u in candidates:
        if u in configured:
            headers.update(configured[u])
            break

    return headers
|
||||
|
||||
def check_uri() -> Tuple[str, str, int]:
|
||||
# split off anchor
|
||||
if '#' in uri:
|
||||
@ -139,6 +156,9 @@ class CheckExternalLinksBuilder(Builder):
|
||||
else:
|
||||
auth_info = None
|
||||
|
||||
# update request headers for the URL
|
||||
kwargs['headers'] = get_request_headers()
|
||||
|
||||
try:
|
||||
if anchor and self.app.config.linkcheck_anchors:
|
||||
# Read the whole document and see if #anchor exists
|
||||
@ -337,6 +357,7 @@ def setup(app: Sphinx) -> Dict[str, Any]:
|
||||
|
||||
app.add_config_value('linkcheck_ignore', [], None)
|
||||
app.add_config_value('linkcheck_auth', [], None)
|
||||
app.add_config_value('linkcheck_request_headers', {}, None)
|
||||
app.add_config_value('linkcheck_retries', 1, None)
|
||||
app.add_config_value('linkcheck_timeout', None, None, [int])
|
||||
app.add_config_value('linkcheck_workers', 5, None)
|
||||
|
@ -124,3 +124,36 @@ def test_auth(app, status, warning):
|
||||
assert c_kwargs['auth'] == 'authinfo2'
|
||||
else:
|
||||
assert not c_kwargs['auth']
|
||||
|
||||
|
||||
@pytest.mark.sphinx(
    'linkcheck', testroot='linkcheck', freshenv=True,
    confoverrides={'linkcheck_request_headers': {
        "https://localhost:7777/": {
            "Accept": "text/html",
        },
        "http://www.sphinx-doc.org": {  # no slash at the end
            "Accept": "application/json",
        },
        "*": {
            "X-Secret": "open sesami",
        }
    }})
def test_linkcheck_request_headers(app, status, warning):
    """Check which headers each outgoing linkcheck request carries.

    ``requests.get`` and ``requests.head`` are both replaced by one mock so
    that every recorded call can be inspected after the build.
    """
    default_accept = "text/html,application/xhtml+xml;q=0.9,*/*;q=0.8"
    fake_request = mock.MagicMock(return_value='fake-response')

    with mock.patch.multiple('requests', get=fake_request, head=fake_request):
        app.builder.build_all()

        for call_args, call_kwargs in fake_request.call_args_list:
            target = call_args[0]
            sent = call_kwargs.get('headers', {})

            if "https://localhost:7777" in target:
                # key configured with a trailing slash
                assert sent["Accept"] == "text/html"
            elif 'http://www.sphinx-doc.org' in target:
                # key configured without a trailing slash
                assert sent["Accept"] == "application/json"
            elif 'https://www.google.com' in target:
                # wildcard entry adds to the default Accept header
                assert sent["Accept"] == default_accept
                assert sent["X-Secret"] == "open sesami"
            else:
                assert sent["Accept"] == default_accept
|
||||
|
Loading…
Reference in New Issue
Block a user