mirror of
https://github.com/sphinx-doc/sphinx.git
synced 2025-02-25 18:55:22 -06:00
Fix #2988: linkcheck: retry with a GET request if the HEAD request is denied
This commit is contained in:
parent
72b76ab6d7
commit
ce7fea9a35
1
CHANGES
1
CHANGES
@@ -15,6 +15,7 @@ Bugs fixed
|
||||
|
||||
* #2810: Problems with pdflatex in an Italian document
|
||||
* Use ``latex_elements.papersize`` to specify papersize of LaTeX in Makefile
|
||||
* #2988: linkcheck: retry with a GET request if the HEAD request is denied
|
||||
|
||||
Documentation
|
||||
-------------
|
||||
|
@@ -84,14 +84,12 @@ class CheckExternalLinksBuilder(Builder):
|
||||
self.good = set()
|
||||
self.broken = {}
|
||||
self.redirected = {}
|
||||
self.headers = dict(useragent_header)
|
||||
# set a timeout for non-responding servers
|
||||
socket.setdefaulttimeout(5.0)
|
||||
# create output file
|
||||
open(path.join(self.outdir, 'output.txt'), 'w').close()
|
||||
|
||||
self.session = requests.Session()
|
||||
self.session.headers = dict(useragent_header)
|
||||
|
||||
# create queues and worker threads
|
||||
self.wqueue = queue.Queue()
|
||||
self.rqueue = queue.Queue()
|
||||
@@ -129,23 +127,23 @@ class CheckExternalLinksBuilder(Builder):
|
||||
# Read the whole document and see if #anchor exists
|
||||
# (Anchors starting with ! are ignored since they are
|
||||
# commonly used for dynamic pages)
|
||||
response = self.session.get(req_url, stream=True, **kwargs)
|
||||
response = requests.get(req_url, stream=True, headers=self.headers,
|
||||
**kwargs)
|
||||
found = check_anchor(response, unquote(anchor))
|
||||
|
||||
if not found:
|
||||
raise Exception("Anchor '%s' not found" % anchor)
|
||||
else:
|
||||
try:
|
||||
# try a HEAD request, which should be easier on
|
||||
# try a HEAD request first, which should be easier on
|
||||
# the server and the network
|
||||
response = self.session.head(req_url, **kwargs)
|
||||
response = requests.head(req_url, headers=self.headers, **kwargs)
|
||||
response.raise_for_status()
|
||||
except HTTPError as err:
|
||||
if err.response.status_code not in (403, 405):
|
||||
raise
|
||||
# retry with GET if that fails, some servers
|
||||
# don't like HEAD requests and reply with 403 or 405
|
||||
response = self.session.get(req_url, stream=True, **kwargs)
|
||||
# retry with GET request if that fails, some servers
|
||||
# don't like HEAD requests.
|
||||
response = requests.get(req_url, stream=True, headers=self.headers,
|
||||
**kwargs)
|
||||
response.raise_for_status()
|
||||
except HTTPError as err:
|
||||
if err.response.status_code == 401:
|
||||
|
@@ -39,5 +39,5 @@ except pkg_resources.UnknownExtra:
|
||||
'install requests-2.4.1+.'
|
||||
)
|
||||
|
||||
useragent_header = [('User-agent',
|
||||
useragent_header = [('User-Agent',
|
||||
'Mozilla/5.0 (X11; Linux x86_64; rv:25.0) Gecko/20100101 Firefox/25.0')]
|
||||
|
@@ -66,7 +66,7 @@ def test_build_all():
|
||||
)
|
||||
|
||||
with mock.patch('sphinx.builders.linkcheck.requests') as requests:
|
||||
requests.Session().head = request_session_head
|
||||
requests.head = request_session_head
|
||||
|
||||
# note: no 'html' - if it's ok with dirhtml it's ok with html
|
||||
for buildername in ['dirhtml', 'singlehtml', 'latex', 'texinfo', 'pickle',
|
||||
|
Loading…
Reference in New Issue
Block a user