mirror of
https://github.com/sphinx-doc/sphinx.git
synced 2025-02-25 18:55:22 -06:00
Fix #2988: linkcheck: retry with a GET request if the HEAD request is denied
This commit is contained in:
parent
72b76ab6d7
commit
ce7fea9a35
1
CHANGES
1
CHANGES
@ -15,6 +15,7 @@ Bugs fixed
|
|||||||
|
|
||||||
* #2810: Problems with pdflatex in an Italian document
|
* #2810: Problems with pdflatex in an Italian document
|
||||||
* Use ``latex_elements.papersize`` to specify papersize of LaTeX in Makefile
|
* Use ``latex_elements.papersize`` to specify papersize of LaTeX in Makefile
|
||||||
|
* #2988: linkcheck: retry with a GET request if the HEAD request is denied
|
||||||
|
|
||||||
Documentation
|
Documentation
|
||||||
-------------
|
-------------
|
||||||
|
@ -84,14 +84,12 @@ class CheckExternalLinksBuilder(Builder):
|
|||||||
self.good = set()
|
self.good = set()
|
||||||
self.broken = {}
|
self.broken = {}
|
||||||
self.redirected = {}
|
self.redirected = {}
|
||||||
|
self.headers = dict(useragent_header)
|
||||||
# set a timeout for non-responding servers
|
# set a timeout for non-responding servers
|
||||||
socket.setdefaulttimeout(5.0)
|
socket.setdefaulttimeout(5.0)
|
||||||
# create output file
|
# create output file
|
||||||
open(path.join(self.outdir, 'output.txt'), 'w').close()
|
open(path.join(self.outdir, 'output.txt'), 'w').close()
|
||||||
|
|
||||||
self.session = requests.Session()
|
|
||||||
self.session.headers = dict(useragent_header)
|
|
||||||
|
|
||||||
# create queues and worker threads
|
# create queues and worker threads
|
||||||
self.wqueue = queue.Queue()
|
self.wqueue = queue.Queue()
|
||||||
self.rqueue = queue.Queue()
|
self.rqueue = queue.Queue()
|
||||||
@ -129,23 +127,23 @@ class CheckExternalLinksBuilder(Builder):
|
|||||||
# Read the whole document and see if #anchor exists
|
# Read the whole document and see if #anchor exists
|
||||||
# (Anchors starting with ! are ignored since they are
|
# (Anchors starting with ! are ignored since they are
|
||||||
# commonly used for dynamic pages)
|
# commonly used for dynamic pages)
|
||||||
response = self.session.get(req_url, stream=True, **kwargs)
|
response = requests.get(req_url, stream=True, headers=self.headers,
|
||||||
|
**kwargs)
|
||||||
found = check_anchor(response, unquote(anchor))
|
found = check_anchor(response, unquote(anchor))
|
||||||
|
|
||||||
if not found:
|
if not found:
|
||||||
raise Exception("Anchor '%s' not found" % anchor)
|
raise Exception("Anchor '%s' not found" % anchor)
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
# try a HEAD request, which should be easier on
|
# try a HEAD request first, which should be easier on
|
||||||
# the server and the network
|
# the server and the network
|
||||||
response = self.session.head(req_url, **kwargs)
|
response = requests.head(req_url, headers=self.headers, **kwargs)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
except HTTPError as err:
|
except HTTPError as err:
|
||||||
if err.response.status_code not in (403, 405):
|
# retry with GET request if that fails, some servers
|
||||||
raise
|
# don't like HEAD requests.
|
||||||
# retry with GET if that fails, some servers
|
response = requests.get(req_url, stream=True, headers=self.headers,
|
||||||
# don't like HEAD requests and reply with 403 or 405
|
**kwargs)
|
||||||
response = self.session.get(req_url, stream=True, **kwargs)
|
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
except HTTPError as err:
|
except HTTPError as err:
|
||||||
if err.response.status_code == 401:
|
if err.response.status_code == 401:
|
||||||
|
@ -39,5 +39,5 @@ except pkg_resources.UnknownExtra:
|
|||||||
'install requests-2.4.1+.'
|
'install requests-2.4.1+.'
|
||||||
)
|
)
|
||||||
|
|
||||||
useragent_header = [('User-agent',
|
useragent_header = [('User-Agent',
|
||||||
'Mozilla/5.0 (X11; Linux x86_64; rv:25.0) Gecko/20100101 Firefox/25.0')]
|
'Mozilla/5.0 (X11; Linux x86_64; rv:25.0) Gecko/20100101 Firefox/25.0')]
|
||||||
|
@ -66,7 +66,7 @@ def test_build_all():
|
|||||||
)
|
)
|
||||||
|
|
||||||
with mock.patch('sphinx.builders.linkcheck.requests') as requests:
|
with mock.patch('sphinx.builders.linkcheck.requests') as requests:
|
||||||
requests.Session().head = request_session_head
|
requests.head = request_session_head
|
||||||
|
|
||||||
# note: no 'html' - if it's ok with dirhtml it's ok with html
|
# note: no 'html' - if it's ok with dirhtml it's ok with html
|
||||||
for buildername in ['dirhtml', 'singlehtml', 'latex', 'texinfo', 'pickle',
|
for buildername in ['dirhtml', 'singlehtml', 'latex', 'texinfo', 'pickle',
|
||||||
|
Loading…
Reference in New Issue
Block a user