mirror of
https://github.com/sphinx-doc/sphinx.git
synced 2025-02-25 18:55:22 -06:00
Merge pull request #2312 from alex/linkcheck-retry
Fixed #1099 -- allow the linkcheck builder to retry on errors.
This commit is contained in:
commit
56a4e791d6
@@ -1757,6 +1757,13 @@ Options for the linkcheck builder
|
|||||||
|
|
||||||
.. versionadded:: 1.1
|
.. versionadded:: 1.1
|
||||||
|
|
||||||
|
.. confval:: linkcheck_retries
|
||||||
|
|
||||||
|
The number of times the linkcheck builder will attempt to check a URL before
|
||||||
|
declaring it broken. Defaults to 1 attempt.
|
||||||
|
|
||||||
|
.. versionadded:: 1.4
|
||||||
|
|
||||||
.. confval:: linkcheck_timeout
|
.. confval:: linkcheck_timeout
|
||||||
|
|
||||||
A timeout value, in seconds, for the linkcheck builder. **Only works in
|
A timeout value, in seconds, for the linkcheck builder. **Only works in
|
||||||
|
@@ -137,23 +137,7 @@ class CheckExternalLinksBuilder(Builder):
|
|||||||
if self.app.config.linkcheck_timeout:
|
if self.app.config.linkcheck_timeout:
|
||||||
kwargs['timeout'] = self.app.config.linkcheck_timeout
|
kwargs['timeout'] = self.app.config.linkcheck_timeout
|
||||||
|
|
||||||
def check():
|
def check_uri():
|
||||||
# check for various conditions without bothering the network
|
|
||||||
if len(uri) == 0 or uri[0] == '#' or \
|
|
||||||
uri[0:7] == 'mailto:' or uri[0:4] == 'ftp:':
|
|
||||||
return 'unchecked', '', 0
|
|
||||||
elif not (uri[0:5] == 'http:' or uri[0:6] == 'https:'):
|
|
||||||
return 'local', '', 0
|
|
||||||
elif uri in self.good:
|
|
||||||
return 'working', 'old', 0
|
|
||||||
elif uri in self.broken:
|
|
||||||
return 'broken', self.broken[uri], 0
|
|
||||||
elif uri in self.redirected:
|
|
||||||
return 'redirected', self.redirected[uri][0], self.redirected[uri][1]
|
|
||||||
for rex in self.to_ignore:
|
|
||||||
if rex.match(uri):
|
|
||||||
return 'ignored', '', 0
|
|
||||||
|
|
||||||
# split off anchor
|
# split off anchor
|
||||||
if '#' in uri:
|
if '#' in uri:
|
||||||
req_url, hash = uri.split('#', 1)
|
req_url, hash = uri.split('#', 1)
|
||||||
@@ -167,7 +151,6 @@ class CheckExternalLinksBuilder(Builder):
|
|||||||
except UnicodeError:
|
except UnicodeError:
|
||||||
req_url = encode_uri(req_url)
|
req_url = encode_uri(req_url)
|
||||||
|
|
||||||
# need to actually check the URI
|
|
||||||
try:
|
try:
|
||||||
if hash and self.app.config.linkcheck_anchors:
|
if hash and self.app.config.linkcheck_anchors:
|
||||||
# Read the whole document and see if #hash exists
|
# Read the whole document and see if #hash exists
|
||||||
@@ -201,25 +184,52 @@ class CheckExternalLinksBuilder(Builder):
|
|||||||
except HTTPError as err:
|
except HTTPError as err:
|
||||||
if err.code == 401:
|
if err.code == 401:
|
||||||
# We'll take "Unauthorized" as working.
|
# We'll take "Unauthorized" as working.
|
||||||
self.good.add(uri)
|
|
||||||
return 'working', ' - unauthorized', 0
|
return 'working', ' - unauthorized', 0
|
||||||
else:
|
else:
|
||||||
self.broken[uri] = str(err)
|
|
||||||
return 'broken', str(err), 0
|
return 'broken', str(err), 0
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
self.broken[uri] = str(err)
|
|
||||||
return 'broken', str(err), 0
|
return 'broken', str(err), 0
|
||||||
if f.url.rstrip('/') == req_url.rstrip('/'):
|
if f.url.rstrip('/') == req_url.rstrip('/'):
|
||||||
self.good.add(uri)
|
|
||||||
return 'working', '', 0
|
return 'working', '', 0
|
||||||
else:
|
else:
|
||||||
new_url = f.url
|
new_url = f.url
|
||||||
if hash:
|
if hash:
|
||||||
new_url += '#' + hash
|
new_url += '#' + hash
|
||||||
code = getattr(req, 'redirect_code', 0)
|
code = getattr(req, 'redirect_code', 0)
|
||||||
self.redirected[uri] = (new_url, code)
|
|
||||||
return 'redirected', new_url, code
|
return 'redirected', new_url, code
|
||||||
|
|
||||||
|
def check():
|
||||||
|
# check for various conditions without bothering the network
|
||||||
|
if len(uri) == 0 or uri[0] == '#' or \
|
||||||
|
uri[0:7] == 'mailto:' or uri[0:4] == 'ftp:':
|
||||||
|
return 'unchecked', '', 0
|
||||||
|
elif not (uri[0:5] == 'http:' or uri[0:6] == 'https:'):
|
||||||
|
return 'local', '', 0
|
||||||
|
elif uri in self.good:
|
||||||
|
return 'working', 'old', 0
|
||||||
|
elif uri in self.broken:
|
||||||
|
return 'broken', self.broken[uri], 0
|
||||||
|
elif uri in self.redirected:
|
||||||
|
return 'redirected', self.redirected[uri][0], self.redirected[uri][1]
|
||||||
|
for rex in self.to_ignore:
|
||||||
|
if rex.match(uri):
|
||||||
|
return 'ignored', '', 0
|
||||||
|
|
||||||
|
# need to actually check the URI
|
||||||
|
for _ in range(self.app.config.linkcheck_retries):
|
||||||
|
status, info, code = check_uri()
|
||||||
|
if status != "broken":
|
||||||
|
break
|
||||||
|
|
||||||
|
if status == "working":
|
||||||
|
self.good.add(uri)
|
||||||
|
elif status == "broken":
|
||||||
|
self.broken[uri] = info
|
||||||
|
elif status == "redirected":
|
||||||
|
self.redirected[uri] = (info, code)
|
||||||
|
|
||||||
|
return (status, info, code)
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
uri, docname, lineno = self.wqueue.get()
|
uri, docname, lineno = self.wqueue.get()
|
||||||
if uri is None:
|
if uri is None:
|
||||||
|
@@ -244,6 +244,7 @@ class Config(object):
|
|||||||
|
|
||||||
# linkcheck options
|
# linkcheck options
|
||||||
linkcheck_ignore = ([], None),
|
linkcheck_ignore = ([], None),
|
||||||
|
linkcheck_retries = (1, None),
|
||||||
linkcheck_timeout = (None, None, [int]),
|
linkcheck_timeout = (None, None, [int]),
|
||||||
linkcheck_workers = (5, None),
|
linkcheck_workers = (5, None),
|
||||||
linkcheck_anchors = (True, None),
|
linkcheck_anchors = (True, None),
|
||||||
|
Loading…
Reference in New Issue
Block a user