Merge pull request #2312 from alex/linkcheck-retry

Fixed #1099 -- allow the linkcheck builder to retry on errors.
This commit is contained in:
Georg Brandl 2016-02-13 15:51:33 +01:00
commit 56a4e791d6
3 changed files with 41 additions and 23 deletions

View File

@ -1757,6 +1757,13 @@ Options for the linkcheck builder
.. versionadded:: 1.1
.. confval:: linkcheck_retries
The number of times the linkcheck builder will attempt to check a URL before
declaring it broken. Defaults to 1 attempt.
.. versionadded:: 1.4
.. confval:: linkcheck_timeout
A timeout value, in seconds, for the linkcheck builder. **Only works in

View File

@ -137,23 +137,7 @@ class CheckExternalLinksBuilder(Builder):
if self.app.config.linkcheck_timeout:
kwargs['timeout'] = self.app.config.linkcheck_timeout
def check():
# check for various conditions without bothering the network
if len(uri) == 0 or uri[0] == '#' or \
uri[0:7] == 'mailto:' or uri[0:4] == 'ftp:':
return 'unchecked', '', 0
elif not (uri[0:5] == 'http:' or uri[0:6] == 'https:'):
return 'local', '', 0
elif uri in self.good:
return 'working', 'old', 0
elif uri in self.broken:
return 'broken', self.broken[uri], 0
elif uri in self.redirected:
return 'redirected', self.redirected[uri][0], self.redirected[uri][1]
for rex in self.to_ignore:
if rex.match(uri):
return 'ignored', '', 0
def check_uri():
# split off anchor
if '#' in uri:
req_url, hash = uri.split('#', 1)
@ -167,7 +151,6 @@ class CheckExternalLinksBuilder(Builder):
except UnicodeError:
req_url = encode_uri(req_url)
# need to actually check the URI
try:
if hash and self.app.config.linkcheck_anchors:
# Read the whole document and see if #hash exists
@ -201,25 +184,52 @@ class CheckExternalLinksBuilder(Builder):
except HTTPError as err:
if err.code == 401:
# We'll take "Unauthorized" as working.
self.good.add(uri)
return 'working', ' - unauthorized', 0
else:
self.broken[uri] = str(err)
return 'broken', str(err), 0
except Exception as err:
self.broken[uri] = str(err)
return 'broken', str(err), 0
if f.url.rstrip('/') == req_url.rstrip('/'):
self.good.add(uri)
return 'working', '', 0
else:
new_url = f.url
if hash:
new_url += '#' + hash
code = getattr(req, 'redirect_code', 0)
self.redirected[uri] = (new_url, code)
return 'redirected', new_url, code
def check():
    """Return the ``(status, info, code)`` result for the current ``uri``.

    Cheap, network-free classifications are tried first: unchecked
    schemes, local references, results already cached on the builder,
    and ignore patterns.  Only when none applies is ``check_uri()``
    called, up to ``linkcheck_retries`` times, stopping at the first
    result that is not ``"broken"``.  The final result is stored in
    ``self.good``, ``self.broken`` or ``self.redirected`` so later
    occurrences of the same URI are answered from the cache.
    """
    # check for various conditions without bothering the network
    if len(uri) == 0 or uri[0] == '#' or \
            uri[0:7] == 'mailto:' or uri[0:4] == 'ftp:':
        return 'unchecked', '', 0
    elif not (uri[0:5] == 'http:' or uri[0:6] == 'https:'):
        return 'local', '', 0
    elif uri in self.good:
        return 'working', 'old', 0
    elif uri in self.broken:
        return 'broken', self.broken[uri], 0
    elif uri in self.redirected:
        return 'redirected', self.redirected[uri][0], self.redirected[uri][1]
    for rex in self.to_ignore:
        if rex.match(uri):
            return 'ignored', '', 0

    # need to actually check the URI
    # Always make at least one attempt: with linkcheck_retries set to 0
    # (or a negative number) the loop body would never run and ``status``
    # would be unbound when it is inspected below.
    attempts = max(1, self.app.config.linkcheck_retries)
    for _ in range(attempts):
        status, info, code = check_uri()
        if status != "broken":
            break

    # Remember the outcome so the same URI is not requested again.
    if status == "working":
        self.good.add(uri)
    elif status == "broken":
        self.broken[uri] = info
    elif status == "redirected":
        self.redirected[uri] = (info, code)

    return (status, info, code)
while True:
uri, docname, lineno = self.wqueue.get()
if uri is None:

View File

@ -244,6 +244,7 @@ class Config(object):
# linkcheck options
linkcheck_ignore = ([], None),
linkcheck_retries = (1, None),
linkcheck_timeout = (None, None, [int]),
linkcheck_workers = (5, None),
linkcheck_anchors = (True, None),