From 4f00a9905c10fe31ca6d93af5db0291321f50f3e Mon Sep 17 00:00:00 2001 From: Alex Gaynor Date: Sat, 13 Feb 2016 08:52:38 -0500 Subject: [PATCH 1/6] Fixed #1099 -- allow the linkcheck builder to retry on errors. This is useful because if you run linkcheck often, you are likely to see lots of transient network errors, which usually disappear if you simply try again. --- doc/config.rst | 7 ++++ sphinx/builders/linkcheck.py | 79 ++++++++++++++++++++---------------- sphinx/config.py | 1 + 3 files changed, 52 insertions(+), 35 deletions(-) diff --git a/doc/config.rst b/doc/config.rst index 58f04c895..516bf6c26 100644 --- a/doc/config.rst +++ b/doc/config.rst @@ -1757,6 +1757,13 @@ Options for the linkcheck builder .. versionadded:: 1.1 +.. confval:: linkcheck_retries + + The number of times the linkcheck builder will attempt to check a URL before + declaring it broken. Defaults to 1 attempt. + + .. versionadded:: 1.4 + .. confval:: linkcheck_timeout A timeout value, in seconds, for the linkcheck builder. **Only works in diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index 5904d659b..ff254515b 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -137,37 +137,8 @@ class CheckExternalLinksBuilder(Builder): if self.app.config.linkcheck_timeout: kwargs['timeout'] = self.app.config.linkcheck_timeout - def check(): - # check for various conditions without bothering the network - if len(uri) == 0 or uri[0] == '#' or \ - uri[0:7] == 'mailto:' or uri[0:4] == 'ftp:': - return 'unchecked', '', 0 - elif not (uri[0:5] == 'http:' or uri[0:6] == 'https:'): - return 'local', '', 0 - elif uri in self.good: - return 'working', 'old', 0 - elif uri in self.broken: - return 'broken', self.broken[uri], 0 - elif uri in self.redirected: - return 'redirected', self.redirected[uri][0], self.redirected[uri][1] - for rex in self.to_ignore: - if rex.match(uri): - return 'ignored', '', 0 - # split off anchor - if '#' in uri: - req_url, hash = uri.split('#', 1) - else: - req_url = uri - hash = None - - # handle non-ASCII URIs - try: - req_url.encode('ascii') - except UnicodeError: - req_url = encode_uri(req_url) - - # need to actually check the URI + def check_uri(): try: if hash and self.app.config.linkcheck_anchors: # Read the whole document and see if #hash exists @@ -201,25 +172,63 @@ class CheckExternalLinksBuilder(Builder): except HTTPError as err: if err.code == 401: # We'll take "Unauthorized" as working. - self.good.add(uri) return 'working', ' - unauthorized', 0 else: - self.broken[uri] = str(err) return 'broken', str(err), 0 except Exception as err: - self.broken[uri] = str(err) return 'broken', str(err), 0 if f.url.rstrip('/') == req_url.rstrip('/'): - self.good.add(uri) return 'working', '', 0 else: new_url = f.url if hash: new_url += '#' + hash code = getattr(req, 'redirect_code', 0) - self.redirected[uri] = (new_url, code) return 'redirected', new_url, code + def check(): + # check for various conditions without bothering the network + if len(uri) == 0 or uri[0] == '#' or \ + uri[0:7] == 'mailto:' or uri[0:4] == 'ftp:': + return 'unchecked', '', 0 + elif not (uri[0:5] == 'http:' or uri[0:6] == 'https:'): + return 'local', '', 0 + elif uri in self.good: + return 'working', 'old', 0 + elif uri in self.broken: + return 'broken', self.broken[uri], 0 + elif uri in self.redirected: + return 'redirected', self.redirected[uri][0], self.redirected[uri][1] + for rex in self.to_ignore: + if rex.match(uri): + return 'ignored', '', 0 + + # split off anchor + if '#' in uri: + req_url, hash = uri.split('#', 1) + else: + req_url = uri + hash = None + + # handle non-ASCII URIs + try: + req_url.encode('ascii') + except UnicodeError: + req_url = encode_uri(req_url) + + # need to actually check the URI + for _ in range(self.app.config.linkcheck_retries) + status, info, code = check_uri() + if status != "broken": + break + + if status == "working": + self.good.add(uri) + elif status == "broken": + self.broken[uri] = info + elif status == "redirected": + self.redirected[uri] = (info, code) + while True: uri, docname, lineno = self.wqueue.get() if uri is None: diff --git a/sphinx/config.py b/sphinx/config.py index 74ea23bd7..402eb1400 100644 --- a/sphinx/config.py +++ b/sphinx/config.py @@ -244,6 +244,7 @@ class Config(object): # linkcheck options linkcheck_ignore = ([], None), + linkcheck_retries = (1, None), linkcheck_timeout = (None, None, [int]), linkcheck_workers = (5, None), linkcheck_anchors = (True, None), From ae5bfe500ec91b2eee6e0f00b0b37468f4cb3a73 Mon Sep 17 00:00:00 2001 From: Alex Gaynor Date: Sat, 13 Feb 2016 08:55:00 -0500 Subject: [PATCH 2/6] syntax error --- sphinx/builders/linkcheck.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index ff254515b..ac28b7ed8 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -217,7 +217,7 @@ class CheckExternalLinksBuilder(Builder): req_url = encode_uri(req_url) # need to actually check the URI - for _ in range(self.app.config.linkcheck_retries) + for _ in range(self.app.config.linkcheck_retries): status, info, code = check_uri() if status != "broken": break From 863e2f468f1ba63627bd87fe9edb4d3ba5e2f5d5 Mon Sep 17 00:00:00 2001 From: Alex Gaynor Date: Sat, 13 Feb 2016 08:55:55 -0500 Subject: [PATCH 3/6] moved more logic around --- sphinx/builders/linkcheck.py | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index ac28b7ed8..ecec3c9c9 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -137,8 +137,20 @@ class CheckExternalLinksBuilder(Builder): if self.app.config.linkcheck_timeout: kwargs['timeout'] = self.app.config.linkcheck_timeout - def check_uri(): + # split off anchor + if '#' in uri: + req_url, hash = uri.split('#', 1) + else: + req_url = uri + hash = None + + # handle non-ASCII URIs + try: + req_url.encode('ascii') + except UnicodeError: + req_url = encode_uri(req_url) + try: if hash and self.app.config.linkcheck_anchors: # Read the whole document and see if #hash exists @@ -203,19 +215,6 @@ class CheckExternalLinksBuilder(Builder): if rex.match(uri): return 'ignored', '', 0 - # split off anchor - if '#' in uri: - req_url, hash = uri.split('#', 1) - else: - req_url = uri - hash = None - - # handle non-ASCII URIs - try: - req_url.encode('ascii') - except UnicodeError: - req_url = encode_uri(req_url) - # need to actually check the URI for _ in range(self.app.config.linkcheck_retries): status, info, code = check_uri() From e6980adaf6273594e0b8d9cc1416066c22fc2e75 Mon Sep 17 00:00:00 2001 From: Alex Gaynor Date: Sat, 13 Feb 2016 09:02:44 -0500 Subject: [PATCH 4/6] oops, lost this --- sphinx/builders/linkcheck.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index ecec3c9c9..175966116 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -228,6 +228,8 @@ class CheckExternalLinksBuilder(Builder): elif status == "redirected": self.redirected[uri] = (info, code) + return (status, info, code) + while True: uri, docname, lineno = self.wqueue.get() if uri is None: From 374c6a6db38193e1607d2e0b2a9ff445fb2aef5f Mon Sep 17 00:00:00 2001 From: Alex Gaynor Date: Sat, 13 Feb 2016 09:18:41 -0500 Subject: [PATCH 5/6] fixed indentation --- doc/config.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/config.rst b/doc/config.rst index 516bf6c26..e9c7a3fa7 100644 --- a/doc/config.rst +++ b/doc/config.rst @@ -1759,8 +1759,8 @@ Options for the linkcheck builder .. confval:: linkcheck_retries - The number of times the linkcheck builder will attempt to check a URL before - declaring it broken. Defaults to 1 attempt. + The number of times the linkcheck builder will attempt to check a URL before + declaring it broken. Defaults to 1 attempt. .. versionadded:: 1.4 From 79206ef7b1eaccc4a43727b650d3c0b08ea802d1 Mon Sep 17 00:00:00 2001 From: Alex Gaynor Date: Sat, 13 Feb 2016 09:23:00 -0500 Subject: [PATCH 6/6] fixed indentation here as well --- doc/config.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/config.rst b/doc/config.rst index e9c7a3fa7..63f43998a 100644 --- a/doc/config.rst +++ b/doc/config.rst @@ -1762,7 +1762,7 @@ Options for the linkcheck builder The number of times the linkcheck builder will attempt to check a URL before declaring it broken. Defaults to 1 attempt. - .. versionadded:: 1.4 + .. versionadded:: 1.4 .. confval:: linkcheck_timeout