mirror of
https://github.com/sphinx-doc/sphinx.git
synced 2025-02-25 18:55:22 -06:00
linkcheck: Fix race condition that could lead to checking the availability of the same URL twice
So far, linkcheck scans all of references and images from documents, and checks them parallel. As a result, some URL would be checked twice (or more) by race condition. This collects the URL via post-transforms, and removes duplicated URLs before checking availability. refs: #4303
This commit is contained in:
3
CHANGES
3
CHANGES
@@ -10,6 +10,7 @@ Incompatible changes
|
||||
Deprecated
|
||||
----------
|
||||
|
||||
* ``sphinx.builders.linkcheck.node_line_or_0()``
|
||||
* ``sphinx.ext.autodoc.AttributeDocumenter.isinstanceattribute()``
|
||||
* ``sphinx.ext.autodoc.directive.DocumenterBridge.reporter``
|
||||
* ``sphinx.ext.autodoc.importer.get_module_members()``
|
||||
@@ -57,6 +58,8 @@ Bugs fixed
|
||||
+ or ^) are used as keystrokes
|
||||
* #8629: html: A type warning for html_use_opensearch is shown twice
|
||||
* #8665: html theme: Could not override globaltoc_maxdepth in theme.conf
|
||||
* #4304: linkcheck: Fix race condition that could lead to checking the
|
||||
availability of the same URL twice
|
||||
* #8094: texinfo: image files on the different directory with document are not
|
||||
copied
|
||||
* #8671: :confval:`highlight_options` is not working
|
||||
|
@@ -26,6 +26,11 @@ The following is a list of deprecated interfaces.
|
||||
- (will be) Removed
|
||||
- Alternatives
|
||||
|
||||
* - ``sphinx.builders.linkcheck.node_line_or_0()``
|
||||
- 3.5
|
||||
- 5.0
|
||||
- ``sphinx.util.nodes.get_node_line()``
|
||||
|
||||
* - ``sphinx.ext.autodoc.AttributeDocumenter.isinstanceattribute()``
|
||||
- 3.5
|
||||
- 5.0
|
||||
|
@@ -14,11 +14,12 @@ import re
|
||||
import socket
|
||||
import threading
|
||||
import time
|
||||
import warnings
|
||||
from datetime import datetime, timezone
|
||||
from email.utils import parsedate_to_datetime
|
||||
from html.parser import HTMLParser
|
||||
from os import path
|
||||
from typing import Any, Dict, List, NamedTuple, Optional, Set, Tuple
|
||||
from typing import Any, Dict, List, NamedTuple, Optional, Set, Tuple, cast
|
||||
from urllib.parse import unquote, urlparse
|
||||
|
||||
from docutils import nodes
|
||||
@@ -28,7 +29,9 @@ from requests.exceptions import HTTPError, TooManyRedirects
|
||||
|
||||
from sphinx.application import Sphinx
|
||||
from sphinx.builders import Builder
|
||||
from sphinx.deprecation import RemovedInSphinx40Warning
|
||||
from sphinx.locale import __
|
||||
from sphinx.transforms.post_transforms import SphinxPostTransform
|
||||
from sphinx.util import encode_uri, logging, requests
|
||||
from sphinx.util.console import darkgray, darkgreen, purple, red, turquoise # type: ignore
|
||||
from sphinx.util.nodes import get_node_line
|
||||
@@ -37,6 +40,10 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
uri_re = re.compile('([a-z]+:)?//') # matches to foo:// and // (a protocol relative URL)
|
||||
|
||||
Hyperlink = NamedTuple('Hyperlink', (('next_check', float),
|
||||
('uri', Optional[str]),
|
||||
('docname', Optional[str]),
|
||||
('lineno', Optional[int])))
|
||||
RateLimit = NamedTuple('RateLimit', (('delay', float), ('next_check', float)))
|
||||
|
||||
DEFAULT_REQUEST_HEADERS = {
|
||||
@@ -52,6 +59,8 @@ def node_line_or_0(node: Element) -> int:
|
||||
PriorityQueue items must be comparable. The line number is part of the
|
||||
tuple used by the PriorityQueue, keep an homogeneous type for comparison.
|
||||
"""
|
||||
warnings.warn('node_line_or_0() is deprecated.',
|
||||
RemovedInSphinx40Warning, stacklevel=2)
|
||||
return get_node_line(node) or 0
|
||||
|
||||
|
||||
@@ -98,6 +107,7 @@ class CheckExternalLinksBuilder(Builder):
|
||||
'%(outdir)s/output.txt')
|
||||
|
||||
def init(self) -> None:
|
||||
self.hyperlinks = {} # type: Dict[str, Hyperlink]
|
||||
self.to_ignore = [re.compile(x) for x in self.app.config.linkcheck_ignore]
|
||||
self.anchors_ignore = [re.compile(x)
|
||||
for x in self.app.config.linkcheck_anchors_ignore]
|
||||
@@ -406,35 +416,7 @@ class CheckExternalLinksBuilder(Builder):
|
||||
return
|
||||
|
||||
def write_doc(self, docname: str, doctree: Node) -> None:
|
||||
logger.info('')
|
||||
n = 0
|
||||
|
||||
# reference nodes
|
||||
for refnode in doctree.traverse(nodes.reference):
|
||||
if 'refuri' not in refnode:
|
||||
continue
|
||||
uri = refnode['refuri']
|
||||
lineno = node_line_or_0(refnode)
|
||||
uri_info = (CHECK_IMMEDIATELY, uri, docname, lineno)
|
||||
self.wqueue.put(uri_info, False)
|
||||
n += 1
|
||||
|
||||
# image nodes
|
||||
for imgnode in doctree.traverse(nodes.image):
|
||||
uri = imgnode['candidates'].get('?')
|
||||
if uri and '://' in uri:
|
||||
lineno = node_line_or_0(imgnode)
|
||||
uri_info = (CHECK_IMMEDIATELY, uri, docname, lineno)
|
||||
self.wqueue.put(uri_info, False)
|
||||
n += 1
|
||||
|
||||
done = 0
|
||||
while done < n:
|
||||
self.process_result(self.rqueue.get())
|
||||
done += 1
|
||||
|
||||
if self.broken:
|
||||
self.app.statuscode = 1
|
||||
pass
|
||||
|
||||
def write_entry(self, what: str, docname: str, filename: str, line: int,
|
||||
uri: str) -> None:
|
||||
@@ -447,14 +429,58 @@ class CheckExternalLinksBuilder(Builder):
|
||||
output.write('\n')
|
||||
|
||||
def finish(self) -> None:
|
||||
logger.info('')
|
||||
n = 0
|
||||
|
||||
for hyperlink in self.hyperlinks.values():
|
||||
self.wqueue.put(hyperlink, False)
|
||||
n += 1
|
||||
|
||||
done = 0
|
||||
while done < n:
|
||||
self.process_result(self.rqueue.get())
|
||||
done += 1
|
||||
|
||||
if self.broken:
|
||||
self.app.statuscode = 1
|
||||
|
||||
self.wqueue.join()
|
||||
# Shutdown threads.
|
||||
for worker in self.workers:
|
||||
self.wqueue.put((CHECK_IMMEDIATELY, None, None, None), False)
|
||||
|
||||
|
||||
class HyperlinkCollector(SphinxPostTransform):
|
||||
builders = ('linkcheck',)
|
||||
default_priority = 800
|
||||
|
||||
def run(self, **kwargs: Any) -> None:
|
||||
builder = cast(CheckExternalLinksBuilder, self.app.builder)
|
||||
hyperlinks = builder.hyperlinks
|
||||
|
||||
# reference nodes
|
||||
for refnode in self.document.traverse(nodes.reference):
|
||||
if 'refuri' not in refnode:
|
||||
continue
|
||||
uri = refnode['refuri']
|
||||
lineno = get_node_line(refnode)
|
||||
uri_info = Hyperlink(CHECK_IMMEDIATELY, uri, self.env.docname, lineno)
|
||||
if uri not in hyperlinks:
|
||||
hyperlinks[uri] = uri_info
|
||||
|
||||
# image nodes
|
||||
for imgnode in self.document.traverse(nodes.image):
|
||||
uri = imgnode['candidates'].get('?')
|
||||
if uri and '://' in uri:
|
||||
lineno = get_node_line(imgnode)
|
||||
uri_info = Hyperlink(CHECK_IMMEDIATELY, uri, self.env.docname, lineno)
|
||||
if uri not in hyperlinks:
|
||||
hyperlinks[uri] = uri_info
|
||||
|
||||
|
||||
def setup(app: Sphinx) -> Dict[str, Any]:
|
||||
app.add_builder(CheckExternalLinksBuilder)
|
||||
app.add_post_transform(HyperlinkCollector)
|
||||
|
||||
app.add_config_value('linkcheck_ignore', [], None)
|
||||
app.add_config_value('linkcheck_auth', [], None)
|
||||
|
@@ -1 +0,0 @@
|
||||
exclude_patterns = ['_build']
|
@@ -1,6 +0,0 @@
|
||||
.. image:: http://localhost:7777/
|
||||
:target: http://localhost:7777/
|
||||
|
||||
`weblate.org`_
|
||||
|
||||
.. _weblate.org: http://localhost:7777/
|
@@ -573,40 +573,3 @@ def test_limit_rate_bails_out_after_waiting_max_time(app):
|
||||
checker.rate_limits = {"localhost": RateLimit(90.0, 0.0)}
|
||||
next_check = checker.limit_rate(FakeResponse())
|
||||
assert next_check is None
|
||||
|
||||
|
||||
@pytest.mark.sphinx(
|
||||
'linkcheck', testroot='linkcheck-localserver-two-links', freshenv=True,
|
||||
)
|
||||
def test_priorityqueue_items_are_comparable(app):
|
||||
with http_server(OKHandler):
|
||||
app.builder.build_all()
|
||||
content = (app.outdir / 'output.json').read_text()
|
||||
rows = [json.loads(x) for x in sorted(content.splitlines())]
|
||||
assert rows == [
|
||||
{
|
||||
'filename': 'index.rst',
|
||||
# Should not be None.
|
||||
'lineno': 0,
|
||||
'status': 'working',
|
||||
'code': 0,
|
||||
'uri': 'http://localhost:7777/',
|
||||
'info': '',
|
||||
},
|
||||
{
|
||||
'filename': 'index.rst',
|
||||
'lineno': 0,
|
||||
'status': 'working',
|
||||
'code': 0,
|
||||
'uri': 'http://localhost:7777/',
|
||||
'info': '',
|
||||
},
|
||||
{
|
||||
'filename': 'index.rst',
|
||||
'lineno': 4,
|
||||
'status': 'working',
|
||||
'code': 0,
|
||||
'uri': 'http://localhost:7777/',
|
||||
'info': '',
|
||||
}
|
||||
]
|
||||
|
Reference in New Issue
Block a user