Ensure linkcheck items are comparable

Linkcheck organizes the URLs to checks in a PriorityQueue. The items are
tuples (priority, url, docname, lineno).

Tuples where the lineno is `None` are not comparable with tuples that
have an integer lineno, and PriorityQueue items must be comparable (see
https://bugs.python.org/issue31145).

Fixes an issue when a document contains two links to the same URL, one
with an int line number and the other without line number metadata (such
as an image :target: attribute).

Using 0 instead of None to represent no line number should not lead to
observable changes, the result logger only logs the line number when it
is truthy.

Close #8565
This commit is contained in:
François Freitag 2020-12-22 19:49:53 +01:00
parent 955d6558ec
commit a1b8b1febb
No known key found for this signature in database
GPG Key ID: 0CD53670BCA9253D
5 changed files with 57 additions and 3 deletions

View File

@ -20,6 +20,8 @@ Bugs fixed
annotations
* #8568: autodoc: TypeError is raised on checking slots attribute
* #8567: autodoc: Instance attributes are incorrectly added to Parent class
* #8565: linkcheck: Fix PriorityQueue crash when link tuples are not
comparable
Testing
--------

View File

@ -22,7 +22,7 @@ from typing import Any, Dict, List, NamedTuple, Optional, Set, Tuple
from urllib.parse import unquote, urlparse
from docutils import nodes
from docutils.nodes import Node
from docutils.nodes import Element, Node
from requests import Response
from requests.exceptions import HTTPError, TooManyRedirects
@ -47,6 +47,14 @@ QUEUE_POLL_SECS = 1
DEFAULT_DELAY = 60.0
def node_line_or_0(node: Element) -> int:
"""
PriorityQueue items must be comparable. The line number is part of the
tuple used by the PriorityQueue, keep an homogeneous type for comparison.
"""
return get_node_line(node) or 0
class AnchorCheckParser(HTMLParser):
"""Specialized HTML parser that looks for a specific anchor."""
@ -406,7 +414,7 @@ class CheckExternalLinksBuilder(Builder):
if 'refuri' not in refnode:
continue
uri = refnode['refuri']
lineno = get_node_line(refnode)
lineno = node_line_or_0(refnode)
uri_info = (CHECK_IMMEDIATELY, uri, docname, lineno)
self.wqueue.put(uri_info, False)
n += 1
@ -415,7 +423,7 @@ class CheckExternalLinksBuilder(Builder):
for imgnode in doctree.traverse(nodes.image):
uri = imgnode['candidates'].get('?')
if uri and '://' in uri:
lineno = get_node_line(imgnode)
lineno = node_line_or_0(imgnode)
uri_info = (CHECK_IMMEDIATELY, uri, docname, lineno)
self.wqueue.put(uri_info, False)
n += 1

View File

@ -0,0 +1 @@
exclude_patterns = ['_build']

View File

@ -0,0 +1,6 @@
.. image:: http://localhost:7777/
:target: http://localhost:7777/
`weblate.org`_
.. _weblate.org: http://localhost:7777/

View File

@ -573,3 +573,40 @@ def test_limit_rate_bails_out_after_waiting_max_time(app):
checker.rate_limits = {"localhost": RateLimit(90.0, 0.0)}
next_check = checker.limit_rate(FakeResponse())
assert next_check is None
@pytest.mark.sphinx(
'linkcheck', testroot='linkcheck-localserver-two-links', freshenv=True,
)
def test_priorityqueue_items_are_comparable(app):
with http_server(OKHandler):
app.builder.build_all()
content = (app.outdir / 'output.json').read_text()
rows = [json.loads(x) for x in sorted(content.splitlines())]
assert rows == [
{
'filename': 'index.rst',
# Should not be None.
'lineno': 0,
'status': 'working',
'code': 0,
'uri': 'http://localhost:7777/',
'info': '',
},
{
'filename': 'index.rst',
'lineno': 0,
'status': 'working',
'code': 0,
'uri': 'http://localhost:7777/',
'info': '',
},
{
'filename': 'index.rst',
'lineno': 4,
'status': 'working',
'code': 0,
'uri': 'http://localhost:7777/',
'info': '',
}
]