linkcheck: Check the source URL of raw directives

Add the source URL of raw directives to the list of links checked by linkcheck.
Also refactor HyperlinkCollector by adding an `add_uri` helper function.
Add a test for checking the source URL of raw directives with linkcheck.
This commit is contained in:
n-peugnet 2022-08-07 11:02:45 +02:00
parent 59056aa781
commit 1553cc3b36
4 changed files with 36 additions and 19 deletions

View File

@ -15,6 +15,8 @@ Features added
- #10286: C++, support requires clauses not just between the template
parameter lists and the declaration.
- #10755: linkcheck: Check the source URL of raw directives that use the ``url``
option.
Bugs fixed
----------

View File

@ -502,32 +502,34 @@ class HyperlinkCollector(SphinxPostTransform):
builder = cast(CheckExternalLinksBuilder, self.app.builder)
hyperlinks = builder.hyperlinks
def add_uri(uri: str, node: nodes.Element) -> None:
newuri = self.app.emit_firstresult('linkcheck-process-uri', uri)
if newuri:
uri = newuri
lineno = get_node_line(node)
uri_info = Hyperlink(uri, self.env.docname, lineno)
if uri not in hyperlinks:
hyperlinks[uri] = uri_info
# reference nodes
for refnode in self.document.findall(nodes.reference):
if 'refuri' not in refnode:
continue
uri = refnode['refuri']
newuri = self.app.emit_firstresult('linkcheck-process-uri', uri)
if newuri:
uri = newuri
lineno = get_node_line(refnode)
uri_info = Hyperlink(uri, self.env.docname, lineno)
if uri not in hyperlinks:
hyperlinks[uri] = uri_info
add_uri(uri, refnode)
# image nodes
for imgnode in self.document.findall(nodes.image):
uri = imgnode['candidates'].get('?')
if uri and '://' in uri:
newuri = self.app.emit_firstresult('linkcheck-process-uri', uri)
if newuri:
uri = newuri
add_uri(uri, imgnode)
lineno = get_node_line(imgnode)
uri_info = Hyperlink(uri, self.env.docname, lineno)
if uri not in hyperlinks:
hyperlinks[uri] = uri_info
# raw nodes
for rawnode in self.document.findall(nodes.raw):
uri = rawnode.get('source')
if uri and '://' in uri:
add_uri(uri, rawnode)
def rewrite_github_anchor(app: Sphinx, uri: str) -> Optional[str]:

View File

@ -17,3 +17,6 @@ Some additional anchors to exercise ignore code
.. image:: https://www.google.com/image.png
.. figure:: https://www.google.com/image2.png
.. raw:: html
:url: https://www.sphinx-doc.org/

View File

@ -41,7 +41,7 @@ def test_defaults(app):
assert "Not Found for url: https://www.google.com/image2.png" in content
# looking for local file should fail
assert "[broken] path/to/notfound" in content
assert len(content.splitlines()) == 6
assert len(content.splitlines()) == 7
@pytest.mark.sphinx('linkcheck', testroot='linkcheck', freshenv=True)
@ -58,8 +58,8 @@ def test_defaults_json(app):
"info"]:
assert attr in row
assert len(content.splitlines()) == 11
assert len(rows) == 11
assert len(content.splitlines()) == 12
assert len(rows) == 12
# the output order of the rows is not stable
# due to possible variance in network latency
rowsby = {row["uri"]: row for row in rows}
@ -80,7 +80,7 @@ def test_defaults_json(app):
assert dnerow['uri'] == 'https://localhost:7777/doesnotexist'
assert rowsby['https://www.google.com/image2.png'] == {
'filename': 'links.txt',
'lineno': 19,
'lineno': 20,
'status': 'broken',
'code': 0,
'uri': 'https://www.google.com/image2.png',
@ -94,6 +94,15 @@ def test_defaults_json(app):
# images should fail
assert "Not Found for url: https://www.google.com/image.png" in \
rowsby["https://www.google.com/image.png"]["info"]
# raw nodes' url should be checked too
assert rowsby["https://www.sphinx-doc.org/"] == {
'filename': 'links.txt',
'lineno': 21,
'status': 'redirected',
'code': 302,
'uri': 'https://www.sphinx-doc.org/',
'info': 'https://www.sphinx-doc.org/en/master/'
}
@pytest.mark.sphinx(
@ -102,6 +111,7 @@ def test_defaults_json(app):
'linkcheck_ignore': [
'https://localhost:7777/doesnotexist',
'http://www.sphinx-doc.org/en/master/index.html#',
'https://www.sphinx-doc.org/',
'https://www.google.com/image.png',
'https://www.google.com/image2.png',
'path/to/notfound']