linkcheck: Check the source URL of raw directives

Add raw directives' source URL to the list of links to check with linkcheck.
Also refactor HyperlinkCollector by adding an `add_uri` helper function.
Add a test for linkcheck's handling of the source URL of raw directives.
This commit is contained in:
n-peugnet 2022-08-07 11:02:45 +02:00
parent 59056aa781
commit 1553cc3b36
4 changed files with 36 additions and 19 deletions

View File

@ -15,6 +15,8 @@ Features added
- #10286: C++, support requires clauses not just between the template - #10286: C++, support requires clauses not just between the template
parameter lists and the declaration. parameter lists and the declaration.
- #10755: linkcheck: Check the source URL of raw directives that use the ``url``
option.
Bugs fixed Bugs fixed
---------- ----------

View File

@ -502,32 +502,34 @@ class HyperlinkCollector(SphinxPostTransform):
builder = cast(CheckExternalLinksBuilder, self.app.builder) builder = cast(CheckExternalLinksBuilder, self.app.builder)
hyperlinks = builder.hyperlinks hyperlinks = builder.hyperlinks
def add_uri(uri: str, node: nodes.Element) -> None:
newuri = self.app.emit_firstresult('linkcheck-process-uri', uri)
if newuri:
uri = newuri
lineno = get_node_line(node)
uri_info = Hyperlink(uri, self.env.docname, lineno)
if uri not in hyperlinks:
hyperlinks[uri] = uri_info
# reference nodes # reference nodes
for refnode in self.document.findall(nodes.reference): for refnode in self.document.findall(nodes.reference):
if 'refuri' not in refnode: if 'refuri' not in refnode:
continue continue
uri = refnode['refuri'] uri = refnode['refuri']
newuri = self.app.emit_firstresult('linkcheck-process-uri', uri) add_uri(uri, refnode)
if newuri:
uri = newuri
lineno = get_node_line(refnode)
uri_info = Hyperlink(uri, self.env.docname, lineno)
if uri not in hyperlinks:
hyperlinks[uri] = uri_info
# image nodes # image nodes
for imgnode in self.document.findall(nodes.image): for imgnode in self.document.findall(nodes.image):
uri = imgnode['candidates'].get('?') uri = imgnode['candidates'].get('?')
if uri and '://' in uri: if uri and '://' in uri:
newuri = self.app.emit_firstresult('linkcheck-process-uri', uri) add_uri(uri, imgnode)
if newuri:
uri = newuri
lineno = get_node_line(imgnode) # raw nodes
uri_info = Hyperlink(uri, self.env.docname, lineno) for rawnode in self.document.findall(nodes.raw):
if uri not in hyperlinks: uri = rawnode.get('source')
hyperlinks[uri] = uri_info if uri and '://' in uri:
add_uri(uri, rawnode)
def rewrite_github_anchor(app: Sphinx, uri: str) -> Optional[str]: def rewrite_github_anchor(app: Sphinx, uri: str) -> Optional[str]:

View File

@ -17,3 +17,6 @@ Some additional anchors to exercise ignore code
.. image:: https://www.google.com/image.png .. image:: https://www.google.com/image.png
.. figure:: https://www.google.com/image2.png .. figure:: https://www.google.com/image2.png
.. raw:: html
:url: https://www.sphinx-doc.org/

View File

@ -41,7 +41,7 @@ def test_defaults(app):
assert "Not Found for url: https://www.google.com/image2.png" in content assert "Not Found for url: https://www.google.com/image2.png" in content
# looking for local file should fail # looking for local file should fail
assert "[broken] path/to/notfound" in content assert "[broken] path/to/notfound" in content
assert len(content.splitlines()) == 6 assert len(content.splitlines()) == 7
@pytest.mark.sphinx('linkcheck', testroot='linkcheck', freshenv=True) @pytest.mark.sphinx('linkcheck', testroot='linkcheck', freshenv=True)
@ -58,8 +58,8 @@ def test_defaults_json(app):
"info"]: "info"]:
assert attr in row assert attr in row
assert len(content.splitlines()) == 11 assert len(content.splitlines()) == 12
assert len(rows) == 11 assert len(rows) == 12
# the output order of the rows is not stable # the output order of the rows is not stable
# due to possible variance in network latency # due to possible variance in network latency
rowsby = {row["uri"]: row for row in rows} rowsby = {row["uri"]: row for row in rows}
@ -80,7 +80,7 @@ def test_defaults_json(app):
assert dnerow['uri'] == 'https://localhost:7777/doesnotexist' assert dnerow['uri'] == 'https://localhost:7777/doesnotexist'
assert rowsby['https://www.google.com/image2.png'] == { assert rowsby['https://www.google.com/image2.png'] == {
'filename': 'links.txt', 'filename': 'links.txt',
'lineno': 19, 'lineno': 20,
'status': 'broken', 'status': 'broken',
'code': 0, 'code': 0,
'uri': 'https://www.google.com/image2.png', 'uri': 'https://www.google.com/image2.png',
@ -94,6 +94,15 @@ def test_defaults_json(app):
# images should fail # images should fail
assert "Not Found for url: https://www.google.com/image.png" in \ assert "Not Found for url: https://www.google.com/image.png" in \
rowsby["https://www.google.com/image.png"]["info"] rowsby["https://www.google.com/image.png"]["info"]
# raw nodes' url should be checked too
assert rowsby["https://www.sphinx-doc.org/"] == {
'filename': 'links.txt',
'lineno': 21,
'status': 'redirected',
'code': 302,
'uri': 'https://www.sphinx-doc.org/',
'info': 'https://www.sphinx-doc.org/en/master/'
}
@pytest.mark.sphinx( @pytest.mark.sphinx(
@ -102,6 +111,7 @@ def test_defaults_json(app):
'linkcheck_ignore': [ 'linkcheck_ignore': [
'https://localhost:7777/doesnotexist', 'https://localhost:7777/doesnotexist',
'http://www.sphinx-doc.org/en/master/index.html#', 'http://www.sphinx-doc.org/en/master/index.html#',
'https://www.sphinx-doc.org/',
'https://www.google.com/image.png', 'https://www.google.com/image.png',
'https://www.google.com/image2.png', 'https://www.google.com/image2.png',
'path/to/notfound'] 'path/to/notfound']