linkcheck: add `HyperlinkCollector.find_url()` (#12213)

Signed-off-by: James Knight <james.d.knight@live.com>
Co-authored-by: Adam Turner <9087854+aa-turner@users.noreply.github.com>
This commit is contained in:
James Knight
2024-04-24 14:19:44 -04:00
committed by GitHub
parent 208f4bcb66
commit 04b229fe4d

View File

@@ -178,41 +178,67 @@ class HyperlinkCollector(SphinxPostTransform):
default_priority = 800
def run(self, **kwargs: Any) -> None:
for node in self.document.findall():
if uri := self.find_uri(node):
self._add_uri(uri, node)
def find_uri(self, node: nodes.Element) -> str | None:
"""Find a URI for a given node.
This call can be used to retrieve a URI from a provided node. If no
URI exists for a provided node, this call will return ``None``.
This method can be useful for extension developers who wish to
easily inject hyperlinks into a builder by only needing to override
this method.
:param node: A node class
:returns: URI of the node
"""
# reference nodes
if isinstance(node, nodes.reference):
if 'refuri' in node:
return node['refuri']
# image nodes
if isinstance(node, nodes.image):
uri = node['candidates'].get('?')
if uri and '://' in uri:
return uri
# raw nodes
if isinstance(node, nodes.raw):
uri = node.get('source')
if uri and '://' in uri:
return uri
return None
def _add_uri(self, uri: str, node: nodes.Element) -> None:
"""Registers a node's URI into a builder's collection of hyperlinks.
Provides the ability to register a URI value determined from a node
into the linkcheck's builder. URI's processed through this call can
be manipulated through a ``linkcheck-process-uri`` event before the
builder attempts to validate.
:param uri: URI to add
:param node: A node class where the URI was found
"""
builder = cast(CheckExternalLinksBuilder, self.app.builder)
hyperlinks = builder.hyperlinks
docname = self.env.docname
# reference nodes
for refnode in self.document.findall(nodes.reference):
if 'refuri' in refnode:
uri = refnode['refuri']
_add_uri(self.app, uri, refnode, hyperlinks, docname)
if newuri := self.app.emit_firstresult('linkcheck-process-uri', uri):
uri = newuri
# image nodes
for imgnode in self.document.findall(nodes.image):
uri = imgnode['candidates'].get('?')
if uri and '://' in uri:
_add_uri(self.app, uri, imgnode, hyperlinks, docname)
try:
lineno = get_node_line(node)
except ValueError:
lineno = -1
# raw nodes
for rawnode in self.document.findall(nodes.raw):
uri = rawnode.get('source')
if uri and '://' in uri:
_add_uri(self.app, uri, rawnode, hyperlinks, docname)
def _add_uri(app: Sphinx, uri: str, node: nodes.Element,
hyperlinks: dict[str, Hyperlink], docname: str) -> None:
if newuri := app.emit_firstresult('linkcheck-process-uri', uri):
uri = newuri
try:
lineno = get_node_line(node)
except ValueError:
lineno = -1
if uri not in hyperlinks:
hyperlinks[uri] = Hyperlink(uri, docname, app.env.doc2path(docname), lineno)
if uri not in hyperlinks:
hyperlinks[uri] = Hyperlink(uri, docname, self.env.doc2path(docname), lineno)
class Hyperlink(NamedTuple):