Close #9016: linkcheck builder failed to check the anchors of github.com

This commit is contained in:
Takeshi KOMIYA 2021-05-22 14:13:19 +09:00
parent 5c275191b6
commit 92335bd6e6
5 changed files with 48 additions and 3 deletions

View File

@ -42,6 +42,9 @@ Features added
text
* #9176: i18n: Emit a debug message if message catalog file not found under
:confval:`locale_dirs`
* #9016: linkcheck: Support checking anchors on github.com
* #9016: linkcheck: Add a new event :event:`linkcheck-process-uri` to modify
URIs before checking hyperlinks
* #1874: py domain: Support union types using ``|`` in info-field-list
* #9097: Optimize the parallel build
* #9131: Add :confval:`nitpick_ignore_regex` to ignore nitpicky warnings using

View File

@ -384,6 +384,14 @@ Here is a more detailed list of these events.
.. versionchanged:: 1.3
The return value can now specify a template name.
.. event:: linkcheck-process-uri (app, uri)
Emitted when the linkcheck builder collects hyperlinks from a document. *uri*
is a collected URI. The event handlers can modify the URI by returning a
string.
.. versionadded:: 4.1
.. event:: build-finished (app, exception)
Emitted when a build has finished, before Sphinx exits, usually used for

View File

@ -627,6 +627,10 @@ class HyperlinkCollector(SphinxPostTransform):
if 'refuri' not in refnode:
continue
uri = refnode['refuri']
newuri = self.app.emit_firstresult('linkcheck-process-uri', uri)
if newuri:
uri = newuri
lineno = get_node_line(refnode)
uri_info = Hyperlink(uri, self.env.docname, lineno)
if uri not in hyperlinks:
@ -636,12 +640,33 @@ class HyperlinkCollector(SphinxPostTransform):
for imgnode in self.document.traverse(nodes.image):
uri = imgnode['candidates'].get('?')
if uri and '://' in uri:
newuri = self.app.emit_firstresult('linkcheck-process-uri', uri)
if newuri:
uri = newuri
lineno = get_node_line(imgnode)
uri_info = Hyperlink(uri, self.env.docname, lineno)
if uri not in hyperlinks:
hyperlinks[uri] = uri_info
def rewrite_github_anchor(app: Sphinx, uri: str) -> Optional[str]:
    """Rewrite the anchor name of a hyperlink pointing to github.com.

    The hyperlink anchors in github.com are dynamically generated.  This
    rewrites them before checking and makes them comparable.

    Intended as a handler for the ``linkcheck-process-uri`` event.

    :param app: The application object.  Unused here, but part of the event
                handler signature.
    :param uri: The collected URI.
    :return: The URI with its anchor prefixed by ``user-content-``, or
             ``None`` to leave the URI untouched (not a github.com URI, no
             anchor, or an anchor that is already prefixed).
    """
    from urllib.parse import urlparse

    baseuri, _, anchor = uri.partition('#')
    if not anchor:
        # No fragment at all, or an empty one ("...#"): nothing to rewrite.
        return None

    try:
        # Compare the real host instead of pattern-matching the whole URI:
        # an unescaped "." in a regex matches any character, and a substring
        # search also hits "://github.com/" embedded in a query string of
        # some unrelated site.
        hostname = urlparse(baseuri).hostname
    except ValueError:
        # Malformed URI (e.g. a broken IPv6 literal); leave it for the
        # checker to report.
        return None

    if hostname != 'github.com':
        return None
    if anchor.startswith('user-content-'):
        # Ignored when URI is already prefixed.
        return None

    return f'{baseuri}#user-content-{anchor}'
def setup(app: Sphinx) -> Dict[str, Any]:
app.add_builder(CheckExternalLinksBuilder)
app.add_post_transform(HyperlinkCollector)
@ -658,6 +683,9 @@ def setup(app: Sphinx) -> Dict[str, Any]:
app.add_config_value('linkcheck_anchors_ignore', ["^!"], None)
app.add_config_value('linkcheck_rate_limit_timeout', 300.0, None)
app.add_event('linkcheck-process-uri')
app.connect('linkcheck-process-uri', rewrite_github_anchor)
return {
'version': 'builtin',
'parallel_read_safe': True,

View File

@ -13,6 +13,8 @@ Some additional anchors to exercise ignore code
* `Complete nonsense <https://localhost:7777/doesnotexist>`_
* `Example valid local file <conf.py>`_
* `Example invalid local file <path/to/notfound>`_
* https://github.com/sphinx-doc/sphinx#documentation
* https://github.com/sphinx-doc/sphinx#user-content-testing
.. image:: https://www.google.com/image.png
.. figure:: https://www.google.com/image2.png

View File

@ -65,8 +65,8 @@ def test_defaults_json(app):
"info"]:
assert attr in row
assert len(content.splitlines()) == 10
assert len(rows) == 10
assert len(content.splitlines()) == 12
assert len(rows) == 12
# the output order of the rows is not stable
# due to possible variance in network latency
rowsby = {row["uri"]: row for row in rows}
@ -87,7 +87,7 @@ def test_defaults_json(app):
assert dnerow['uri'] == 'https://localhost:7777/doesnotexist'
assert rowsby['https://www.google.com/image2.png'] == {
'filename': 'links.txt',
'lineno': 18,
'lineno': 20,
'status': 'broken',
'code': 0,
'uri': 'https://www.google.com/image2.png',
@ -101,6 +101,10 @@ def test_defaults_json(app):
# images should fail
assert "Not Found for url: https://www.google.com/image.png" in \
rowsby["https://www.google.com/image.png"]["info"]
# The anchor of the URI for github.com is automatically modified
assert 'https://github.com/sphinx-doc/sphinx#documentation' not in rowsby
assert 'https://github.com/sphinx-doc/sphinx#user-content-documentation' in rowsby
assert 'https://github.com/sphinx-doc/sphinx#user-content-testing' in rowsby
@pytest.mark.sphinx(