Merge pull request #9260 from tk0miya/9016_linkcheck_github_anchors

Close #9016: linkcheck builder failed to check the anchors of github.com
This commit is contained in:
Takeshi KOMIYA 2021-06-04 01:18:22 +09:00 committed by GitHub
commit 1418e3acbb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 47 additions and 4 deletions

View File

@ -43,6 +43,9 @@ Features added
text
* #9176: i18n: Emit a debug message if message catalog file not found under
:confval:`locale_dirs`
* #9016: linkcheck: Support checking anchors on github.com
* #9016: linkcheck: Add a new event :event:`linkcheck-process-uri` to modify
URIs before checking hyperlinks
* #1874: py domain: Support union types using ``|`` in info-field-list
* #9268: py domain: :confval:`python_use_unqualified_type_names` supports type
field in info-field-list

View File

@ -384,6 +384,14 @@ Here is a more detailed list of these events.
.. versionchanged:: 1.3
The return value can now specify a template name.
.. event:: linkcheck-process-uri (app, uri)
Emitted when the linkcheck builder collects hyperlinks from document. *uri*
is a collected URI. The event handlers can modify the URI by returning a
string.
.. versionadded:: 4.1
.. event:: build-finished (app, exception)
Emitted when a build has finished, before Sphinx exits, usually used for

View File

@ -21,7 +21,7 @@ from queue import PriorityQueue, Queue
from threading import Thread
from typing import (Any, Dict, Generator, List, NamedTuple, Optional, Pattern, Set, Tuple,
Union, cast)
from urllib.parse import unquote, urlparse
from urllib.parse import unquote, urlparse, urlunparse
from docutils import nodes
from docutils.nodes import Element
@ -627,6 +627,10 @@ class HyperlinkCollector(SphinxPostTransform):
if 'refuri' not in refnode:
continue
uri = refnode['refuri']
newuri = self.app.emit_firstresult('linkcheck-process-uri', uri)
if newuri:
uri = newuri
lineno = get_node_line(refnode)
uri_info = Hyperlink(uri, self.env.docname, lineno)
if uri not in hyperlinks:
@ -636,12 +640,31 @@ class HyperlinkCollector(SphinxPostTransform):
for imgnode in self.document.traverse(nodes.image):
uri = imgnode['candidates'].get('?')
if uri and '://' in uri:
newuri = self.app.emit_firstresult('linkcheck-process-uri', uri)
if newuri:
uri = newuri
lineno = get_node_line(imgnode)
uri_info = Hyperlink(uri, self.env.docname, lineno)
if uri not in hyperlinks:
hyperlinks[uri] = uri_info
def rewrite_github_anchor(app: Sphinx, uri: str) -> Optional[str]:
"""Rewrite anchor name of the hyperlink to github.com
The hyperlink anchors in github.com are dynamically generated. This rewrites
them before checking and makes them comparable.
"""
parsed = urlparse(uri)
if parsed.hostname == "github.com" and parsed.fragment:
prefixed = parsed.fragment.startswith('user-content-')
if not prefixed:
fragment = f'user-content-{parsed.fragment}'
return urlunparse(parsed._replace(fragment=fragment))
return None
def setup(app: Sphinx) -> Dict[str, Any]:
app.add_builder(CheckExternalLinksBuilder)
app.add_post_transform(HyperlinkCollector)
@ -658,6 +681,9 @@ def setup(app: Sphinx) -> Dict[str, Any]:
app.add_config_value('linkcheck_anchors_ignore', ["^!"], None)
app.add_config_value('linkcheck_rate_limit_timeout', 300.0, None)
app.add_event('linkcheck-process-uri')
app.connect('linkcheck-process-uri', rewrite_github_anchor)
return {
'version': 'builtin',
'parallel_read_safe': True,

View File

@ -13,6 +13,8 @@ Some additional anchors to exercise ignore code
* `Complete nonsense <https://localhost:7777/doesnotexist>`_
* `Example valid local file <conf.py>`_
* `Example invalid local file <path/to/notfound>`_
* https://github.com/sphinx-doc/sphinx#documentation
* https://github.com/sphinx-doc/sphinx#user-content-testing
.. image:: https://www.google.com/image.png
.. figure:: https://www.google.com/image2.png

View File

@ -65,8 +65,8 @@ def test_defaults_json(app):
"info"]:
assert attr in row
assert len(content.splitlines()) == 10
assert len(rows) == 10
assert len(content.splitlines()) == 12
assert len(rows) == 12
# the output order of the rows is not stable
# due to possible variance in network latency
rowsby = {row["uri"]: row for row in rows}
@ -87,7 +87,7 @@ def test_defaults_json(app):
assert dnerow['uri'] == 'https://localhost:7777/doesnotexist'
assert rowsby['https://www.google.com/image2.png'] == {
'filename': 'links.txt',
'lineno': 18,
'lineno': 20,
'status': 'broken',
'code': 0,
'uri': 'https://www.google.com/image2.png',
@ -101,6 +101,10 @@ def test_defaults_json(app):
# images should fail
assert "Not Found for url: https://www.google.com/image.png" in \
rowsby["https://www.google.com/image.png"]["info"]
# The anchor of the URI for github.com is automatically modified
assert 'https://github.com/sphinx-doc/sphinx#documentation' not in rowsby
assert 'https://github.com/sphinx-doc/sphinx#user-content-documentation' in rowsby
assert 'https://github.com/sphinx-doc/sphinx#user-content-testing' in rowsby
@pytest.mark.sphinx(