diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index 9178458b1..7d75cac98 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -13,7 +13,7 @@ from os import path from queue import PriorityQueue, Queue from threading import Thread from typing import TYPE_CHECKING, NamedTuple, cast -from urllib.parse import unquote, urlparse, urlsplit, urlunparse +from urllib.parse import quote, unquote, urlparse, urlsplit, urlunparse from docutils import nodes from requests.exceptions import ConnectionError, HTTPError, SSLError, TooManyRedirects @@ -409,6 +409,7 @@ class HyperlinkAvailabilityCheckWorker(Thread): if rex.match(req_url): anchor = '' break + anchor = unquote(anchor) # handle non-ASCII URIs try: @@ -446,7 +447,7 @@ class HyperlinkAvailabilityCheckWorker(Thread): ) as response: if (self.check_anchors and response.ok and anchor and not contains_anchor(response, anchor)): - raise Exception(__(f'Anchor {anchor!r} not found')) + raise Exception(__('Anchor %r not found') % quote(anchor)) # Copy data we need from the (closed) response status_code = response.status_code @@ -592,7 +593,7 @@ def _get_request_headers( def contains_anchor(response: Response, anchor: str) -> bool: """Determine if an anchor is contained within an HTTP response.""" - parser = AnchorCheckParser(unquote(anchor)) + parser = AnchorCheckParser(anchor) # Read file in chunks. If we find a matching anchor, we break # the loop early in hopes not to have to download the whole thing.
for chunk in response.iter_content(chunk_size=4096, decode_unicode=True): diff --git a/tests/roots/test-linkcheck-anchors-ignore-for-url/index.rst b/tests/roots/test-linkcheck-anchors-ignore-for-url/index.rst index df287b4c4..02969b63e 100644 --- a/tests/roots/test-linkcheck-anchors-ignore-for-url/index.rst +++ b/tests/roots/test-linkcheck-anchors-ignore-for-url/index.rst @@ -1,5 +1,6 @@ * `Example valid url, no anchor `_ * `Example valid url, valid anchor `_ +* `Example valid url, valid quotable anchor `_ * `Example valid url, invalid anchor `_ * `Example ignored url, no anchor `_ * `Example ignored url, invalid anchor `_ diff --git a/tests/test_builders/test_build_linkcheck.py b/tests/test_builders/test_build_linkcheck.py index c8d8515af..f3ff64c08 100644 --- a/tests/test_builders/test_build_linkcheck.py +++ b/tests/test_builders/test_build_linkcheck.py @@ -295,7 +295,7 @@ def test_anchors_ignored_for_url(app): attrs = ('filename', 'lineno', 'status', 'code', 'uri', 'info') data = [json.loads(x) for x in content.splitlines()] - assert len(data) == 7 + assert len(data) == 8 assert all(all(attr in row for attr in attrs) for row in data) # rows may be unsorted due to network latency or @@ -304,6 +304,7 @@ def test_anchors_ignored_for_url(app): assert rows[f'http://{address}/valid']['status'] == 'working' assert rows[f'http://{address}/valid#valid-anchor']['status'] == 'working' + assert rows[f'http://{address}/valid#py:module::urllib.parse']['status'] == 'broken' assert rows[f'http://{address}/valid#invalid-anchor'] == { 'status': 'broken', 'info': "Anchor 'invalid-anchor' not found",