mirror of
https://github.com/sphinx-doc/sphinx.git
synced 2025-02-25 18:55:22 -06:00
linkcheck: Store the original (unquoted) anchor (#12206)
This commit is contained in:
parent
6d6feb240f
commit
2008aa8c78
@ -13,7 +13,7 @@ from os import path
|
|||||||
from queue import PriorityQueue, Queue
|
from queue import PriorityQueue, Queue
|
||||||
from threading import Thread
|
from threading import Thread
|
||||||
from typing import TYPE_CHECKING, NamedTuple, cast
|
from typing import TYPE_CHECKING, NamedTuple, cast
|
||||||
from urllib.parse import unquote, urlparse, urlsplit, urlunparse
|
from urllib.parse import quote, unquote, urlparse, urlsplit, urlunparse
|
||||||
|
|
||||||
from docutils import nodes
|
from docutils import nodes
|
||||||
from requests.exceptions import ConnectionError, HTTPError, SSLError, TooManyRedirects
|
from requests.exceptions import ConnectionError, HTTPError, SSLError, TooManyRedirects
|
||||||
@ -409,6 +409,7 @@ class HyperlinkAvailabilityCheckWorker(Thread):
|
|||||||
if rex.match(req_url):
|
if rex.match(req_url):
|
||||||
anchor = ''
|
anchor = ''
|
||||||
break
|
break
|
||||||
|
anchor = unquote(anchor)
|
||||||
|
|
||||||
# handle non-ASCII URIs
|
# handle non-ASCII URIs
|
||||||
try:
|
try:
|
||||||
@ -446,7 +447,7 @@ class HyperlinkAvailabilityCheckWorker(Thread):
|
|||||||
) as response:
|
) as response:
|
||||||
if (self.check_anchors and response.ok and anchor
|
if (self.check_anchors and response.ok and anchor
|
||||||
and not contains_anchor(response, anchor)):
|
and not contains_anchor(response, anchor)):
|
||||||
raise Exception(__(f'Anchor {anchor!r} not found'))
|
raise Exception(__(f'Anchor {quote(anchor)!r} not found'))
|
||||||
|
|
||||||
# Copy data we need from the (closed) response
|
# Copy data we need from the (closed) response
|
||||||
status_code = response.status_code
|
status_code = response.status_code
|
||||||
@ -592,7 +593,7 @@ def _get_request_headers(
|
|||||||
|
|
||||||
def contains_anchor(response: Response, anchor: str) -> bool:
|
def contains_anchor(response: Response, anchor: str) -> bool:
|
||||||
"""Determine if an anchor is contained within an HTTP response."""
|
"""Determine if an anchor is contained within an HTTP response."""
|
||||||
parser = AnchorCheckParser(unquote(anchor))
|
parser = AnchorCheckParser(anchor)
|
||||||
# Read file in chunks. If we find a matching anchor, we break
|
# Read file in chunks. If we find a matching anchor, we break
|
||||||
# the loop early in hopes not to have to download the whole thing.
|
# the loop early in hopes not to have to download the whole thing.
|
||||||
for chunk in response.iter_content(chunk_size=4096, decode_unicode=True):
|
for chunk in response.iter_content(chunk_size=4096, decode_unicode=True):
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
* `Example valid url, no anchor <http://localhost:7777/valid>`_
|
* `Example valid url, no anchor <http://localhost:7777/valid>`_
|
||||||
* `Example valid url, valid anchor <http://localhost:7777/valid#valid-anchor>`_
|
* `Example valid url, valid anchor <http://localhost:7777/valid#valid-anchor>`_
|
||||||
|
* `Example valid url, valid quotable anchor <http://localhost:7777/valid#py:module::urllib.parse>`_
|
||||||
* `Example valid url, invalid anchor <http://localhost:7777/valid#invalid-anchor>`_
|
* `Example valid url, invalid anchor <http://localhost:7777/valid#invalid-anchor>`_
|
||||||
* `Example ignored url, no anchor <http://localhost:7777/ignored>`_
|
* `Example ignored url, no anchor <http://localhost:7777/ignored>`_
|
||||||
* `Example ignored url, invalid anchor <http://localhost:7777/ignored#invalid-anchor>`_
|
* `Example ignored url, invalid anchor <http://localhost:7777/ignored#invalid-anchor>`_
|
||||||
|
@ -295,7 +295,7 @@ def test_anchors_ignored_for_url(app):
|
|||||||
|
|
||||||
attrs = ('filename', 'lineno', 'status', 'code', 'uri', 'info')
|
attrs = ('filename', 'lineno', 'status', 'code', 'uri', 'info')
|
||||||
data = [json.loads(x) for x in content.splitlines()]
|
data = [json.loads(x) for x in content.splitlines()]
|
||||||
assert len(data) == 7
|
assert len(data) == 8
|
||||||
assert all(all(attr in row for attr in attrs) for row in data)
|
assert all(all(attr in row for attr in attrs) for row in data)
|
||||||
|
|
||||||
# rows may be unsorted due to network latency or
|
# rows may be unsorted due to network latency or
|
||||||
@ -304,6 +304,7 @@ def test_anchors_ignored_for_url(app):
|
|||||||
|
|
||||||
assert rows[f'http://{address}/valid']['status'] == 'working'
|
assert rows[f'http://{address}/valid']['status'] == 'working'
|
||||||
assert rows[f'http://{address}/valid#valid-anchor']['status'] == 'working'
|
assert rows[f'http://{address}/valid#valid-anchor']['status'] == 'working'
|
||||||
|
assert rows['http://localhost:7777/valid#py:module::urllib.parse']['status'] == 'broken'
|
||||||
assert rows[f'http://{address}/valid#invalid-anchor'] == {
|
assert rows[f'http://{address}/valid#invalid-anchor'] == {
|
||||||
'status': 'broken',
|
'status': 'broken',
|
||||||
'info': "Anchor 'invalid-anchor' not found",
|
'info': "Anchor 'invalid-anchor' not found",
|
||||||
|
Loading…
Reference in New Issue
Block a user