mirror of
https://github.com/sphinx-doc/sphinx.git
synced 2025-02-25 18:55:22 -06:00
linkcheck: don't check anchors starting with "!".
Add changelog entry for #1099.
This commit is contained in:
parent
5e8642983b
commit
222edf59e7
4
CHANGES
4
CHANGES
@ -53,7 +53,9 @@ Features added
|
|||||||
* #1779: Add EPUB 3 builder
|
* #1779: Add EPUB 3 builder
|
||||||
* #1751: Add :confval:`todo_link_only` to avoid file path and line indication on
|
* #1751: Add :confval:`todo_link_only` to avoid file path and line indication on
|
||||||
:rst:dir:`todolist`. Thanks to Francesco Montesano.
|
:rst:dir:`todolist`. Thanks to Francesco Montesano.
|
||||||
* #2199: Use ``imagesize`` package to obtain size of images
|
* #2199: Use ``imagesize`` package to obtain size of images.
|
||||||
|
* #1099: Add configurable retries to the linkcheck builder. Thanks to Alex Gaynor.
|
||||||
|
Also don't check anchors starting with ``!``.
|
||||||
|
|
||||||
Bugs fixed
|
Bugs fixed
|
||||||
----------
|
----------
|
||||||
|
@ -75,11 +75,11 @@ class AnchorCheckParser(HTMLParser):
|
|||||||
self.found = True
|
self.found = True
|
||||||
|
|
||||||
|
|
||||||
def check_anchor(f, hash):
|
def check_anchor(f, anchor):
|
||||||
"""Reads HTML data from a filelike object 'f' searching for anchor 'hash'.
|
"""Reads HTML data from a filelike object 'f' searching for *anchor*.
|
||||||
Returns True if anchor was found, False otherwise.
|
Returns True if anchor was found, False otherwise.
|
||||||
"""
|
"""
|
||||||
parser = AnchorCheckParser(hash)
|
parser = AnchorCheckParser(anchor)
|
||||||
try:
|
try:
|
||||||
# Read file in chunks of 8192 bytes. If we find a matching anchor, we
|
# Read file in chunks of 8192 bytes. If we find a matching anchor, we
|
||||||
# break the loop early in hopes not to have to download the whole thing.
|
# break the loop early in hopes not to have to download the whole thing.
|
||||||
@ -140,10 +140,10 @@ class CheckExternalLinksBuilder(Builder):
|
|||||||
def check_uri():
|
def check_uri():
|
||||||
# split off anchor
|
# split off anchor
|
||||||
if '#' in uri:
|
if '#' in uri:
|
||||||
req_url, hash = uri.split('#', 1)
|
req_url, anchor = uri.split('#', 1)
|
||||||
else:
|
else:
|
||||||
req_url = uri
|
req_url = uri
|
||||||
hash = None
|
anchor = None
|
||||||
|
|
||||||
# handle non-ASCII URIs
|
# handle non-ASCII URIs
|
||||||
try:
|
try:
|
||||||
@ -152,8 +152,11 @@ class CheckExternalLinksBuilder(Builder):
|
|||||||
req_url = encode_uri(req_url)
|
req_url = encode_uri(req_url)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if hash and self.app.config.linkcheck_anchors:
|
if anchor and self.app.config.linkcheck_anchors and \
|
||||||
# Read the whole document and see if #hash exists
|
not anchor.startswith('!'):
|
||||||
|
# Read the whole document and see if #anchor exists
|
||||||
|
# (Anchors starting with ! are ignored since they are
|
||||||
|
# commonly used for dynamic pages)
|
||||||
req = Request(req_url)
|
req = Request(req_url)
|
||||||
f = opener.open(req, **kwargs)
|
f = opener.open(req, **kwargs)
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
@ -161,11 +164,12 @@ class CheckExternalLinksBuilder(Builder):
|
|||||||
encoding = f.headers.get_content_charset() or encoding
|
encoding = f.headers.get_content_charset() or encoding
|
||||||
else:
|
else:
|
||||||
encoding = get_content_charset(f) or encoding
|
encoding = get_content_charset(f) or encoding
|
||||||
found = check_anchor(TextIOWrapper(f, encoding), unquote(hash))
|
found = check_anchor(TextIOWrapper(f, encoding),
|
||||||
|
unquote(anchor))
|
||||||
f.close()
|
f.close()
|
||||||
|
|
||||||
if not found:
|
if not found:
|
||||||
raise Exception("Anchor '%s' not found" % hash)
|
raise Exception("Anchor '%s' not found" % anchor)
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
# try a HEAD request, which should be easier on
|
# try a HEAD request, which should be easier on
|
||||||
@ -193,8 +197,8 @@ class CheckExternalLinksBuilder(Builder):
|
|||||||
return 'working', '', 0
|
return 'working', '', 0
|
||||||
else:
|
else:
|
||||||
new_url = f.url
|
new_url = f.url
|
||||||
if hash:
|
if anchor:
|
||||||
new_url += '#' + hash
|
new_url += '#' + anchor
|
||||||
code = getattr(req, 'redirect_code', 0)
|
code = getattr(req, 'redirect_code', 0)
|
||||||
return 'redirected', new_url, code
|
return 'redirected', new_url, code
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user