mirror of
https://github.com/sphinx-doc/sphinx.git
synced 2025-02-25 18:55:22 -06:00
Merge pull request #2495 from electrofelix/linkcheck-skip-anchors
Allow skipping anchor checking using regex
This commit is contained in:
commit
a503849ac1
@ -2106,6 +2106,17 @@ Options for the linkcheck builder
|
|||||||
|
|
||||||
.. versionadded:: 1.2
|
.. versionadded:: 1.2
|
||||||
|
|
||||||
|
.. confval:: linkcheck_anchors_ignore
|
||||||
|
|
||||||
|
A list of regular expressions that match anchors that should be skipped when checking
|
||||||
|
the validity of anchors in links. This allows skipping entire sites, where
|
||||||
|
anchors are used to control dynamic pages, or just specific anchors within
|
||||||
|
a page, where JavaScript is used to add anchors dynamically or uses the
|
||||||
|
fragment to trigger an internal REST request. Default is
|
||||||
|
``["^!"]``.
|
||||||
|
|
||||||
|
.. versionadded:: 1.5
|
||||||
|
|
||||||
|
|
||||||
Options for the XML builder
|
Options for the XML builder
|
||||||
---------------------------
|
---------------------------
|
||||||
|
@ -82,6 +82,8 @@ class CheckExternalLinksBuilder(Builder):
|
|||||||
|
|
||||||
def init(self):
|
def init(self):
|
||||||
self.to_ignore = [re.compile(x) for x in self.app.config.linkcheck_ignore]
|
self.to_ignore = [re.compile(x) for x in self.app.config.linkcheck_ignore]
|
||||||
|
self.anchors_ignore = [re.compile(x)
|
||||||
|
for x in self.app.config.linkcheck_anchors_ignore]
|
||||||
self.good = set()
|
self.good = set()
|
||||||
self.broken = {}
|
self.broken = {}
|
||||||
self.redirected = {}
|
self.redirected = {}
|
||||||
@ -112,6 +114,10 @@ class CheckExternalLinksBuilder(Builder):
|
|||||||
# split off anchor
|
# split off anchor
|
||||||
if '#' in uri:
|
if '#' in uri:
|
||||||
req_url, anchor = uri.split('#', 1)
|
req_url, anchor = uri.split('#', 1)
|
||||||
|
for rex in self.anchors_ignore:
|
||||||
|
if rex.match(anchor):
|
||||||
|
anchor = None
|
||||||
|
break
|
||||||
else:
|
else:
|
||||||
req_url = uri
|
req_url = uri
|
||||||
anchor = None
|
anchor = None
|
||||||
@ -123,11 +129,8 @@ class CheckExternalLinksBuilder(Builder):
|
|||||||
req_url = encode_uri(req_url)
|
req_url = encode_uri(req_url)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if anchor and self.app.config.linkcheck_anchors and \
|
if anchor and self.app.config.linkcheck_anchors:
|
||||||
not anchor.startswith('!'):
|
|
||||||
# Read the whole document and see if #anchor exists
|
# Read the whole document and see if #anchor exists
|
||||||
# (Anchors starting with ! are ignored since they are
|
|
||||||
# commonly used for dynamic pages)
|
|
||||||
response = requests.get(req_url, stream=True, headers=self.headers,
|
response = requests.get(req_url, stream=True, headers=self.headers,
|
||||||
**kwargs)
|
**kwargs)
|
||||||
found = check_anchor(response, unquote(anchor))
|
found = check_anchor(response, unquote(anchor))
|
||||||
@ -294,3 +297,6 @@ def setup(app):
|
|||||||
app.add_config_value('linkcheck_timeout', None, None, [int])
|
app.add_config_value('linkcheck_timeout', None, None, [int])
|
||||||
app.add_config_value('linkcheck_workers', 5, None)
|
app.add_config_value('linkcheck_workers', 5, None)
|
||||||
app.add_config_value('linkcheck_anchors', True, None)
|
app.add_config_value('linkcheck_anchors', True, None)
|
||||||
|
# Anchors starting with ! are ignored since they are
|
||||||
|
# commonly used for dynamic pages
|
||||||
|
app.add_config_value('linkcheck_anchors_ignore', ["^!"], None)
|
||||||
|
@ -2,3 +2,10 @@ This is from CPython documentation.
|
|||||||
|
|
||||||
* Also, if there is a `default namespace <https://www.w3.org/TR/2006/REC-xml-names-20060816/#defaulting>`__, that full URI gets prepended to all of the non-prefixed tags.
|
* Also, if there is a `default namespace <https://www.w3.org/TR/2006/REC-xml-names-20060816/#defaulting>`__, that full URI gets prepended to all of the non-prefixed tags.
|
||||||
* The `SSMEDIAN <https://help.gnome.org/users/gnumeric/stable/gnumeric.html#gnumeric-function-SSMEDIAN>`_ function in the Gnome Gnumeric spreadsheet.
|
* The `SSMEDIAN <https://help.gnome.org/users/gnumeric/stable/gnumeric.html#gnumeric-function-SSMEDIAN>`_ function in the Gnome Gnumeric spreadsheet.
|
||||||
|
|
||||||
|
|
||||||
|
Some additional anchors to exercise ignore code
|
||||||
|
|
||||||
|
* `Example Bar invalid <http://example.com/#!bar>`_
|
||||||
|
* `Example Bar invalid <http://example.com#!bar>`_ tests that default ignore anchor of #! does not need to be prefixed with /
|
||||||
|
* `Example Bar invalid <http://example.com/#top>`_
|
||||||
|
@ -14,12 +14,25 @@ from util import with_app
|
|||||||
|
|
||||||
|
|
||||||
@with_app('linkcheck', testroot='linkcheck', freshenv=True)
|
@with_app('linkcheck', testroot='linkcheck', freshenv=True)
|
||||||
def test_all(app, status, warning):
|
def test_defaults(app, status, warning):
|
||||||
app.builder.build_all()
|
app.builder.build_all()
|
||||||
|
|
||||||
assert (app.outdir / 'output.txt').exists()
|
assert (app.outdir / 'output.txt').exists()
|
||||||
content = (app.outdir / 'output.txt').text()
|
content = (app.outdir / 'output.txt').text()
|
||||||
|
|
||||||
# expect all ok
|
print(content)
|
||||||
assert not content
|
# looking for #top should fail
|
||||||
|
assert "Anchor 'top' not found" in content
|
||||||
|
assert len(content.splitlines()) == 1
|
||||||
|
|
||||||
|
|
||||||
|
@with_app('linkcheck', testroot='linkcheck', freshenv=True,
|
||||||
|
confoverrides={'linkcheck_anchors_ignore': ["^!", "^top$"]})
|
||||||
|
def test_anchors_ignored(app, status, warning):
|
||||||
|
app.builder.build_all()
|
||||||
|
|
||||||
|
assert (app.outdir / 'output.txt').exists()
|
||||||
|
content = (app.outdir / 'output.txt').text()
|
||||||
|
|
||||||
|
# expect all ok when excluding #top
|
||||||
|
assert not content
|
||||||
|
Loading…
Reference in New Issue
Block a user