mirror of
https://github.com/sphinx-doc/sphinx.git
synced 2025-02-25 18:55:22 -06:00
Merge pull request #2495 from electrofelix/linkcheck-skip-anchors
Allow skipping anchor checking using regex
This commit is contained in:
commit
a503849ac1
@ -2106,6 +2106,17 @@ Options for the linkcheck builder
|
||||
|
||||
.. versionadded:: 1.2
|
||||
|
||||
.. confval:: linkcheck_anchors_ignore
|
||||
|
||||
A list of regular expressions that match URIs that should skip checking
|
||||
the validity of anchors in links. This allows skipping entire sites, where
|
||||
anchors are used to control dynamic pages, or just specific anchors within
|
||||
a page, where JavaScript is used to add anchors dynamically, or use the
|
||||
fragment to trigger an internal REST request. Default is
|
||||
``["^!"]``.
|
||||
|
||||
.. versionadded:: 1.5
|
||||
|
||||
|
||||
Options for the XML builder
|
||||
---------------------------
|
||||
|
@ -82,6 +82,8 @@ class CheckExternalLinksBuilder(Builder):
|
||||
|
||||
def init(self):
|
||||
self.to_ignore = [re.compile(x) for x in self.app.config.linkcheck_ignore]
|
||||
self.anchors_ignore = [re.compile(x)
|
||||
for x in self.app.config.linkcheck_anchors_ignore]
|
||||
self.good = set()
|
||||
self.broken = {}
|
||||
self.redirected = {}
|
||||
@ -112,6 +114,10 @@ class CheckExternalLinksBuilder(Builder):
|
||||
# split off anchor
|
||||
if '#' in uri:
|
||||
req_url, anchor = uri.split('#', 1)
|
||||
for rex in self.anchors_ignore:
|
||||
if rex.match(anchor):
|
||||
anchor = None
|
||||
break
|
||||
else:
|
||||
req_url = uri
|
||||
anchor = None
|
||||
@ -123,11 +129,8 @@ class CheckExternalLinksBuilder(Builder):
|
||||
req_url = encode_uri(req_url)
|
||||
|
||||
try:
|
||||
if anchor and self.app.config.linkcheck_anchors and \
|
||||
not anchor.startswith('!'):
|
||||
if anchor and self.app.config.linkcheck_anchors:
|
||||
# Read the whole document and see if #anchor exists
|
||||
# (Anchors starting with ! are ignored since they are
|
||||
# commonly used for dynamic pages)
|
||||
response = requests.get(req_url, stream=True, headers=self.headers,
|
||||
**kwargs)
|
||||
found = check_anchor(response, unquote(anchor))
|
||||
@ -294,3 +297,6 @@ def setup(app):
|
||||
app.add_config_value('linkcheck_timeout', None, None, [int])
|
||||
app.add_config_value('linkcheck_workers', 5, None)
|
||||
app.add_config_value('linkcheck_anchors', True, None)
|
||||
# Anchors starting with ! are ignored since they are
|
||||
# commonly used for dynamic pages
|
||||
app.add_config_value('linkcheck_anchors_ignore', ["^!"], None)
|
||||
|
@ -2,3 +2,10 @@ This is from CPython documentation.
|
||||
|
||||
* Also, if there is a `default namespace <https://www.w3.org/TR/2006/REC-xml-names-20060816/#defaulting>`__, that full URI gets prepended to all of the non-prefixed tags.
|
||||
* The `SSMEDIAN <https://help.gnome.org/users/gnumeric/stable/gnumeric.html#gnumeric-function-SSMEDIAN>`_ function in the Gnome Gnumeric spreadsheet.
|
||||
|
||||
|
||||
Some additional anchors to exercise ignore code
|
||||
|
||||
* `Example Bar invalid <http://example.com/#!bar>`_
|
||||
* `Example Bar invalid <http://example.com#!bar>`_ tests that default ignore anchor of #! does not need to be prefixed with /
|
||||
* `Example Bar invalid <http://example.com/#top>`_
|
||||
|
@ -14,12 +14,25 @@ from util import with_app
|
||||
|
||||
|
||||
@with_app('linkcheck', testroot='linkcheck', freshenv=True)
|
||||
def test_all(app, status, warning):
|
||||
def test_defaults(app, status, warning):
|
||||
app.builder.build_all()
|
||||
|
||||
assert (app.outdir / 'output.txt').exists()
|
||||
content = (app.outdir / 'output.txt').text()
|
||||
|
||||
# expect all ok
|
||||
assert not content
|
||||
print(content)
|
||||
# looking for #top should fail
|
||||
assert "Anchor 'top' not found" in content
|
||||
assert len(content.splitlines()) == 1
|
||||
|
||||
|
||||
@with_app('linkcheck', testroot='linkcheck', freshenv=True,
|
||||
confoverrides={'linkcheck_anchors_ignore': ["^!", "^top$"]})
|
||||
def test_anchors_ignored(app, status, warning):
|
||||
app.builder.build_all()
|
||||
|
||||
assert (app.outdir / 'output.txt').exists()
|
||||
content = (app.outdir / 'output.txt').text()
|
||||
|
||||
# expect all ok when excluding #top
|
||||
assert not content
|
||||
|
Loading…
Reference in New Issue
Block a user