diff --git a/doc/config.rst b/doc/config.rst index c34f71db1..ec26d7b2f 100644 --- a/doc/config.rst +++ b/doc/config.rst @@ -2106,6 +2106,17 @@ Options for the linkcheck builder .. versionadded:: 1.2 +.. confval:: linkcheck_anchors_ignore + + A list of regular expressions that match anchors that should be skipped + when checking the validity of anchors in links. This allows skipping + anchors that are used to control dynamic pages, or specific anchors within + a page, where JavaScript is used to add anchors dynamically or where the + fragment is used to trigger an internal REST request. Default is + ``["^!"]``. + + .. versionadded:: 1.5 + Options for the XML builder --------------------------- diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index e53cabb62..f49f4f9a3 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -82,6 +82,8 @@ class CheckExternalLinksBuilder(Builder): def init(self): self.to_ignore = [re.compile(x) for x in self.app.config.linkcheck_ignore] + self.anchors_ignore = [re.compile(x) + for x in self.app.config.linkcheck_anchors_ignore] self.good = set() self.broken = {} self.redirected = {} @@ -112,6 +114,10 @@ class CheckExternalLinksBuilder(Builder): # split off anchor if '#' in uri: req_url, anchor = uri.split('#', 1) + for rex in self.anchors_ignore: + if rex.match(anchor): + anchor = None + break else: req_url = uri anchor = None @@ -123,11 +129,8 @@ class CheckExternalLinksBuilder(Builder): req_url = encode_uri(req_url) try: - if anchor and self.app.config.linkcheck_anchors and \ - not anchor.startswith('!'): + if anchor and self.app.config.linkcheck_anchors: # Read the whole document and see if #anchor exists - # (Anchors starting with ! 
are ignored since they are - # commonly used for dynamic pages) response = requests.get(req_url, stream=True, headers=self.headers, **kwargs) found = check_anchor(response, unquote(anchor)) @@ -294,3 +297,6 @@ def setup(app): app.add_config_value('linkcheck_timeout', None, None, [int]) app.add_config_value('linkcheck_workers', 5, None) app.add_config_value('linkcheck_anchors', True, None) + # Anchors starting with ! are ignored since they are + # commonly used for dynamic pages + app.add_config_value('linkcheck_anchors_ignore', ["^!"], None) diff --git a/tests/roots/test-linkcheck/links.txt b/tests/roots/test-linkcheck/links.txt index c3ec7235e..ef3607970 100644 --- a/tests/roots/test-linkcheck/links.txt +++ b/tests/roots/test-linkcheck/links.txt @@ -2,3 +2,10 @@ This is from CPython documentation. * Also, if there is a `default namespace `__, that full URI gets prepended to all of the non-prefixed tags. * The `SSMEDIAN `_ function in the Gnome Gnumeric spreadsheet. + + +Some additional anchors to exercise ignore code + +* `Example Bar invalid `_ +* `Example Bar invalid `_ tests that default ignore anchor of #! 
does not need to be prefixed with / +* `Example Bar invalid `_ diff --git a/tests/test_build_linkcheck.py b/tests/test_build_linkcheck.py index 700642901..1d75135af 100644 --- a/tests/test_build_linkcheck.py +++ b/tests/test_build_linkcheck.py @@ -14,12 +14,25 @@ from util import with_app @with_app('linkcheck', testroot='linkcheck', freshenv=True) -def test_all(app, status, warning): +def test_defaults(app, status, warning): app.builder.build_all() assert (app.outdir / 'output.txt').exists() content = (app.outdir / 'output.txt').text() - # expect all ok - assert not content + print(content) + # looking for #top should fail + assert "Anchor 'top' not found" in content + assert len(content.splitlines()) == 1 + +@with_app('linkcheck', testroot='linkcheck', freshenv=True, + confoverrides={'linkcheck_anchors_ignore': ["^!", "^top$"]}) +def test_anchors_ignored(app, status, warning): + app.builder.build_all() + + assert (app.outdir / 'output.txt').exists() + content = (app.outdir / 'output.txt').text() + + # expect all ok when excluding #top + assert not content