Merge pull request #2495 from electrofelix/linkcheck-skip-anchors

Allow skipping anchor checking using regex
2025-02-25 18:55:22 -06:00 · 2016-11-04 23:53:46 +09:00 · 2016-11-04 23:53:46 +09:00 · a503849ac1
commit a503849ac1
parent 56958227c8 22765990f0
4 changed files with 44 additions and 7 deletions
--- a/doc/config.rst
+++ b/doc/config.rst
@ -2106,6 +2106,17 @@ Options for the linkcheck builder
   .. versionadded:: 1.2
 .. confval:: linkcheck_anchors_ignore
   A list of regular expressions that match anchors for which Sphinx should
   skip checking validity in links. Each expression is matched against the
   part of the URI after ``#``. This allows skipping anchors that a site's
   JavaScript adds dynamically, that control dynamic pages, or that are
   used as a fragment to trigger an internal REST request. Default is
   ``["^!"]``.
   .. versionadded:: 1.5
 Options for the XML builder
 ---------------------------
--- a/sphinx/builders/linkcheck.py
+++ b/sphinx/builders/linkcheck.py
@ -82,6 +82,8 @@ class CheckExternalLinksBuilder(Builder):
    def init(self):
        self.to_ignore = [re.compile(x) for x in self.app.config.linkcheck_ignore]
        self.anchors_ignore = [re.compile(x)
                               for x in self.app.config.linkcheck_anchors_ignore]
        self.good = set()
        self.broken = {}
        self.redirected = {}
@ -112,6 +114,10 @@ class CheckExternalLinksBuilder(Builder):
            # split off anchor
            if '#' in uri:
                req_url, anchor = uri.split('#', 1)
                for rex in self.anchors_ignore:
                    if rex.match(anchor):
                        anchor = None
                        break
            else:
                req_url = uri
                anchor = None
@ -123,11 +129,8 @@ class CheckExternalLinksBuilder(Builder):
                req_url = encode_uri(req_url)
            try:
-                if anchor and self.app.config.linkcheck_anchors and \
+                if anchor and self.app.config.linkcheck_anchors:
                   not anchor.startswith('!'):
                    # Read the whole document and see if #anchor exists
                    # (Anchors starting with ! are ignored since they are
                    # commonly used for dynamic pages)
                    response = requests.get(req_url, stream=True, headers=self.headers,
                                            **kwargs)
                    found = check_anchor(response, unquote(anchor))
@ -294,3 +297,6 @@ def setup(app):
    app.add_config_value('linkcheck_timeout', None, None, [int])
    app.add_config_value('linkcheck_workers', 5, None)
    app.add_config_value('linkcheck_anchors', True, None)
    # Anchors starting with ! are ignored since they are
    # commonly used for dynamic pages
    app.add_config_value('linkcheck_anchors_ignore', ["^!"], None)
--- a/tests/roots/test-linkcheck/links.txt
+++ b/tests/roots/test-linkcheck/links.txt
@ -2,3 +2,10 @@ This is from CPython documentation.
 * Also, if there is a `default namespace <https://www.w3.org/TR/2006/REC-xml-names-20060816/#defaulting>`__, that full URI gets prepended to all of the non-prefixed tags.
 * The `SSMEDIAN <https://help.gnome.org/users/gnumeric/stable/gnumeric.html#gnumeric-function-SSMEDIAN>`_ function in the Gnome Gnumeric spreadsheet.
 Some additional anchors to exercise ignore code
 * `Example Bar invalid <http://example.com/#!bar>`_
 * `Example Bar invalid <http://example.com#!bar>`_ tests that the default anchor-ignore pattern for #! does not require a preceding /
 * `Example Bar invalid <http://example.com/#top>`_
--- a/tests/test_build_linkcheck.py
+++ b/tests/test_build_linkcheck.py
@ -14,12 +14,25 @@ from util import with_app
@with_app('linkcheck', testroot='linkcheck', freshenv=True)
-def test_all(app, status, warning):
+def test_defaults(app, status, warning):
    app.builder.build_all()
    assert (app.outdir / 'output.txt').exists()
    content = (app.outdir / 'output.txt').text()
-    # expect all ok
+    print(content)
-    assert not content
+    # looking for #top should fail
    assert "Anchor 'top' not found" in content
    assert len(content.splitlines()) == 1
@with_app('linkcheck', testroot='linkcheck', freshenv=True,
          confoverrides={'linkcheck_anchors_ignore': ["^!", "^top$"]})
 def test_anchors_ignored(app, status, warning):
    app.builder.build_all()
    assert (app.outdir / 'output.txt').exists()
    content = (app.outdir / 'output.txt').text()
    # expect all ok when excluding #top
    assert not content