linkcheck: Exclude links from matched documents

2025-02-25 18:55:22 -06:00 · 2021-11-26 18:14:29 +01:00
parent 471022b6df
commit 10023da895
8 changed files with 72 additions and 0 deletions
--- a/2
+++ b/2
@@ -23,6 +23,8 @@ Features added
  layout via CSS
 * #9899: py domain: Allows to specify cross-reference specifier (``.`` and
  ``~``) as ``:type:`` option
+* #9894: linkcheck: add option ``linkcheck_exclude_documents`` to disable link
+  checking in matched documents.

 Bugs fixed
 ----------
--- a/doc/usage/configuration.rst
+++ b/doc/usage/configuration.rst
@@ -2683,6 +2683,19 @@ Options for the linkcheck builder

   .. versionadded:: 3.4

+.. confval:: linkcheck_exclude_documents
+
+   A list of regular expressions matched against the start of document names;
+   Sphinx does not check the validity of links in matching documents. Use it to
+   permit link decay in legacy or historical sections of the documentation.
+
+   Example::
+
+      # ignore all links in documents located in any folder named 'legacy'
+      linkcheck_exclude_documents = [r'(.*/)?legacy/.*']
+
+   .. versionadded:: 4.4
+

 Options for the XML builder
 ---------------------------
--- a/sphinx/builders/linkcheck.py
+++ b/sphinx/builders/linkcheck.py
@@ -378,6 +378,8 @@ class HyperlinkAvailabilityCheckWorker(Thread):

        self.anchors_ignore = [re.compile(x)
                               for x in self.config.linkcheck_anchors_ignore]
+        self.documents_exclude = [re.compile(doc)
+                                  for doc in self.config.linkcheck_exclude_documents]
        self.auth = [(re.compile(pattern), auth_info) for pattern, auth_info
                     in self.config.linkcheck_auth]

@@ -519,6 +521,15 @@ class HyperlinkAvailabilityCheckWorker(Thread):

        def check(docname: str) -> Tuple[str, str, int]:
            # check for various conditions without bothering the network
+
+            for doc_matcher in self.documents_exclude:
+                if doc_matcher.match(docname):
+                    info = (
+                        f'{docname} matched {doc_matcher.pattern} from '
+                        'linkcheck_exclude_documents'
+                    )
+                    return 'ignored', info, 0
+
            if len(uri) == 0 or uri.startswith(('#', 'mailto:', 'tel:')):
                return 'unchecked', '', 0
            elif not uri.startswith(('http:', 'https:')):
@@ -699,6 +710,7 @@ def setup(app: Sphinx) -> Dict[str, Any]:
    app.add_post_transform(HyperlinkCollector)

    app.add_config_value('linkcheck_ignore', [], None)
+    app.add_config_value('linkcheck_exclude_documents', [], None)
    app.add_config_value('linkcheck_allowed_redirects', {}, None)
    app.add_config_value('linkcheck_auth', [], None)
    app.add_config_value('linkcheck_request_headers', {}, None)
--- a/tests/roots/test-linkcheck-documents_exclude/br0ken_link.rst
+++ b/tests/roots/test-linkcheck-documents_exclude/br0ken_link.rst
@@ -0,0 +1,5 @@
+Broken link
+===========
+
+Some links are `broken <https://www.sphinx-doc.org/this-is-another-broken-link>`__
+but sometimes not worrying about some broken links is a valid strategy.
--- a/tests/roots/test-linkcheck-documents_exclude/broken_link.rst
+++ b/tests/roots/test-linkcheck-documents_exclude/broken_link.rst
@@ -0,0 +1,5 @@
+Broken link
+===========
+
+Some links are `broken <https://www.sphinx-doc.org/this-is-a-broken-link>`__
+but sometimes not worrying about some broken links is a valid strategy.
--- a/tests/roots/test-linkcheck-documents_exclude/conf.py
+++ b/tests/roots/test-linkcheck-documents_exclude/conf.py
@@ -0,0 +1,5 @@
+exclude_patterns = ['_build']
+linkcheck_exclude_documents = [
+    '^broken_link$',  # anchored pattern: excludes only the document broken_link
+    'br[0-9]ken_link',  # digit character class: excludes br0ken_link
+]
--- a/tests/roots/test-linkcheck-documents_exclude/index.rst
+++ b/tests/roots/test-linkcheck-documents_exclude/index.rst
@@ -0,0 +1,3 @@
+.. toctree::
+  broken_link
+  br0ken_link
--- a/tests/test_build_linkcheck.py
+++ b/tests/test_build_linkcheck.py
@@ -625,3 +625,30 @@ def test_get_after_head_raises_connection_error(app):
        "uri": "http://localhost:7777/",
        "info": "",
    }
+
+
+@pytest.mark.sphinx('linkcheck', testroot='linkcheck-documents_exclude', freshenv=True)
+def test_linkcheck_exclude_documents(app):
+    """Every link in a document matched by linkcheck_exclude_documents is ignored."""
+    app.build()
+    with open(app.outdir / 'output.json', encoding='utf-8') as fp:
+        records = [json.loads(record) for record in fp]
+    # The checker workers are threads: compare in URI order, not report order.
+    assert sorted(records, key=lambda record: record['uri']) == [
+        {
+            'filename': 'broken_link.rst',
+            'lineno': 4,
+            'status': 'ignored',
+            'code': 0,
+            'uri': 'https://www.sphinx-doc.org/this-is-a-broken-link',
+            'info': 'broken_link matched ^broken_link$ from linkcheck_exclude_documents',
+        },
+        {
+            'filename': 'br0ken_link.rst',
+            'lineno': 4,
+            'status': 'ignored',
+            'code': 0,
+            'uri': 'https://www.sphinx-doc.org/this-is-another-broken-link',
+            'info': 'br0ken_link matched br[0-9]ken_link from linkcheck_exclude_documents',
+        },
+    ]