linkcheck: Exclude links from matched documents

This commit is contained in:
Christian Roth
2021-11-26 18:14:29 +01:00
committed by François Freitag
parent 471022b6df
commit 10023da895
8 changed files with 72 additions and 0 deletions

View File

@@ -23,6 +23,8 @@ Features added
layout via CSS
* #9899: py domain: Allow specifying a cross-reference specifier (``.`` and
``~``) in the ``:type:`` option
* #9894: linkcheck: add option ``linkcheck_exclude_documents`` to disable link
checking in matched documents.
Bugs fixed
----------

View File

@@ -2683,6 +2683,19 @@ Options for the linkcheck builder
.. versionadded:: 3.4
.. confval:: linkcheck_exclude_documents
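A list of regular expressions that match documents in which Sphinx should
not check the validity of links. Each expression is matched against the
document name (the source file name without its suffix) starting at the
beginning of the name, as with :func:`re.match`. This can be used for
permitting link decay in legacy or historical sections of the documentation.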
Example::
# ignore all links in documents located in a subfolder named 'legacy'
linkcheck_exclude_documents = [r'.*/legacy/.*']
.. versionadded:: 4.4
Options for the XML builder
---------------------------

View File

@@ -378,6 +378,8 @@ class HyperlinkAvailabilityCheckWorker(Thread):
self.anchors_ignore = [re.compile(x)
for x in self.config.linkcheck_anchors_ignore]
self.documents_exclude = [re.compile(doc)
for doc in self.config.linkcheck_exclude_documents]
self.auth = [(re.compile(pattern), auth_info) for pattern, auth_info
in self.config.linkcheck_auth]
@@ -519,6 +521,15 @@ class HyperlinkAvailabilityCheckWorker(Thread):
def check(docname: str) -> Tuple[str, str, int]:
# check for various conditions without bothering the network
for doc_matcher in self.documents_exclude:
if doc_matcher.match(docname):
info = (
f'{docname} matched {doc_matcher.pattern} from '
'linkcheck_exclude_documents'
)
return 'ignored', info, 0
if len(uri) == 0 or uri.startswith(('#', 'mailto:', 'tel:')):
return 'unchecked', '', 0
elif not uri.startswith(('http:', 'https:')):
@@ -699,6 +710,7 @@ def setup(app: Sphinx) -> Dict[str, Any]:
app.add_post_transform(HyperlinkCollector)
app.add_config_value('linkcheck_ignore', [], None)
app.add_config_value('linkcheck_exclude_documents', [], None)
app.add_config_value('linkcheck_allowed_redirects', {}, None)
app.add_config_value('linkcheck_auth', [], None)
app.add_config_value('linkcheck_request_headers', {}, None)

View File

@@ -0,0 +1,5 @@
Broken link
===========
Some links are `broken <https://www.sphinx-doc.org/this-is-another-broken-link>`__
but sometimes not worrying about some broken links is a valid strategy.

View File

@@ -0,0 +1,5 @@
Broken link
===========
Some links are `broken <https://www.sphinx-doc.org/this-is-a-broken-link>`__
but sometimes not worrying about some broken links is a valid strategy.

View File

@@ -0,0 +1,5 @@
# Configuration for the linkcheck "exclude documents" test root.

exclude_patterns = ['_build']

# Two differently shaped expressions, one per document in this test root:
# the first is anchored at both ends, the second relies on a character class.
# The linkcheck worker applies each pattern to the document name with
# ``re.match``, so both are effectively anchored at the start of the name.
linkcheck_exclude_documents = ['^broken_link$', 'br[0-9]ken_link']

View File

@@ -0,0 +1,3 @@
.. toctree::
broken_link
br0ken_link

View File

@@ -625,3 +625,30 @@ def test_get_after_head_raises_connection_error(app):
"uri": "http://localhost:7777/",
"info": "",
}
@pytest.mark.sphinx('linkcheck', testroot='linkcheck-documents_exclude', freshenv=True)
def test_linkcheck_exclude_documents(app):
    """Check that links in excluded documents are reported as ``ignored``.

    The test root configures two ``linkcheck_exclude_documents`` patterns,
    each matching one of its two documents. Every link found in those
    documents must be recorded in ``output.json`` with status ``ignored``
    and an ``info`` message naming the document and the pattern it matched.
    """
    app.build()

    # Explicit encoding so the result does not depend on the locale default.
    with open(app.outdir / 'output.json', encoding='utf-8') as fp:
        content = [json.loads(record) for record in fp]

    expected = [
        {
            'filename': 'broken_link.rst',
            'lineno': 4,
            'status': 'ignored',
            'code': 0,
            'uri': 'https://www.sphinx-doc.org/this-is-a-broken-link',
            'info': 'broken_link matched ^broken_link$ from linkcheck_exclude_documents',
        },
        {
            'filename': 'br0ken_link.rst',
            'lineno': 4,
            'status': 'ignored',
            'code': 0,
            'uri': 'https://www.sphinx-doc.org/this-is-another-broken-link',
            'info': 'br0ken_link matched br[0-9]ken_link from linkcheck_exclude_documents',
        },
    ]

    # Links are processed by worker threads, so the order in which records
    # are written is not guaranteed. Compare without depending on order:
    # the expected records are distinct, so equal length plus membership of
    # each one means the two collections hold exactly the same records.
    assert len(content) == len(expected)
    for record in expected:
        assert record in content