mirror of
https://github.com/sphinx-doc/sphinx.git
synced 2025-02-25 18:55:22 -06:00
Merge pull request #8702 from tk0miya/4304_linkcheck_same_url
linkcheck: Do not check the availability of the same URL repeatedly
This commit is contained in:
3
CHANGES
3
CHANGES
@@ -10,6 +10,7 @@ Incompatible changes
|
||||
Deprecated
|
||||
----------
|
||||
|
||||
* ``sphinx.builders.linkcheck.node_line_or_0()``
|
||||
* ``sphinx.ext.autodoc.AttributeDocumenter.isinstanceattribute()``
|
||||
* ``sphinx.ext.autodoc.directive.DocumenterBridge.reporter``
|
||||
* ``sphinx.ext.autodoc.importer.get_module_members()``
|
||||
@@ -60,6 +61,8 @@ Bugs fixed
|
||||
+ or ^) are used as keystrokes
|
||||
* #8629: html: A type warning for html_use_opensearch is shown twice
|
||||
* #8665: html theme: Could not override globaltoc_maxdepth in theme.conf
|
||||
* #4304: linkcheck: Fix race condition that could lead to checking the
|
||||
availability of the same URL twice
|
||||
* #8094: texinfo: image files in a different directory from the document are not
|
||||
copied
|
||||
* #8671: :confval:`highlight_options` is not working
|
||||
|
@@ -26,6 +26,11 @@ The following is a list of deprecated interfaces.
|
||||
- (will be) Removed
|
||||
- Alternatives
|
||||
|
||||
* - ``sphinx.builders.linkcheck.node_line_or_0()``
|
||||
- 3.5
|
||||
- 5.0
|
||||
- ``sphinx.util.nodes.get_node_line()``
|
||||
|
||||
* - ``sphinx.ext.autodoc.AttributeDocumenter.isinstanceattribute()``
|
||||
- 3.5
|
||||
- 5.0
|
||||
|
@@ -14,11 +14,12 @@ import re
|
||||
import socket
|
||||
import threading
|
||||
import time
|
||||
import warnings
|
||||
from datetime import datetime, timezone
|
||||
from email.utils import parsedate_to_datetime
|
||||
from html.parser import HTMLParser
|
||||
from os import path
|
||||
from typing import Any, Dict, List, NamedTuple, Optional, Set, Tuple
|
||||
from typing import Any, Dict, List, NamedTuple, Optional, Set, Tuple, cast
|
||||
from urllib.parse import unquote, urlparse
|
||||
|
||||
from docutils import nodes
|
||||
@@ -28,7 +29,9 @@ from requests.exceptions import HTTPError, TooManyRedirects
|
||||
|
||||
from sphinx.application import Sphinx
|
||||
from sphinx.builders import Builder
|
||||
from sphinx.deprecation import RemovedInSphinx40Warning
|
||||
from sphinx.locale import __
|
||||
from sphinx.transforms.post_transforms import SphinxPostTransform
|
||||
from sphinx.util import encode_uri, logging, requests
|
||||
from sphinx.util.console import darkgray, darkgreen, purple, red, turquoise # type: ignore
|
||||
from sphinx.util.nodes import get_node_line
|
||||
@@ -37,6 +40,10 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
uri_re = re.compile('([a-z]+:)?//') # matches to foo:// and // (a protocol relative URL)
|
||||
|
||||
# One link to be checked.  HyperlinkCollector builds these as
# ``Hyperlink(CHECK_IMMEDIATELY, uri, docname, lineno)`` and finish() puts
# them on the work queue.  The Optional fields may be None.
# NOTE(review): next_check presumably comes first so that queued items are
# ordered by their scheduled check time -- confirm against the worker loop.
Hyperlink = NamedTuple('Hyperlink', (('next_check', float),
                                     ('uri', Optional[str]),
                                     ('docname', Optional[str]),
                                     ('lineno', Optional[int])))
# Pair of floats: ``delay`` and ``next_check``.
# NOTE(review): presumably per-host rate-limit state (the tests assign
# ``rate_limits = {"localhost": RateLimit(90.0, 0.0)}``) -- confirm the units.
RateLimit = NamedTuple('RateLimit', (('delay', float), ('next_check', float)))
|
||||
|
||||
DEFAULT_REQUEST_HEADERS = {
|
||||
@@ -52,6 +59,8 @@ def node_line_or_0(node: Element) -> int:
|
||||
PriorityQueue items must be comparable. The line number is part of the
|
||||
tuple used by the PriorityQueue, keep an homogeneous type for comparison.
|
||||
"""
|
||||
warnings.warn('node_line_or_0() is deprecated.',
|
||||
RemovedInSphinx40Warning, stacklevel=2)
|
||||
return get_node_line(node) or 0
|
||||
|
||||
|
||||
@@ -98,6 +107,7 @@ class CheckExternalLinksBuilder(Builder):
|
||||
'%(outdir)s/output.txt')
|
||||
|
||||
def init(self) -> None:
|
||||
self.hyperlinks = {} # type: Dict[str, Hyperlink]
|
||||
self.to_ignore = [re.compile(x) for x in self.app.config.linkcheck_ignore]
|
||||
self.anchors_ignore = [re.compile(x)
|
||||
for x in self.app.config.linkcheck_anchors_ignore]
|
||||
@@ -406,35 +416,7 @@ class CheckExternalLinksBuilder(Builder):
|
||||
return
|
||||
|
||||
def write_doc(self, docname: str, doctree: Node) -> None:
|
||||
logger.info('')
|
||||
n = 0
|
||||
|
||||
# reference nodes
|
||||
for refnode in doctree.traverse(nodes.reference):
|
||||
if 'refuri' not in refnode:
|
||||
continue
|
||||
uri = refnode['refuri']
|
||||
lineno = node_line_or_0(refnode)
|
||||
uri_info = (CHECK_IMMEDIATELY, uri, docname, lineno)
|
||||
self.wqueue.put(uri_info, False)
|
||||
n += 1
|
||||
|
||||
# image nodes
|
||||
for imgnode in doctree.traverse(nodes.image):
|
||||
uri = imgnode['candidates'].get('?')
|
||||
if uri and '://' in uri:
|
||||
lineno = node_line_or_0(imgnode)
|
||||
uri_info = (CHECK_IMMEDIATELY, uri, docname, lineno)
|
||||
self.wqueue.put(uri_info, False)
|
||||
n += 1
|
||||
|
||||
done = 0
|
||||
while done < n:
|
||||
self.process_result(self.rqueue.get())
|
||||
done += 1
|
||||
|
||||
if self.broken:
|
||||
self.app.statuscode = 1
|
||||
pass
|
||||
|
||||
def write_entry(self, what: str, docname: str, filename: str, line: int,
|
||||
uri: str) -> None:
|
||||
@@ -447,14 +429,58 @@ class CheckExternalLinksBuilder(Builder):
|
||||
output.write('\n')
|
||||
|
||||
def finish(self) -> None:
    """Check every collected hyperlink and then release the workers.

    All entries of ``self.hyperlinks`` are put on the work queue, one
    result per queued entry is taken from the result queue and passed to
    ``process_result()``, and the application status code is set to 1
    when ``self.broken`` is truthy.
    """
    logger.info('')

    # Hand every collected link to the workers, counting how many results
    # have to be collected afterwards.
    queued = 0
    for link in self.hyperlinks.values():
        self.wqueue.put(link, False)
        queued += 1

    # Exactly one result is read per queued link.
    for _ in range(queued):
        self.process_result(self.rqueue.get())

    if self.broken:
        self.app.statuscode = 1

    self.wqueue.join()
    # Shutdown threads.
    for _ in self.workers:
        self.wqueue.put((CHECK_IMMEDIATELY, None, None, None), False)
|
||||
|
||||
|
||||
class HyperlinkCollector(SphinxPostTransform):
    """Gather the hyperlinks of a document for the ``linkcheck`` builder.

    Each URI found on a reference node (``refuri``) or on an image node
    (the ``'?'`` candidate, when it contains ``://``) is stored in
    ``builder.hyperlinks`` keyed by the URI itself.  A URI that is already
    present is left untouched, so only its first occurrence is kept.
    """

    builders = ('linkcheck',)
    default_priority = 800

    def run(self, **kwargs: Any) -> None:
        """Record the document's URIs in the builder's ``hyperlinks`` dict."""
        builder = cast(CheckExternalLinksBuilder, self.app.builder)
        known_links = builder.hyperlinks

        # reference nodes
        for refnode in self.document.traverse(nodes.reference):
            if 'refuri' in refnode:
                target = refnode['refuri']
                line = get_node_line(refnode)
                link = Hyperlink(CHECK_IMMEDIATELY, target, self.env.docname, line)
                # setdefault() keeps the entry of the first occurrence.
                known_links.setdefault(target, link)

        # image nodes
        for imgnode in self.document.traverse(nodes.image):
            target = imgnode['candidates'].get('?')
            if not target or '://' not in target:
                continue
            line = get_node_line(imgnode)
            link = Hyperlink(CHECK_IMMEDIATELY, target, self.env.docname, line)
            known_links.setdefault(target, link)
|
||||
|
||||
|
||||
def setup(app: Sphinx) -> Dict[str, Any]:
|
||||
app.add_builder(CheckExternalLinksBuilder)
|
||||
app.add_post_transform(HyperlinkCollector)
|
||||
|
||||
app.add_config_value('linkcheck_ignore', [], None)
|
||||
app.add_config_value('linkcheck_auth', [], None)
|
||||
|
@@ -1 +0,0 @@
|
||||
exclude_patterns = ['_build']
|
@@ -1,6 +0,0 @@
|
||||
.. image:: http://localhost:7777/
|
||||
:target: http://localhost:7777/
|
||||
|
||||
`weblate.org`_
|
||||
|
||||
.. _weblate.org: http://localhost:7777/
|
@@ -573,40 +573,3 @@ def test_limit_rate_bails_out_after_waiting_max_time(app):
|
||||
checker.rate_limits = {"localhost": RateLimit(90.0, 0.0)}
|
||||
next_check = checker.limit_rate(FakeResponse())
|
||||
assert next_check is None
|
||||
|
||||
|
||||
@pytest.mark.sphinx(
    'linkcheck', testroot='linkcheck-localserver-two-links', freshenv=True,
)
def test_priorityqueue_items_are_comparable(app):
    """Build the two-links test root against a local server answering OK
    and compare the rows of ``output.json`` with the expected results."""

    def working(lineno):
        # Expected JSON row for a working link reported at ``lineno``.
        return {
            'filename': 'index.rst',
            'lineno': lineno,
            'status': 'working',
            'code': 0,
            'uri': 'http://localhost:7777/',
            'info': '',
        }

    with http_server(OKHandler):
        app.builder.build_all()

    output = (app.outdir / 'output.json').read_text()
    rows = list(map(json.loads, sorted(output.splitlines())))
    assert rows == [
        # Should not be None.
        working(0),
        working(0),
        working(4),
    ]
|
||||
|
Reference in New Issue
Block a user