mirror of
https://github.com/sphinx-doc/sphinx.git
synced 2025-02-25 18:55:22 -06:00
refactor: linkcheck: Separate thread manager feature from builder class
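To reduce the complexity of the linkcheck builder, this commit moves its thread-management logic (starting the worker threads, queueing hyperlinks for checking, and shutting the workers down) out of the builder class and into a new class, HyperlinkAvailabilityChecker.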
This commit is contained in:
parent
30bc4d450a
commit
5c223d20d6
3
CHANGES
3
CHANGES
@ -23,7 +23,10 @@ Deprecated
|
|||||||
* ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.broken``
|
* ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.broken``
|
||||||
* ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.good``
|
* ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.good``
|
||||||
* ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.redirected``
|
* ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.redirected``
|
||||||
|
* ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.rqueue``
|
||||||
* ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.to_ignore``
|
* ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.to_ignore``
|
||||||
|
* ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.workers``
|
||||||
|
* ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.wqueue``
|
||||||
* ``sphinx.builders.linkcheck.node_line_or_0()``
|
* ``sphinx.builders.linkcheck.node_line_or_0()``
|
||||||
* ``sphinx.ext.autodoc.AttributeDocumenter.isinstanceattribute()``
|
* ``sphinx.ext.autodoc.AttributeDocumenter.isinstanceattribute()``
|
||||||
* ``sphinx.ext.autodoc.directive.DocumenterBridge.reporter``
|
* ``sphinx.ext.autodoc.directive.DocumenterBridge.reporter``
|
||||||
|
@ -52,11 +52,26 @@ The following is a list of deprecated interfaces.
|
|||||||
- 5.0
|
- 5.0
|
||||||
- N/A
|
- N/A
|
||||||
|
|
||||||
|
* - ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.rqueue``
|
||||||
|
- 3.5
|
||||||
|
- 5.0
|
||||||
|
- N/A
|
||||||
|
|
||||||
* - ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.to_ignore``
|
* - ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.to_ignore``
|
||||||
- 3.5
|
- 3.5
|
||||||
- 5.0
|
- 5.0
|
||||||
- N/A
|
- N/A
|
||||||
|
|
||||||
|
* - ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.workers``
|
||||||
|
- 3.5
|
||||||
|
- 5.0
|
||||||
|
- N/A
|
||||||
|
|
||||||
|
* - ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.wqueue``
|
||||||
|
- 3.5
|
||||||
|
- 5.0
|
||||||
|
- N/A
|
||||||
|
|
||||||
* - ``sphinx.builders.linkcheck.node_line_or_0()``
|
* - ``sphinx.builders.linkcheck.node_line_or_0()``
|
||||||
- 3.5
|
- 3.5
|
||||||
- 5.0
|
- 5.0
|
||||||
|
@ -19,7 +19,7 @@ from email.utils import parsedate_to_datetime
|
|||||||
from html.parser import HTMLParser
|
from html.parser import HTMLParser
|
||||||
from os import path
|
from os import path
|
||||||
from threading import Thread
|
from threading import Thread
|
||||||
from typing import Any, Dict, List, NamedTuple, Optional, Pattern, Set, Tuple, cast
|
from typing import Any, Dict, Generator, List, NamedTuple, Optional, Pattern, Set, Tuple, cast
|
||||||
from urllib.parse import unquote, urlparse
|
from urllib.parse import unquote, urlparse
|
||||||
|
|
||||||
from docutils import nodes
|
from docutils import nodes
|
||||||
@ -121,17 +121,8 @@ class CheckExternalLinksBuilder(DummyBuilder):
|
|||||||
socket.setdefaulttimeout(5.0)
|
socket.setdefaulttimeout(5.0)
|
||||||
|
|
||||||
# create queues and worker threads
|
# create queues and worker threads
|
||||||
self.rate_limits = {} # type: Dict[str, RateLimit]
|
self._wqueue = queue.PriorityQueue() # type: queue.PriorityQueue
|
||||||
self.wqueue = queue.PriorityQueue() # type: queue.PriorityQueue
|
self._rqueue = queue.Queue() # type: queue.Queue
|
||||||
self.rqueue = queue.Queue() # type: queue.Queue
|
|
||||||
self.workers = [] # type: List[Thread]
|
|
||||||
for i in range(self.config.linkcheck_workers):
|
|
||||||
thread = HyperlinkAvailabilityCheckWorker(self)
|
|
||||||
thread.start()
|
|
||||||
self.workers.append(thread)
|
|
||||||
|
|
||||||
def is_ignored_uri(self, uri: str) -> bool:
|
|
||||||
return any(pat.match(uri) for pat in self.to_ignore)
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def anchors_ignore(self) -> List[Pattern]:
|
def anchors_ignore(self) -> List[Pattern]:
|
||||||
@ -202,7 +193,31 @@ class CheckExternalLinksBuilder(DummyBuilder):
|
|||||||
RemovedInSphinx50Warning,
|
RemovedInSphinx50Warning,
|
||||||
stacklevel=2,
|
stacklevel=2,
|
||||||
)
|
)
|
||||||
return HyperlinkAvailabilityCheckWorker(self).limit_rate(response)
|
return HyperlinkAvailabilityCheckWorker(self, None, None, {}).limit_rate(response)
|
||||||
|
|
||||||
|
def rqueue(self, response: Response) -> queue.Queue:
|
||||||
|
warnings.warn(
|
||||||
|
"%s.%s is deprecated." % (self.__class__.__name__, "rqueue"),
|
||||||
|
RemovedInSphinx50Warning,
|
||||||
|
stacklevel=2,
|
||||||
|
)
|
||||||
|
return self._rqueue
|
||||||
|
|
||||||
|
def workers(self, response: Response) -> List[Thread]:
|
||||||
|
warnings.warn(
|
||||||
|
"%s.%s is deprecated." % (self.__class__.__name__, "workers"),
|
||||||
|
RemovedInSphinx50Warning,
|
||||||
|
stacklevel=2,
|
||||||
|
)
|
||||||
|
return []
|
||||||
|
|
||||||
|
def wqueue(self, response: Response) -> queue.Queue:
|
||||||
|
warnings.warn(
|
||||||
|
"%s.%s is deprecated." % (self.__class__.__name__, "wqueue"),
|
||||||
|
RemovedInSphinx50Warning,
|
||||||
|
stacklevel=2,
|
||||||
|
)
|
||||||
|
return self._wqueue
|
||||||
|
|
||||||
def process_result(self, result: Tuple[str, str, int, str, str, int]) -> None:
|
def process_result(self, result: Tuple[str, str, int, str, str, int]) -> None:
|
||||||
uri, docname, lineno, status, info, code = result
|
uri, docname, lineno, status, info, code = result
|
||||||
@ -268,49 +283,84 @@ class CheckExternalLinksBuilder(DummyBuilder):
|
|||||||
self.json_outfile.write('\n')
|
self.json_outfile.write('\n')
|
||||||
|
|
||||||
def finish(self) -> None:
|
def finish(self) -> None:
|
||||||
|
checker = HyperlinkAvailabilityChecker(self)
|
||||||
logger.info('')
|
logger.info('')
|
||||||
|
|
||||||
with open(path.join(self.outdir, 'output.txt'), 'w') as self.txt_outfile,\
|
with open(path.join(self.outdir, 'output.txt'), 'w') as self.txt_outfile,\
|
||||||
open(path.join(self.outdir, 'output.json'), 'w') as self.json_outfile:
|
open(path.join(self.outdir, 'output.json'), 'w') as self.json_outfile:
|
||||||
|
for result in checker.check(self.hyperlinks):
|
||||||
|
self.process_result(result)
|
||||||
|
|
||||||
|
if self._broken:
|
||||||
|
self.app.statuscode = 1
|
||||||
|
|
||||||
|
|
||||||
|
class HyperlinkAvailabilityChecker:
|
||||||
|
def __init__(self, builder: CheckExternalLinksBuilder) -> None:
|
||||||
|
self.builder = builder
|
||||||
|
self.config = builder.config
|
||||||
|
self.rate_limits = {} # type: Dict[str, RateLimit]
|
||||||
|
self.workers = [] # type: List[Thread]
|
||||||
|
|
||||||
|
self.to_ignore = [re.compile(x) for x in self.config.linkcheck_ignore]
|
||||||
|
|
||||||
|
if builder:
|
||||||
|
self.rqueue = builder._rqueue
|
||||||
|
self.wqueue = builder._wqueue
|
||||||
|
else:
|
||||||
|
self.rqueue = queue.Queue()
|
||||||
|
self.wqueue = queue.PriorityQueue()
|
||||||
|
|
||||||
|
def invoke_threads(self) -> None:
|
||||||
|
for i in range(self.config.linkcheck_workers):
|
||||||
|
thread = HyperlinkAvailabilityCheckWorker(self.builder, self.rqueue, self.wqueue,
|
||||||
|
self.rate_limits)
|
||||||
|
thread.start()
|
||||||
|
self.workers.append(thread)
|
||||||
|
|
||||||
|
def shutdown_threads(self) -> None:
|
||||||
|
self.wqueue.join()
|
||||||
|
for worker in self.workers:
|
||||||
|
self.wqueue.put((CHECK_IMMEDIATELY, None, None, None), False)
|
||||||
|
|
||||||
|
def check(self, hyperlinks: Dict[str, Hyperlink]) -> Generator[CheckResult, None, None]:
|
||||||
|
self.invoke_threads()
|
||||||
|
|
||||||
total_links = 0
|
total_links = 0
|
||||||
for hyperlink in self.hyperlinks.values():
|
for hyperlink in hyperlinks.values():
|
||||||
if self.is_ignored_uri(hyperlink.uri):
|
if self.is_ignored_uri(hyperlink.uri):
|
||||||
self.process_result(
|
yield CheckResult(hyperlink.uri, hyperlink.docname, hyperlink.lineno,
|
||||||
CheckResult(hyperlink.uri, hyperlink.docname, hyperlink.lineno,
|
'ignored', '', 0)
|
||||||
'ignored', '', 0))
|
|
||||||
else:
|
else:
|
||||||
self.wqueue.put(hyperlink, False)
|
self.wqueue.put(hyperlink, False)
|
||||||
total_links += 1
|
total_links += 1
|
||||||
|
|
||||||
done = 0
|
done = 0
|
||||||
while done < total_links:
|
while done < total_links:
|
||||||
self.process_result(self.rqueue.get())
|
yield self.rqueue.get()
|
||||||
done += 1
|
done += 1
|
||||||
|
|
||||||
if self._broken:
|
self.shutdown_threads()
|
||||||
self.app.statuscode = 1
|
|
||||||
|
|
||||||
self.wqueue.join()
|
def is_ignored_uri(self, uri: str) -> bool:
|
||||||
# Shutdown threads.
|
return any(pat.match(uri) for pat in self.to_ignore)
|
||||||
for worker in self.workers:
|
|
||||||
self.wqueue.put((CHECK_IMMEDIATELY, None, None, None), False)
|
|
||||||
|
|
||||||
|
|
||||||
class HyperlinkAvailabilityCheckWorker(Thread):
|
class HyperlinkAvailabilityCheckWorker(Thread):
|
||||||
"""A worker class for checking the availability of hyperlinks."""
|
"""A worker class for checking the availability of hyperlinks."""
|
||||||
|
|
||||||
def __init__(self, builder: CheckExternalLinksBuilder) -> None:
|
def __init__(self, builder: CheckExternalLinksBuilder, rqueue: queue.Queue,
|
||||||
|
wqueue: queue.Queue, rate_limits: Dict[str, RateLimit]) -> None:
|
||||||
self.config = builder.config
|
self.config = builder.config
|
||||||
self.env = builder.env
|
self.env = builder.env
|
||||||
self.rate_limits = builder.rate_limits
|
self.rate_limits = rate_limits
|
||||||
self.rqueue = builder.rqueue
|
self.rqueue = rqueue
|
||||||
self.wqueue = builder.wqueue
|
self.wqueue = wqueue
|
||||||
|
|
||||||
self.anchors_ignore = [re.compile(x)
|
self.anchors_ignore = [re.compile(x)
|
||||||
for x in self.config.linkcheck_anchors_ignore]
|
for x in self.config.linkcheck_anchors_ignore]
|
||||||
self.auth = [(re.compile(pattern), auth_info) for pattern, auth_info
|
self.auth = [(re.compile(pattern), auth_info) for pattern, auth_info
|
||||||
in self.config.linkcheck_auth]
|
in self.config.linkcheck_auth]
|
||||||
self.to_ignore = [re.compile(x) for x in self.config.linkcheck_ignore]
|
|
||||||
|
|
||||||
self._good = builder._good
|
self._good = builder._good
|
||||||
self._broken = builder._broken
|
self._broken = builder._broken
|
||||||
|
@ -21,7 +21,7 @@ from unittest import mock
|
|||||||
import pytest
|
import pytest
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
from sphinx.builders.linkcheck import (CheckExternalLinksBuilder,
|
from sphinx.builders.linkcheck import (CheckExternalLinksBuilder, HyperlinkAvailabilityChecker,
|
||||||
HyperlinkAvailabilityCheckWorker, RateLimit)
|
HyperlinkAvailabilityCheckWorker, RateLimit)
|
||||||
from sphinx.util.console import strip_colors
|
from sphinx.util.console import strip_colors
|
||||||
|
|
||||||
@ -536,10 +536,12 @@ class FakeResponse:
|
|||||||
|
|
||||||
|
|
||||||
def test_limit_rate_default_sleep(app):
|
def test_limit_rate_default_sleep(app):
|
||||||
checker = CheckExternalLinksBuilder(app)
|
builder = CheckExternalLinksBuilder(app)
|
||||||
checker.init()
|
builder.init()
|
||||||
|
checker = HyperlinkAvailabilityChecker(builder)
|
||||||
checker.rate_limits = {}
|
checker.rate_limits = {}
|
||||||
worker = HyperlinkAvailabilityCheckWorker(checker)
|
worker = HyperlinkAvailabilityCheckWorker(builder, checker.rqueue, checker.wqueue,
|
||||||
|
checker.rate_limits)
|
||||||
with mock.patch('time.time', return_value=0.0):
|
with mock.patch('time.time', return_value=0.0):
|
||||||
next_check = worker.limit_rate(FakeResponse())
|
next_check = worker.limit_rate(FakeResponse())
|
||||||
assert next_check == 60.0
|
assert next_check == 60.0
|
||||||
@ -547,40 +549,48 @@ def test_limit_rate_default_sleep(app):
|
|||||||
|
|
||||||
def test_limit_rate_user_max_delay(app):
|
def test_limit_rate_user_max_delay(app):
|
||||||
app.config.linkcheck_rate_limit_timeout = 0.0
|
app.config.linkcheck_rate_limit_timeout = 0.0
|
||||||
checker = CheckExternalLinksBuilder(app)
|
builder = CheckExternalLinksBuilder(app)
|
||||||
checker.init()
|
builder.init()
|
||||||
|
checker = HyperlinkAvailabilityChecker(builder)
|
||||||
checker.rate_limits = {}
|
checker.rate_limits = {}
|
||||||
worker = HyperlinkAvailabilityCheckWorker(checker)
|
worker = HyperlinkAvailabilityCheckWorker(builder, checker.rqueue, checker.wqueue,
|
||||||
|
checker.rate_limits)
|
||||||
next_check = worker.limit_rate(FakeResponse())
|
next_check = worker.limit_rate(FakeResponse())
|
||||||
assert next_check is None
|
assert next_check is None
|
||||||
|
|
||||||
|
|
||||||
def test_limit_rate_doubles_previous_wait_time(app):
|
def test_limit_rate_doubles_previous_wait_time(app):
|
||||||
checker = CheckExternalLinksBuilder(app)
|
builder = CheckExternalLinksBuilder(app)
|
||||||
checker.init()
|
builder.init()
|
||||||
|
checker = HyperlinkAvailabilityChecker(builder)
|
||||||
checker.rate_limits = {"localhost": RateLimit(60.0, 0.0)}
|
checker.rate_limits = {"localhost": RateLimit(60.0, 0.0)}
|
||||||
worker = HyperlinkAvailabilityCheckWorker(checker)
|
worker = HyperlinkAvailabilityCheckWorker(builder, checker.rqueue, checker.wqueue,
|
||||||
|
checker.rate_limits)
|
||||||
with mock.patch('time.time', return_value=0.0):
|
with mock.patch('time.time', return_value=0.0):
|
||||||
next_check = worker.limit_rate(FakeResponse())
|
next_check = worker.limit_rate(FakeResponse())
|
||||||
assert next_check == 120.0
|
assert next_check == 120.0
|
||||||
|
|
||||||
|
|
||||||
def test_limit_rate_clips_wait_time_to_max_time(app):
|
def test_limit_rate_clips_wait_time_to_max_time(app):
|
||||||
checker = CheckExternalLinksBuilder(app)
|
|
||||||
checker.init()
|
|
||||||
app.config.linkcheck_rate_limit_timeout = 90.0
|
app.config.linkcheck_rate_limit_timeout = 90.0
|
||||||
|
builder = CheckExternalLinksBuilder(app)
|
||||||
|
builder.init()
|
||||||
|
checker = HyperlinkAvailabilityChecker(builder)
|
||||||
checker.rate_limits = {"localhost": RateLimit(60.0, 0.0)}
|
checker.rate_limits = {"localhost": RateLimit(60.0, 0.0)}
|
||||||
worker = HyperlinkAvailabilityCheckWorker(checker)
|
worker = HyperlinkAvailabilityCheckWorker(builder, checker.rqueue, checker.wqueue,
|
||||||
|
checker.rate_limits)
|
||||||
with mock.patch('time.time', return_value=0.0):
|
with mock.patch('time.time', return_value=0.0):
|
||||||
next_check = worker.limit_rate(FakeResponse())
|
next_check = worker.limit_rate(FakeResponse())
|
||||||
assert next_check == 90.0
|
assert next_check == 90.0
|
||||||
|
|
||||||
|
|
||||||
def test_limit_rate_bails_out_after_waiting_max_time(app):
|
def test_limit_rate_bails_out_after_waiting_max_time(app):
|
||||||
checker = CheckExternalLinksBuilder(app)
|
|
||||||
checker.init()
|
|
||||||
app.config.linkcheck_rate_limit_timeout = 90.0
|
app.config.linkcheck_rate_limit_timeout = 90.0
|
||||||
|
builder = CheckExternalLinksBuilder(app)
|
||||||
|
builder.init()
|
||||||
|
checker = HyperlinkAvailabilityChecker(builder)
|
||||||
checker.rate_limits = {"localhost": RateLimit(90.0, 0.0)}
|
checker.rate_limits = {"localhost": RateLimit(90.0, 0.0)}
|
||||||
worker = HyperlinkAvailabilityCheckWorker(checker)
|
worker = HyperlinkAvailabilityCheckWorker(builder, checker.rqueue, checker.wqueue,
|
||||||
|
checker.rate_limits)
|
||||||
next_check = worker.limit_rate(FakeResponse())
|
next_check = worker.limit_rate(FakeResponse())
|
||||||
assert next_check is None
|
assert next_check is None
|
||||||
|
Loading…
Reference in New Issue
Block a user