mirror of
https://github.com/sphinx-doc/sphinx.git
synced 2025-02-25 18:55:22 -06:00
refactor: linkcheck: Separate thread manager feature from builder class
To reduce the complexity of the linkcheck builder, this separates the thread manager feature from the builder class as HyperlinkAvailabilityChecker.
This commit is contained in:
parent
30bc4d450a
commit
5c223d20d6
3
CHANGES
3
CHANGES
@ -23,7 +23,10 @@ Deprecated
|
||||
* ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.broken``
|
||||
* ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.good``
|
||||
* ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.redirected``
|
||||
* ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.rqueue``
|
||||
* ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.to_ignore``
|
||||
* ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.workers``
|
||||
* ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.wqueue``
|
||||
* ``sphinx.builders.linkcheck.node_line_or_0()``
|
||||
* ``sphinx.ext.autodoc.AttributeDocumenter.isinstanceattribute()``
|
||||
* ``sphinx.ext.autodoc.directive.DocumenterBridge.reporter``
|
||||
|
@ -52,11 +52,26 @@ The following is a list of deprecated interfaces.
|
||||
- 5.0
|
||||
- N/A
|
||||
|
||||
* - ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.rqueue``
|
||||
- 3.5
|
||||
- 5.0
|
||||
- N/A
|
||||
|
||||
* - ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.to_ignore``
|
||||
- 3.5
|
||||
- 5.0
|
||||
- N/A
|
||||
|
||||
* - ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.workers``
|
||||
- 3.5
|
||||
- 5.0
|
||||
- N/A
|
||||
|
||||
* - ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.wqueue``
|
||||
- 3.5
|
||||
- 5.0
|
||||
- N/A
|
||||
|
||||
* - ``sphinx.builders.linkcheck.node_line_or_0()``
|
||||
- 3.5
|
||||
- 5.0
|
||||
|
@ -19,7 +19,7 @@ from email.utils import parsedate_to_datetime
|
||||
from html.parser import HTMLParser
|
||||
from os import path
|
||||
from threading import Thread
|
||||
from typing import Any, Dict, List, NamedTuple, Optional, Pattern, Set, Tuple, cast
|
||||
from typing import Any, Dict, Generator, List, NamedTuple, Optional, Pattern, Set, Tuple, cast
|
||||
from urllib.parse import unquote, urlparse
|
||||
|
||||
from docutils import nodes
|
||||
@ -121,17 +121,8 @@ class CheckExternalLinksBuilder(DummyBuilder):
|
||||
socket.setdefaulttimeout(5.0)
|
||||
|
||||
# create queues and worker threads
|
||||
self.rate_limits = {} # type: Dict[str, RateLimit]
|
||||
self.wqueue = queue.PriorityQueue() # type: queue.PriorityQueue
|
||||
self.rqueue = queue.Queue() # type: queue.Queue
|
||||
self.workers = [] # type: List[Thread]
|
||||
for i in range(self.config.linkcheck_workers):
|
||||
thread = HyperlinkAvailabilityCheckWorker(self)
|
||||
thread.start()
|
||||
self.workers.append(thread)
|
||||
|
||||
def is_ignored_uri(self, uri: str) -> bool:
|
||||
return any(pat.match(uri) for pat in self.to_ignore)
|
||||
self._wqueue = queue.PriorityQueue() # type: queue.PriorityQueue
|
||||
self._rqueue = queue.Queue() # type: queue.Queue
|
||||
|
||||
@property
|
||||
def anchors_ignore(self) -> List[Pattern]:
|
||||
@ -202,7 +193,31 @@ class CheckExternalLinksBuilder(DummyBuilder):
|
||||
RemovedInSphinx50Warning,
|
||||
stacklevel=2,
|
||||
)
|
||||
return HyperlinkAvailabilityCheckWorker(self).limit_rate(response)
|
||||
return HyperlinkAvailabilityCheckWorker(self, None, None, {}).limit_rate(response)
|
||||
|
||||
def rqueue(self, response: Response) -> queue.Queue:
|
||||
warnings.warn(
|
||||
"%s.%s is deprecated." % (self.__class__.__name__, "rqueue"),
|
||||
RemovedInSphinx50Warning,
|
||||
stacklevel=2,
|
||||
)
|
||||
return self._rqueue
|
||||
|
||||
def workers(self, response: Response) -> List[Thread]:
|
||||
warnings.warn(
|
||||
"%s.%s is deprecated." % (self.__class__.__name__, "workers"),
|
||||
RemovedInSphinx50Warning,
|
||||
stacklevel=2,
|
||||
)
|
||||
return []
|
||||
|
||||
def wqueue(self, response: Response) -> queue.Queue:
|
||||
warnings.warn(
|
||||
"%s.%s is deprecated." % (self.__class__.__name__, "wqueue"),
|
||||
RemovedInSphinx50Warning,
|
||||
stacklevel=2,
|
||||
)
|
||||
return self._wqueue
|
||||
|
||||
def process_result(self, result: Tuple[str, str, int, str, str, int]) -> None:
|
||||
uri, docname, lineno, status, info, code = result
|
||||
@ -268,49 +283,84 @@ class CheckExternalLinksBuilder(DummyBuilder):
|
||||
self.json_outfile.write('\n')
|
||||
|
||||
def finish(self) -> None:
|
||||
checker = HyperlinkAvailabilityChecker(self)
|
||||
logger.info('')
|
||||
|
||||
with open(path.join(self.outdir, 'output.txt'), 'w') as self.txt_outfile,\
|
||||
open(path.join(self.outdir, 'output.json'), 'w') as self.json_outfile:
|
||||
for result in checker.check(self.hyperlinks):
|
||||
self.process_result(result)
|
||||
|
||||
if self._broken:
|
||||
self.app.statuscode = 1
|
||||
|
||||
|
||||
class HyperlinkAvailabilityChecker:
|
||||
def __init__(self, builder: CheckExternalLinksBuilder) -> None:
|
||||
self.builder = builder
|
||||
self.config = builder.config
|
||||
self.rate_limits = {} # type: Dict[str, RateLimit]
|
||||
self.workers = [] # type: List[Thread]
|
||||
|
||||
self.to_ignore = [re.compile(x) for x in self.config.linkcheck_ignore]
|
||||
|
||||
if builder:
|
||||
self.rqueue = builder._rqueue
|
||||
self.wqueue = builder._wqueue
|
||||
else:
|
||||
self.rqueue = queue.Queue()
|
||||
self.wqueue = queue.PriorityQueue()
|
||||
|
||||
def invoke_threads(self) -> None:
|
||||
for i in range(self.config.linkcheck_workers):
|
||||
thread = HyperlinkAvailabilityCheckWorker(self.builder, self.rqueue, self.wqueue,
|
||||
self.rate_limits)
|
||||
thread.start()
|
||||
self.workers.append(thread)
|
||||
|
||||
def shutdown_threads(self) -> None:
|
||||
self.wqueue.join()
|
||||
for worker in self.workers:
|
||||
self.wqueue.put((CHECK_IMMEDIATELY, None, None, None), False)
|
||||
|
||||
def check(self, hyperlinks: Dict[str, Hyperlink]) -> Generator[CheckResult, None, None]:
|
||||
self.invoke_threads()
|
||||
|
||||
total_links = 0
|
||||
for hyperlink in self.hyperlinks.values():
|
||||
for hyperlink in hyperlinks.values():
|
||||
if self.is_ignored_uri(hyperlink.uri):
|
||||
self.process_result(
|
||||
CheckResult(hyperlink.uri, hyperlink.docname, hyperlink.lineno,
|
||||
'ignored', '', 0))
|
||||
yield CheckResult(hyperlink.uri, hyperlink.docname, hyperlink.lineno,
|
||||
'ignored', '', 0)
|
||||
else:
|
||||
self.wqueue.put(hyperlink, False)
|
||||
total_links += 1
|
||||
|
||||
done = 0
|
||||
while done < total_links:
|
||||
self.process_result(self.rqueue.get())
|
||||
yield self.rqueue.get()
|
||||
done += 1
|
||||
|
||||
if self._broken:
|
||||
self.app.statuscode = 1
|
||||
self.shutdown_threads()
|
||||
|
||||
self.wqueue.join()
|
||||
# Shutdown threads.
|
||||
for worker in self.workers:
|
||||
self.wqueue.put((CHECK_IMMEDIATELY, None, None, None), False)
|
||||
def is_ignored_uri(self, uri: str) -> bool:
|
||||
return any(pat.match(uri) for pat in self.to_ignore)
|
||||
|
||||
|
||||
class HyperlinkAvailabilityCheckWorker(Thread):
|
||||
"""A worker class for checking the availability of hyperlinks."""
|
||||
|
||||
def __init__(self, builder: CheckExternalLinksBuilder) -> None:
|
||||
def __init__(self, builder: CheckExternalLinksBuilder, rqueue: queue.Queue,
|
||||
wqueue: queue.Queue, rate_limits: Dict[str, RateLimit]) -> None:
|
||||
self.config = builder.config
|
||||
self.env = builder.env
|
||||
self.rate_limits = builder.rate_limits
|
||||
self.rqueue = builder.rqueue
|
||||
self.wqueue = builder.wqueue
|
||||
self.rate_limits = rate_limits
|
||||
self.rqueue = rqueue
|
||||
self.wqueue = wqueue
|
||||
|
||||
self.anchors_ignore = [re.compile(x)
|
||||
for x in self.config.linkcheck_anchors_ignore]
|
||||
self.auth = [(re.compile(pattern), auth_info) for pattern, auth_info
|
||||
in self.config.linkcheck_auth]
|
||||
self.to_ignore = [re.compile(x) for x in self.config.linkcheck_ignore]
|
||||
|
||||
self._good = builder._good
|
||||
self._broken = builder._broken
|
||||
|
@ -21,7 +21,7 @@ from unittest import mock
|
||||
import pytest
|
||||
import requests
|
||||
|
||||
from sphinx.builders.linkcheck import (CheckExternalLinksBuilder,
|
||||
from sphinx.builders.linkcheck import (CheckExternalLinksBuilder, HyperlinkAvailabilityChecker,
|
||||
HyperlinkAvailabilityCheckWorker, RateLimit)
|
||||
from sphinx.util.console import strip_colors
|
||||
|
||||
@ -536,10 +536,12 @@ class FakeResponse:
|
||||
|
||||
|
||||
def test_limit_rate_default_sleep(app):
|
||||
checker = CheckExternalLinksBuilder(app)
|
||||
checker.init()
|
||||
builder = CheckExternalLinksBuilder(app)
|
||||
builder.init()
|
||||
checker = HyperlinkAvailabilityChecker(builder)
|
||||
checker.rate_limits = {}
|
||||
worker = HyperlinkAvailabilityCheckWorker(checker)
|
||||
worker = HyperlinkAvailabilityCheckWorker(builder, checker.rqueue, checker.wqueue,
|
||||
checker.rate_limits)
|
||||
with mock.patch('time.time', return_value=0.0):
|
||||
next_check = worker.limit_rate(FakeResponse())
|
||||
assert next_check == 60.0
|
||||
@ -547,40 +549,48 @@ def test_limit_rate_default_sleep(app):
|
||||
|
||||
def test_limit_rate_user_max_delay(app):
|
||||
app.config.linkcheck_rate_limit_timeout = 0.0
|
||||
checker = CheckExternalLinksBuilder(app)
|
||||
checker.init()
|
||||
builder = CheckExternalLinksBuilder(app)
|
||||
builder.init()
|
||||
checker = HyperlinkAvailabilityChecker(builder)
|
||||
checker.rate_limits = {}
|
||||
worker = HyperlinkAvailabilityCheckWorker(checker)
|
||||
worker = HyperlinkAvailabilityCheckWorker(builder, checker.rqueue, checker.wqueue,
|
||||
checker.rate_limits)
|
||||
next_check = worker.limit_rate(FakeResponse())
|
||||
assert next_check is None
|
||||
|
||||
|
||||
def test_limit_rate_doubles_previous_wait_time(app):
|
||||
checker = CheckExternalLinksBuilder(app)
|
||||
checker.init()
|
||||
builder = CheckExternalLinksBuilder(app)
|
||||
builder.init()
|
||||
checker = HyperlinkAvailabilityChecker(builder)
|
||||
checker.rate_limits = {"localhost": RateLimit(60.0, 0.0)}
|
||||
worker = HyperlinkAvailabilityCheckWorker(checker)
|
||||
worker = HyperlinkAvailabilityCheckWorker(builder, checker.rqueue, checker.wqueue,
|
||||
checker.rate_limits)
|
||||
with mock.patch('time.time', return_value=0.0):
|
||||
next_check = worker.limit_rate(FakeResponse())
|
||||
assert next_check == 120.0
|
||||
|
||||
|
||||
def test_limit_rate_clips_wait_time_to_max_time(app):
|
||||
checker = CheckExternalLinksBuilder(app)
|
||||
checker.init()
|
||||
app.config.linkcheck_rate_limit_timeout = 90.0
|
||||
builder = CheckExternalLinksBuilder(app)
|
||||
builder.init()
|
||||
checker = HyperlinkAvailabilityChecker(builder)
|
||||
checker.rate_limits = {"localhost": RateLimit(60.0, 0.0)}
|
||||
worker = HyperlinkAvailabilityCheckWorker(checker)
|
||||
worker = HyperlinkAvailabilityCheckWorker(builder, checker.rqueue, checker.wqueue,
|
||||
checker.rate_limits)
|
||||
with mock.patch('time.time', return_value=0.0):
|
||||
next_check = worker.limit_rate(FakeResponse())
|
||||
assert next_check == 90.0
|
||||
|
||||
|
||||
def test_limit_rate_bails_out_after_waiting_max_time(app):
|
||||
checker = CheckExternalLinksBuilder(app)
|
||||
checker.init()
|
||||
app.config.linkcheck_rate_limit_timeout = 90.0
|
||||
builder = CheckExternalLinksBuilder(app)
|
||||
builder.init()
|
||||
checker = HyperlinkAvailabilityChecker(builder)
|
||||
checker.rate_limits = {"localhost": RateLimit(90.0, 0.0)}
|
||||
worker = HyperlinkAvailabilityCheckWorker(checker)
|
||||
worker = HyperlinkAvailabilityCheckWorker(builder, checker.rqueue, checker.wqueue,
|
||||
checker.rate_limits)
|
||||
next_check = worker.limit_rate(FakeResponse())
|
||||
assert next_check is None
|
||||
|
Loading…
Reference in New Issue
Block a user