refactor: linkcheck: Separate thread manager feature from builder class

To reduce the complexity of the linkcheck builder, this extracts the
thread-management logic from the builder class into a new class,
HyperlinkAvailabilityChecker.
This commit is contained in:
Takeshi KOMIYA 2021-02-04 02:06:38 +09:00
parent 30bc4d450a
commit 5c223d20d6
4 changed files with 127 additions and 49 deletions

View File

@ -23,7 +23,10 @@ Deprecated
* ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.broken``
* ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.good``
* ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.redirected``
* ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.rqueue``
* ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.to_ignore``
* ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.workers``
* ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.wqueue``
* ``sphinx.builders.linkcheck.node_line_or_0()``
* ``sphinx.ext.autodoc.AttributeDocumenter.isinstanceattribute()``
* ``sphinx.ext.autodoc.directive.DocumenterBridge.reporter``

View File

@ -52,11 +52,26 @@ The following is a list of deprecated interfaces.
- 5.0
- N/A
* - ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.rqueue``
- 3.5
- 5.0
- N/A
* - ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.to_ignore``
- 3.5
- 5.0
- N/A
* - ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.workers``
- 3.5
- 5.0
- N/A
* - ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.wqueue``
- 3.5
- 5.0
- N/A
* - ``sphinx.builders.linkcheck.node_line_or_0()``
- 3.5
- 5.0

View File

@ -19,7 +19,7 @@ from email.utils import parsedate_to_datetime
from html.parser import HTMLParser
from os import path
from threading import Thread
from typing import Any, Dict, List, NamedTuple, Optional, Pattern, Set, Tuple, cast
from typing import Any, Dict, Generator, List, NamedTuple, Optional, Pattern, Set, Tuple, cast
from urllib.parse import unquote, urlparse
from docutils import nodes
@ -121,17 +121,8 @@ class CheckExternalLinksBuilder(DummyBuilder):
socket.setdefaulttimeout(5.0)
# create queues and worker threads
self.rate_limits = {} # type: Dict[str, RateLimit]
self.wqueue = queue.PriorityQueue() # type: queue.PriorityQueue
self.rqueue = queue.Queue() # type: queue.Queue
self.workers = [] # type: List[Thread]
for i in range(self.config.linkcheck_workers):
thread = HyperlinkAvailabilityCheckWorker(self)
thread.start()
self.workers.append(thread)
def is_ignored_uri(self, uri: str) -> bool:
return any(pat.match(uri) for pat in self.to_ignore)
self._wqueue = queue.PriorityQueue() # type: queue.PriorityQueue
self._rqueue = queue.Queue() # type: queue.Queue
@property
def anchors_ignore(self) -> List[Pattern]:
@ -202,7 +193,31 @@ class CheckExternalLinksBuilder(DummyBuilder):
RemovedInSphinx50Warning,
stacklevel=2,
)
return HyperlinkAvailabilityCheckWorker(self).limit_rate(response)
return HyperlinkAvailabilityCheckWorker(self, None, None, {}).limit_rate(response)
@property
def rqueue(self) -> queue.Queue:
    """Return the result queue (deprecated).

    Deprecated alias for the private ``_rqueue`` attribute.  It is a
    read-only property, without the unrelated ``response`` argument, so
    that attribute-style access (``builder.rqueue``) yields the queue
    itself rather than a bound method.

    :return: the queue stored in ``self._rqueue``
    """
    warnings.warn(
        "%s.%s is deprecated." % (self.__class__.__name__, "rqueue"),
        RemovedInSphinx50Warning,
        # Attribute the warning to the code that reads the attribute.
        stacklevel=2,
    )
    return self._rqueue
@property
def workers(self) -> List[Thread]:
    """Return the list of worker threads (deprecated).

    Deprecated attribute kept only so that old code reading
    ``builder.workers`` does not fail.  It is a read-only property,
    without the unrelated ``response`` argument, so that attribute-style
    access yields a list rather than a bound method.

    :return: always a new empty list
    """
    warnings.warn(
        "%s.%s is deprecated." % (self.__class__.__name__, "workers"),
        RemovedInSphinx50Warning,
        # Attribute the warning to the code that reads the attribute.
        stacklevel=2,
    )
    return []
@property
def wqueue(self) -> queue.Queue:
    """Return the work queue (deprecated).

    Deprecated alias for the private ``_wqueue`` attribute.  It is a
    read-only property, without the unrelated ``response`` argument, so
    that attribute-style access (``builder.wqueue``) yields the queue
    itself rather than a bound method.

    :return: the queue stored in ``self._wqueue``
    """
    warnings.warn(
        "%s.%s is deprecated." % (self.__class__.__name__, "wqueue"),
        RemovedInSphinx50Warning,
        # Attribute the warning to the code that reads the attribute.
        stacklevel=2,
    )
    return self._wqueue
def process_result(self, result: Tuple[str, str, int, str, str, int]) -> None:
uri, docname, lineno, status, info, code = result
@ -268,49 +283,84 @@ class CheckExternalLinksBuilder(DummyBuilder):
self.json_outfile.write('\n')
# Check every hyperlink in ``self.hyperlinks`` and hand each result to
# ``process_result()``; afterwards flag the build as failed if any link
# was recorded as broken.
def finish(self) -> None:
# The checker is built around this builder; ``check()`` below yields one
# result per hyperlink.
checker = HyperlinkAvailabilityChecker(self)
logger.info('')
# Both report files are bound to attributes of the builder (not to local
# names) while the ``with`` block is active, so other methods of the
# builder can write to them through ``self``.
with open(path.join(self.outdir, 'output.txt'), 'w') as self.txt_outfile,\
open(path.join(self.outdir, 'output.json'), 'w') as self.json_outfile:
for result in checker.check(self.hyperlinks):
self.process_result(result)
# A truthy ``self._broken`` sets the application's status code to 1.
if self._broken:
self.app.statuscode = 1
class HyperlinkAvailabilityChecker:
def __init__(self, builder: CheckExternalLinksBuilder) -> None:
    """Set up a checker bound to *builder*.

    :param builder: the linkcheck builder whose configuration and
                    work/result queues this checker uses
    """
    self.builder = builder
    self.config = builder.config
    self.rate_limits = {}  # type: Dict[str, RateLimit]
    self.workers = []  # type: List[Thread]

    # URIs matching one of these patterns are reported as 'ignored'
    # instead of being queued for checking.
    self.to_ignore = [re.compile(x) for x in self.config.linkcheck_ignore]

    # ``builder`` has already been dereferenced above, so it can never be
    # missing at this point: share its queues unconditionally rather than
    # guarding them with a fallback branch that could not be reached.
    self.rqueue = builder._rqueue
    self.wqueue = builder._wqueue
def invoke_threads(self) -> None:
    """Create and start the worker threads.

    ``config.linkcheck_workers`` workers are created.  Each one receives
    this checker's builder, result queue, work queue and rate-limit
    table, is started right away and is then recorded in
    ``self.workers``.
    """
    num_workers = self.config.linkcheck_workers
    for _ in range(num_workers):
        worker = HyperlinkAvailabilityCheckWorker(
            self.builder, self.rqueue, self.wqueue, self.rate_limits)
        worker.start()
        self.workers.append(worker)
def shutdown_threads(self) -> None:
    """Drain the work queue, then post one final entry per worker.

    Blocks until every item put on the work queue has been marked as
    done, then enqueues a ``(CHECK_IMMEDIATELY, None, None, None)``
    entry for each thread in ``self.workers`` without blocking.
    """
    work_queue = self.wqueue
    work_queue.join()
    # Presumably the workers treat this all-``None`` entry as a request to
    # exit; the worker loop is not visible here -- confirm.
    for _ in self.workers:
        work_queue.put((CHECK_IMMEDIATELY, None, None, None), False)
def check(self, hyperlinks: Dict[str, Hyperlink]) -> Generator[CheckResult, None, None]:
self.invoke_threads()
total_links = 0
for hyperlink in self.hyperlinks.values():
for hyperlink in hyperlinks.values():
if self.is_ignored_uri(hyperlink.uri):
self.process_result(
CheckResult(hyperlink.uri, hyperlink.docname, hyperlink.lineno,
'ignored', '', 0))
yield CheckResult(hyperlink.uri, hyperlink.docname, hyperlink.lineno,
'ignored', '', 0)
else:
self.wqueue.put(hyperlink, False)
total_links += 1
done = 0
while done < total_links:
self.process_result(self.rqueue.get())
yield self.rqueue.get()
done += 1
if self._broken:
self.app.statuscode = 1
self.shutdown_threads()
self.wqueue.join()
# Shutdown threads.
for worker in self.workers:
self.wqueue.put((CHECK_IMMEDIATELY, None, None, None), False)
def is_ignored_uri(self, uri: str) -> bool:
    """Tell whether *uri* matches one of the ``to_ignore`` patterns.

    :param uri: the URI to test
    :return: ``True`` as soon as one compiled pattern matches at the
             start of *uri*, ``False`` if none does
    """
    for pattern in self.to_ignore:
        if pattern.match(uri):
            return True
    return False
class HyperlinkAvailabilityCheckWorker(Thread):
"""A worker class for checking the availability of hyperlinks."""
def __init__(self, builder: CheckExternalLinksBuilder) -> None:
def __init__(self, builder: CheckExternalLinksBuilder, rqueue: queue.Queue,
wqueue: queue.Queue, rate_limits: Dict[str, RateLimit]) -> None:
self.config = builder.config
self.env = builder.env
self.rate_limits = builder.rate_limits
self.rqueue = builder.rqueue
self.wqueue = builder.wqueue
self.rate_limits = rate_limits
self.rqueue = rqueue
self.wqueue = wqueue
self.anchors_ignore = [re.compile(x)
for x in self.config.linkcheck_anchors_ignore]
self.auth = [(re.compile(pattern), auth_info) for pattern, auth_info
in self.config.linkcheck_auth]
self.to_ignore = [re.compile(x) for x in self.config.linkcheck_ignore]
self._good = builder._good
self._broken = builder._broken

View File

@ -21,7 +21,7 @@ from unittest import mock
import pytest
import requests
from sphinx.builders.linkcheck import (CheckExternalLinksBuilder,
from sphinx.builders.linkcheck import (CheckExternalLinksBuilder, HyperlinkAvailabilityChecker,
HyperlinkAvailabilityCheckWorker, RateLimit)
from sphinx.util.console import strip_colors
@ -536,10 +536,12 @@ class FakeResponse:
def test_limit_rate_default_sleep(app):
checker = CheckExternalLinksBuilder(app)
checker.init()
builder = CheckExternalLinksBuilder(app)
builder.init()
checker = HyperlinkAvailabilityChecker(builder)
checker.rate_limits = {}
worker = HyperlinkAvailabilityCheckWorker(checker)
worker = HyperlinkAvailabilityCheckWorker(builder, checker.rqueue, checker.wqueue,
checker.rate_limits)
with mock.patch('time.time', return_value=0.0):
next_check = worker.limit_rate(FakeResponse())
assert next_check == 60.0
@ -547,40 +549,48 @@ def test_limit_rate_default_sleep(app):
def test_limit_rate_user_max_delay(app):
app.config.linkcheck_rate_limit_timeout = 0.0
checker = CheckExternalLinksBuilder(app)
checker.init()
builder = CheckExternalLinksBuilder(app)
builder.init()
checker = HyperlinkAvailabilityChecker(builder)
checker.rate_limits = {}
worker = HyperlinkAvailabilityCheckWorker(checker)
worker = HyperlinkAvailabilityCheckWorker(builder, checker.rqueue, checker.wqueue,
checker.rate_limits)
next_check = worker.limit_rate(FakeResponse())
assert next_check is None
def test_limit_rate_doubles_previous_wait_time(app):
checker = CheckExternalLinksBuilder(app)
checker.init()
builder = CheckExternalLinksBuilder(app)
builder.init()
checker = HyperlinkAvailabilityChecker(builder)
checker.rate_limits = {"localhost": RateLimit(60.0, 0.0)}
worker = HyperlinkAvailabilityCheckWorker(checker)
worker = HyperlinkAvailabilityCheckWorker(builder, checker.rqueue, checker.wqueue,
checker.rate_limits)
with mock.patch('time.time', return_value=0.0):
next_check = worker.limit_rate(FakeResponse())
assert next_check == 120.0
def test_limit_rate_clips_wait_time_to_max_time(app):
checker = CheckExternalLinksBuilder(app)
checker.init()
app.config.linkcheck_rate_limit_timeout = 90.0
builder = CheckExternalLinksBuilder(app)
builder.init()
checker = HyperlinkAvailabilityChecker(builder)
checker.rate_limits = {"localhost": RateLimit(60.0, 0.0)}
worker = HyperlinkAvailabilityCheckWorker(checker)
worker = HyperlinkAvailabilityCheckWorker(builder, checker.rqueue, checker.wqueue,
checker.rate_limits)
with mock.patch('time.time', return_value=0.0):
next_check = worker.limit_rate(FakeResponse())
assert next_check == 90.0
def test_limit_rate_bails_out_after_waiting_max_time(app):
checker = CheckExternalLinksBuilder(app)
checker.init()
app.config.linkcheck_rate_limit_timeout = 90.0
builder = CheckExternalLinksBuilder(app)
builder.init()
checker = HyperlinkAvailabilityChecker(builder)
checker.rate_limits = {"localhost": RateLimit(90.0, 0.0)}
worker = HyperlinkAvailabilityCheckWorker(checker)
worker = HyperlinkAvailabilityCheckWorker(builder, checker.rqueue, checker.wqueue,
checker.rate_limits)
next_check = worker.limit_rate(FakeResponse())
assert next_check is None