Merge pull request #8835 from tk0miya/refactor_linkcheck7

refactor: linkcheck: Separate thread manager feature from builder class
This commit is contained in:
Takeshi KOMIYA 2021-02-12 23:32:30 +09:00 committed by GitHub
commit 3b451bbcde
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 140 additions and 60 deletions

View File

@ -23,7 +23,10 @@ Deprecated
* ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.broken``
* ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.good``
* ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.redirected``
* ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.rqueue``
* ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.to_ignore``
* ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.workers``
* ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.wqueue``
* ``sphinx.builders.linkcheck.node_line_or_0()``
* ``sphinx.ext.autodoc.AttributeDocumenter.isinstanceattribute()``
* ``sphinx.ext.autodoc.directive.DocumenterBridge.reporter``

View File

@ -52,11 +52,26 @@ The following is a list of deprecated interfaces.
- 5.0
- N/A
* - ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.rqueue``
- 3.5
- 5.0
- N/A
* - ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.to_ignore``
- 3.5
- 5.0
- N/A
* - ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.workers``
- 3.5
- 5.0
- N/A
* - ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.wqueue``
- 3.5
- 5.0
- N/A
* - ``sphinx.builders.linkcheck.node_line_or_0()``
- 3.5
- 5.0

View File

@ -19,7 +19,7 @@ from email.utils import parsedate_to_datetime
from html.parser import HTMLParser
from os import path
from threading import Thread
from typing import Any, Dict, List, NamedTuple, Optional, Pattern, Set, Tuple, cast
from typing import Any, Dict, Generator, List, NamedTuple, Optional, Pattern, Set, Tuple, cast
from urllib.parse import unquote, urlparse
from docutils import nodes
@ -29,7 +29,9 @@ from requests.exceptions import HTTPError, TooManyRedirects
from sphinx.application import Sphinx
from sphinx.builders.dummy import DummyBuilder
from sphinx.config import Config
from sphinx.deprecation import RemovedInSphinx50Warning
from sphinx.environment import BuildEnvironment
from sphinx.locale import __
from sphinx.transforms.post_transforms import SphinxPostTransform
from sphinx.util import encode_uri, logging, requests
@ -121,17 +123,8 @@ class CheckExternalLinksBuilder(DummyBuilder):
socket.setdefaulttimeout(5.0)
# create queues and worker threads
self.rate_limits = {} # type: Dict[str, RateLimit]
self.wqueue = queue.PriorityQueue() # type: queue.PriorityQueue
self.rqueue = queue.Queue() # type: queue.Queue
self.workers = [] # type: List[Thread]
for i in range(self.config.linkcheck_workers):
thread = HyperlinkAvailabilityCheckWorker(self)
thread.start()
self.workers.append(thread)
def is_ignored_uri(self, uri: str) -> bool:
return any(pat.match(uri) for pat in self.to_ignore)
self._wqueue = queue.PriorityQueue() # type: queue.PriorityQueue
self._rqueue = queue.Queue() # type: queue.Queue
@property
def anchors_ignore(self) -> List[Pattern]:
@ -202,7 +195,33 @@ class CheckExternalLinksBuilder(DummyBuilder):
RemovedInSphinx50Warning,
stacklevel=2,
)
return HyperlinkAvailabilityCheckWorker(self).limit_rate(response)
worker = HyperlinkAvailabilityCheckWorker(self.env, self.config,
None, None, {})
return worker.limit_rate(response)
def rqueue(self, response: Response) -> queue.Queue:
warnings.warn(
"%s.%s is deprecated." % (self.__class__.__name__, "rqueue"),
RemovedInSphinx50Warning,
stacklevel=2,
)
return self._rqueue
def workers(self, response: Response) -> List[Thread]:
warnings.warn(
"%s.%s is deprecated." % (self.__class__.__name__, "workers"),
RemovedInSphinx50Warning,
stacklevel=2,
)
return []
def wqueue(self, response: Response) -> queue.Queue:
warnings.warn(
"%s.%s is deprecated." % (self.__class__.__name__, "wqueue"),
RemovedInSphinx50Warning,
stacklevel=2,
)
return self._wqueue
def process_result(self, result: Tuple[str, str, int, str, str, int]) -> None:
uri, docname, lineno, status, info, code = result
@ -268,53 +287,105 @@ class CheckExternalLinksBuilder(DummyBuilder):
self.json_outfile.write('\n')
def finish(self) -> None:
checker = HyperlinkAvailabilityChecker(self.env, self.config, self)
logger.info('')
with open(path.join(self.outdir, 'output.txt'), 'w') as self.txt_outfile,\
open(path.join(self.outdir, 'output.json'), 'w') as self.json_outfile:
total_links = 0
for hyperlink in self.hyperlinks.values():
if self.is_ignored_uri(hyperlink.uri):
self.process_result(
CheckResult(hyperlink.uri, hyperlink.docname, hyperlink.lineno,
'ignored', '', 0))
else:
self.wqueue.put(hyperlink, False)
total_links += 1
done = 0
while done < total_links:
self.process_result(self.rqueue.get())
done += 1
for result in checker.check(self.hyperlinks):
self.process_result(result)
if self._broken:
self.app.statuscode = 1
class HyperlinkAvailabilityChecker:
def __init__(self, env: BuildEnvironment, config: Config,
builder: CheckExternalLinksBuilder = None) -> None:
# Warning: builder argument will be removed in the sphinx-5.0.
# Don't use it from extensions.
# tag: RemovedInSphinx50Warning
self.builder = builder
self.config = config
self.env = env
self.rate_limits = {} # type: Dict[str, RateLimit]
self.workers = [] # type: List[Thread]
self.to_ignore = [re.compile(x) for x in self.config.linkcheck_ignore]
if builder:
self.rqueue = builder._rqueue
self.wqueue = builder._wqueue
else:
self.rqueue = queue.Queue()
self.wqueue = queue.PriorityQueue()
def invoke_threads(self) -> None:
for i in range(self.config.linkcheck_workers):
thread = HyperlinkAvailabilityCheckWorker(self.env, self.config,
self.rqueue, self.wqueue,
self.rate_limits, self.builder)
thread.start()
self.workers.append(thread)
def shutdown_threads(self) -> None:
self.wqueue.join()
# Shutdown threads.
for worker in self.workers:
self.wqueue.put((CHECK_IMMEDIATELY, None, None, None), False)
def check(self, hyperlinks: Dict[str, Hyperlink]) -> Generator[CheckResult, None, None]:
self.invoke_threads()
total_links = 0
for hyperlink in hyperlinks.values():
if self.is_ignored_uri(hyperlink.uri):
yield CheckResult(hyperlink.uri, hyperlink.docname, hyperlink.lineno,
'ignored', '', 0)
else:
self.wqueue.put(hyperlink, False)
total_links += 1
done = 0
while done < total_links:
yield self.rqueue.get()
done += 1
self.shutdown_threads()
def is_ignored_uri(self, uri: str) -> bool:
return any(pat.match(uri) for pat in self.to_ignore)
class HyperlinkAvailabilityCheckWorker(Thread):
"""A worker class for checking the availability of hyperlinks."""
def __init__(self, builder: CheckExternalLinksBuilder) -> None:
self.config = builder.config
self.env = builder.env
self.rate_limits = builder.rate_limits
self.rqueue = builder.rqueue
self.wqueue = builder.wqueue
def __init__(self, env: BuildEnvironment, config: Config, rqueue: queue.Queue,
wqueue: queue.Queue, rate_limits: Dict[str, RateLimit],
builder: CheckExternalLinksBuilder = None) -> None:
# Warning: builder argument will be removed in the sphinx-5.0.
# Don't use it from extensions.
# tag: RemovedInSphinx50Warning
self.config = config
self.env = env
self.rate_limits = rate_limits
self.rqueue = rqueue
self.wqueue = wqueue
self.anchors_ignore = [re.compile(x)
for x in self.config.linkcheck_anchors_ignore]
self.auth = [(re.compile(pattern), auth_info) for pattern, auth_info
in self.config.linkcheck_auth]
self.to_ignore = [re.compile(x) for x in self.config.linkcheck_ignore]
self._good = builder._good
self._broken = builder._broken
self._redirected = builder._redirected
if builder:
# if given, fill the result of checks as cache
self._good = builder._good
self._broken = builder._broken
self._redirected = builder._redirected
else:
# only for compatibility. Will be removed in Sphinx-5.0
self._good = set()
self._broken = {}
self._redirected = {}
super().__init__(daemon=True)

View File

@ -15,14 +15,14 @@ import textwrap
import time
import wsgiref.handlers
from datetime import datetime
from queue import Queue
from typing import Dict
from unittest import mock
import pytest
import requests
from sphinx.builders.linkcheck import (CheckExternalLinksBuilder,
HyperlinkAvailabilityCheckWorker, RateLimit)
from sphinx.builders.linkcheck import HyperlinkAvailabilityCheckWorker, RateLimit
from sphinx.util.console import strip_colors
from .utils import CERT_FILE, http_server, https_server
@ -536,10 +536,7 @@ class FakeResponse:
def test_limit_rate_default_sleep(app):
checker = CheckExternalLinksBuilder(app)
checker.init()
checker.rate_limits = {}
worker = HyperlinkAvailabilityCheckWorker(checker)
worker = HyperlinkAvailabilityCheckWorker(app.env, app.config, Queue(), Queue(), {})
with mock.patch('time.time', return_value=0.0):
next_check = worker.limit_rate(FakeResponse())
assert next_check == 60.0
@ -547,40 +544,34 @@ def test_limit_rate_default_sleep(app):
def test_limit_rate_user_max_delay(app):
app.config.linkcheck_rate_limit_timeout = 0.0
checker = CheckExternalLinksBuilder(app)
checker.init()
checker.rate_limits = {}
worker = HyperlinkAvailabilityCheckWorker(checker)
worker = HyperlinkAvailabilityCheckWorker(app.env, app.config, Queue(), Queue(), {})
next_check = worker.limit_rate(FakeResponse())
assert next_check is None
def test_limit_rate_doubles_previous_wait_time(app):
checker = CheckExternalLinksBuilder(app)
checker.init()
checker.rate_limits = {"localhost": RateLimit(60.0, 0.0)}
worker = HyperlinkAvailabilityCheckWorker(checker)
rate_limits = {"localhost": RateLimit(60.0, 0.0)}
worker = HyperlinkAvailabilityCheckWorker(app.env, app.config, Queue(), Queue(),
rate_limits)
with mock.patch('time.time', return_value=0.0):
next_check = worker.limit_rate(FakeResponse())
assert next_check == 120.0
def test_limit_rate_clips_wait_time_to_max_time(app):
checker = CheckExternalLinksBuilder(app)
checker.init()
app.config.linkcheck_rate_limit_timeout = 90.0
checker.rate_limits = {"localhost": RateLimit(60.0, 0.0)}
worker = HyperlinkAvailabilityCheckWorker(checker)
rate_limits = {"localhost": RateLimit(60.0, 0.0)}
worker = HyperlinkAvailabilityCheckWorker(app.env, app.config, Queue(), Queue(),
rate_limits)
with mock.patch('time.time', return_value=0.0):
next_check = worker.limit_rate(FakeResponse())
assert next_check == 90.0
def test_limit_rate_bails_out_after_waiting_max_time(app):
checker = CheckExternalLinksBuilder(app)
checker.init()
app.config.linkcheck_rate_limit_timeout = 90.0
checker.rate_limits = {"localhost": RateLimit(90.0, 0.0)}
worker = HyperlinkAvailabilityCheckWorker(checker)
rate_limits = {"localhost": RateLimit(90.0, 0.0)}
worker = HyperlinkAvailabilityCheckWorker(app.env, app.config, Queue(), Queue(),
rate_limits)
next_check = worker.limit_rate(FakeResponse())
assert next_check is None