mirror of
https://github.com/sphinx-doc/sphinx.git
synced 2025-02-25 18:55:22 -06:00
Merge pull request #8835 from tk0miya/refactor_linkcheck7
refactor: linkcheck: Separate thread manager feature from builder class
This commit is contained in:
commit
3b451bbcde
3
CHANGES
3
CHANGES
@ -23,7 +23,10 @@ Deprecated
|
||||
* ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.broken``
|
||||
* ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.good``
|
||||
* ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.redirected``
|
||||
* ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.rqueue``
|
||||
* ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.to_ignore``
|
||||
* ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.workers``
|
||||
* ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.wqueue``
|
||||
* ``sphinx.builders.linkcheck.node_line_or_0()``
|
||||
* ``sphinx.ext.autodoc.AttributeDocumenter.isinstanceattribute()``
|
||||
* ``sphinx.ext.autodoc.directive.DocumenterBridge.reporter``
|
||||
|
@ -52,11 +52,26 @@ The following is a list of deprecated interfaces.
|
||||
- 5.0
|
||||
- N/A
|
||||
|
||||
* - ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.rqueue``
|
||||
- 3.5
|
||||
- 5.0
|
||||
- N/A
|
||||
|
||||
* - ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.to_ignore``
|
||||
- 3.5
|
||||
- 5.0
|
||||
- N/A
|
||||
|
||||
* - ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.workers``
|
||||
- 3.5
|
||||
- 5.0
|
||||
- N/A
|
||||
|
||||
* - ``sphinx.builders.linkcheck.CheckExternalLinksBuilder.wqueue``
|
||||
- 3.5
|
||||
- 5.0
|
||||
- N/A
|
||||
|
||||
* - ``sphinx.builders.linkcheck.node_line_or_0()``
|
||||
- 3.5
|
||||
- 5.0
|
||||
|
@ -19,7 +19,7 @@ from email.utils import parsedate_to_datetime
|
||||
from html.parser import HTMLParser
|
||||
from os import path
|
||||
from threading import Thread
|
||||
from typing import Any, Dict, List, NamedTuple, Optional, Pattern, Set, Tuple, cast
|
||||
from typing import Any, Dict, Generator, List, NamedTuple, Optional, Pattern, Set, Tuple, cast
|
||||
from urllib.parse import unquote, urlparse
|
||||
|
||||
from docutils import nodes
|
||||
@ -29,7 +29,9 @@ from requests.exceptions import HTTPError, TooManyRedirects
|
||||
|
||||
from sphinx.application import Sphinx
|
||||
from sphinx.builders.dummy import DummyBuilder
|
||||
from sphinx.config import Config
|
||||
from sphinx.deprecation import RemovedInSphinx50Warning
|
||||
from sphinx.environment import BuildEnvironment
|
||||
from sphinx.locale import __
|
||||
from sphinx.transforms.post_transforms import SphinxPostTransform
|
||||
from sphinx.util import encode_uri, logging, requests
|
||||
@ -121,17 +123,8 @@ class CheckExternalLinksBuilder(DummyBuilder):
|
||||
socket.setdefaulttimeout(5.0)
|
||||
|
||||
# create queues and worker threads
|
||||
self.rate_limits = {} # type: Dict[str, RateLimit]
|
||||
self.wqueue = queue.PriorityQueue() # type: queue.PriorityQueue
|
||||
self.rqueue = queue.Queue() # type: queue.Queue
|
||||
self.workers = [] # type: List[Thread]
|
||||
for i in range(self.config.linkcheck_workers):
|
||||
thread = HyperlinkAvailabilityCheckWorker(self)
|
||||
thread.start()
|
||||
self.workers.append(thread)
|
||||
|
||||
def is_ignored_uri(self, uri: str) -> bool:
|
||||
return any(pat.match(uri) for pat in self.to_ignore)
|
||||
self._wqueue = queue.PriorityQueue() # type: queue.PriorityQueue
|
||||
self._rqueue = queue.Queue() # type: queue.Queue
|
||||
|
||||
@property
|
||||
def anchors_ignore(self) -> List[Pattern]:
|
||||
@ -202,7 +195,33 @@ class CheckExternalLinksBuilder(DummyBuilder):
|
||||
RemovedInSphinx50Warning,
|
||||
stacklevel=2,
|
||||
)
|
||||
return HyperlinkAvailabilityCheckWorker(self).limit_rate(response)
|
||||
worker = HyperlinkAvailabilityCheckWorker(self.env, self.config,
|
||||
None, None, {})
|
||||
return worker.limit_rate(response)
|
||||
|
||||
def rqueue(self, response: Response) -> queue.Queue:
|
||||
warnings.warn(
|
||||
"%s.%s is deprecated." % (self.__class__.__name__, "rqueue"),
|
||||
RemovedInSphinx50Warning,
|
||||
stacklevel=2,
|
||||
)
|
||||
return self._rqueue
|
||||
|
||||
def workers(self, response: Response) -> List[Thread]:
|
||||
warnings.warn(
|
||||
"%s.%s is deprecated." % (self.__class__.__name__, "workers"),
|
||||
RemovedInSphinx50Warning,
|
||||
stacklevel=2,
|
||||
)
|
||||
return []
|
||||
|
||||
def wqueue(self, response: Response) -> queue.Queue:
|
||||
warnings.warn(
|
||||
"%s.%s is deprecated." % (self.__class__.__name__, "wqueue"),
|
||||
RemovedInSphinx50Warning,
|
||||
stacklevel=2,
|
||||
)
|
||||
return self._wqueue
|
||||
|
||||
def process_result(self, result: Tuple[str, str, int, str, str, int]) -> None:
|
||||
uri, docname, lineno, status, info, code = result
|
||||
@ -268,53 +287,105 @@ class CheckExternalLinksBuilder(DummyBuilder):
|
||||
self.json_outfile.write('\n')
|
||||
|
||||
def finish(self) -> None:
|
||||
checker = HyperlinkAvailabilityChecker(self.env, self.config, self)
|
||||
logger.info('')
|
||||
|
||||
with open(path.join(self.outdir, 'output.txt'), 'w') as self.txt_outfile,\
|
||||
open(path.join(self.outdir, 'output.json'), 'w') as self.json_outfile:
|
||||
total_links = 0
|
||||
for hyperlink in self.hyperlinks.values():
|
||||
if self.is_ignored_uri(hyperlink.uri):
|
||||
self.process_result(
|
||||
CheckResult(hyperlink.uri, hyperlink.docname, hyperlink.lineno,
|
||||
'ignored', '', 0))
|
||||
else:
|
||||
self.wqueue.put(hyperlink, False)
|
||||
total_links += 1
|
||||
|
||||
done = 0
|
||||
while done < total_links:
|
||||
self.process_result(self.rqueue.get())
|
||||
done += 1
|
||||
for result in checker.check(self.hyperlinks):
|
||||
self.process_result(result)
|
||||
|
||||
if self._broken:
|
||||
self.app.statuscode = 1
|
||||
|
||||
|
||||
class HyperlinkAvailabilityChecker:
|
||||
def __init__(self, env: BuildEnvironment, config: Config,
|
||||
builder: CheckExternalLinksBuilder = None) -> None:
|
||||
# Warning: builder argument will be removed in the sphinx-5.0.
|
||||
# Don't use it from extensions.
|
||||
# tag: RemovedInSphinx50Warning
|
||||
self.builder = builder
|
||||
self.config = config
|
||||
self.env = env
|
||||
self.rate_limits = {} # type: Dict[str, RateLimit]
|
||||
self.workers = [] # type: List[Thread]
|
||||
|
||||
self.to_ignore = [re.compile(x) for x in self.config.linkcheck_ignore]
|
||||
|
||||
if builder:
|
||||
self.rqueue = builder._rqueue
|
||||
self.wqueue = builder._wqueue
|
||||
else:
|
||||
self.rqueue = queue.Queue()
|
||||
self.wqueue = queue.PriorityQueue()
|
||||
|
||||
def invoke_threads(self) -> None:
|
||||
for i in range(self.config.linkcheck_workers):
|
||||
thread = HyperlinkAvailabilityCheckWorker(self.env, self.config,
|
||||
self.rqueue, self.wqueue,
|
||||
self.rate_limits, self.builder)
|
||||
thread.start()
|
||||
self.workers.append(thread)
|
||||
|
||||
def shutdown_threads(self) -> None:
|
||||
self.wqueue.join()
|
||||
# Shutdown threads.
|
||||
for worker in self.workers:
|
||||
self.wqueue.put((CHECK_IMMEDIATELY, None, None, None), False)
|
||||
|
||||
def check(self, hyperlinks: Dict[str, Hyperlink]) -> Generator[CheckResult, None, None]:
|
||||
self.invoke_threads()
|
||||
|
||||
total_links = 0
|
||||
for hyperlink in hyperlinks.values():
|
||||
if self.is_ignored_uri(hyperlink.uri):
|
||||
yield CheckResult(hyperlink.uri, hyperlink.docname, hyperlink.lineno,
|
||||
'ignored', '', 0)
|
||||
else:
|
||||
self.wqueue.put(hyperlink, False)
|
||||
total_links += 1
|
||||
|
||||
done = 0
|
||||
while done < total_links:
|
||||
yield self.rqueue.get()
|
||||
done += 1
|
||||
|
||||
self.shutdown_threads()
|
||||
|
||||
def is_ignored_uri(self, uri: str) -> bool:
|
||||
return any(pat.match(uri) for pat in self.to_ignore)
|
||||
|
||||
|
||||
class HyperlinkAvailabilityCheckWorker(Thread):
|
||||
"""A worker class for checking the availability of hyperlinks."""
|
||||
|
||||
def __init__(self, builder: CheckExternalLinksBuilder) -> None:
|
||||
self.config = builder.config
|
||||
self.env = builder.env
|
||||
self.rate_limits = builder.rate_limits
|
||||
self.rqueue = builder.rqueue
|
||||
self.wqueue = builder.wqueue
|
||||
def __init__(self, env: BuildEnvironment, config: Config, rqueue: queue.Queue,
|
||||
wqueue: queue.Queue, rate_limits: Dict[str, RateLimit],
|
||||
builder: CheckExternalLinksBuilder = None) -> None:
|
||||
# Warning: builder argument will be removed in the sphinx-5.0.
|
||||
# Don't use it from extensions.
|
||||
# tag: RemovedInSphinx50Warning
|
||||
self.config = config
|
||||
self.env = env
|
||||
self.rate_limits = rate_limits
|
||||
self.rqueue = rqueue
|
||||
self.wqueue = wqueue
|
||||
|
||||
self.anchors_ignore = [re.compile(x)
|
||||
for x in self.config.linkcheck_anchors_ignore]
|
||||
self.auth = [(re.compile(pattern), auth_info) for pattern, auth_info
|
||||
in self.config.linkcheck_auth]
|
||||
self.to_ignore = [re.compile(x) for x in self.config.linkcheck_ignore]
|
||||
|
||||
self._good = builder._good
|
||||
self._broken = builder._broken
|
||||
self._redirected = builder._redirected
|
||||
if builder:
|
||||
# if given, fill the result of checks as cache
|
||||
self._good = builder._good
|
||||
self._broken = builder._broken
|
||||
self._redirected = builder._redirected
|
||||
else:
|
||||
# only for compatibility. Will be removed in Sphinx-5.0
|
||||
self._good = set()
|
||||
self._broken = {}
|
||||
self._redirected = {}
|
||||
|
||||
super().__init__(daemon=True)
|
||||
|
||||
|
@ -15,14 +15,14 @@ import textwrap
|
||||
import time
|
||||
import wsgiref.handlers
|
||||
from datetime import datetime
|
||||
from queue import Queue
|
||||
from typing import Dict
|
||||
from unittest import mock
|
||||
|
||||
import pytest
|
||||
import requests
|
||||
|
||||
from sphinx.builders.linkcheck import (CheckExternalLinksBuilder,
|
||||
HyperlinkAvailabilityCheckWorker, RateLimit)
|
||||
from sphinx.builders.linkcheck import HyperlinkAvailabilityCheckWorker, RateLimit
|
||||
from sphinx.util.console import strip_colors
|
||||
|
||||
from .utils import CERT_FILE, http_server, https_server
|
||||
@ -536,10 +536,7 @@ class FakeResponse:
|
||||
|
||||
|
||||
def test_limit_rate_default_sleep(app):
|
||||
checker = CheckExternalLinksBuilder(app)
|
||||
checker.init()
|
||||
checker.rate_limits = {}
|
||||
worker = HyperlinkAvailabilityCheckWorker(checker)
|
||||
worker = HyperlinkAvailabilityCheckWorker(app.env, app.config, Queue(), Queue(), {})
|
||||
with mock.patch('time.time', return_value=0.0):
|
||||
next_check = worker.limit_rate(FakeResponse())
|
||||
assert next_check == 60.0
|
||||
@ -547,40 +544,34 @@ def test_limit_rate_default_sleep(app):
|
||||
|
||||
def test_limit_rate_user_max_delay(app):
|
||||
app.config.linkcheck_rate_limit_timeout = 0.0
|
||||
checker = CheckExternalLinksBuilder(app)
|
||||
checker.init()
|
||||
checker.rate_limits = {}
|
||||
worker = HyperlinkAvailabilityCheckWorker(checker)
|
||||
worker = HyperlinkAvailabilityCheckWorker(app.env, app.config, Queue(), Queue(), {})
|
||||
next_check = worker.limit_rate(FakeResponse())
|
||||
assert next_check is None
|
||||
|
||||
|
||||
def test_limit_rate_doubles_previous_wait_time(app):
|
||||
checker = CheckExternalLinksBuilder(app)
|
||||
checker.init()
|
||||
checker.rate_limits = {"localhost": RateLimit(60.0, 0.0)}
|
||||
worker = HyperlinkAvailabilityCheckWorker(checker)
|
||||
rate_limits = {"localhost": RateLimit(60.0, 0.0)}
|
||||
worker = HyperlinkAvailabilityCheckWorker(app.env, app.config, Queue(), Queue(),
|
||||
rate_limits)
|
||||
with mock.patch('time.time', return_value=0.0):
|
||||
next_check = worker.limit_rate(FakeResponse())
|
||||
assert next_check == 120.0
|
||||
|
||||
|
||||
def test_limit_rate_clips_wait_time_to_max_time(app):
|
||||
checker = CheckExternalLinksBuilder(app)
|
||||
checker.init()
|
||||
app.config.linkcheck_rate_limit_timeout = 90.0
|
||||
checker.rate_limits = {"localhost": RateLimit(60.0, 0.0)}
|
||||
worker = HyperlinkAvailabilityCheckWorker(checker)
|
||||
rate_limits = {"localhost": RateLimit(60.0, 0.0)}
|
||||
worker = HyperlinkAvailabilityCheckWorker(app.env, app.config, Queue(), Queue(),
|
||||
rate_limits)
|
||||
with mock.patch('time.time', return_value=0.0):
|
||||
next_check = worker.limit_rate(FakeResponse())
|
||||
assert next_check == 90.0
|
||||
|
||||
|
||||
def test_limit_rate_bails_out_after_waiting_max_time(app):
|
||||
checker = CheckExternalLinksBuilder(app)
|
||||
checker.init()
|
||||
app.config.linkcheck_rate_limit_timeout = 90.0
|
||||
checker.rate_limits = {"localhost": RateLimit(90.0, 0.0)}
|
||||
worker = HyperlinkAvailabilityCheckWorker(checker)
|
||||
rate_limits = {"localhost": RateLimit(90.0, 0.0)}
|
||||
worker = HyperlinkAvailabilityCheckWorker(app.env, app.config, Queue(), Queue(),
|
||||
rate_limits)
|
||||
next_check = worker.limit_rate(FakeResponse())
|
||||
assert next_check is None
|
||||
|
Loading…
Reference in New Issue
Block a user