refactor: linkcheck: Remove objects marked as RemovedInSphinx50Warning

This commit is contained in:
Takeshi KOMIYA 2021-09-06 02:14:26 +09:00
parent 2e40034d23
commit 43517219b7

View File

@ -12,26 +12,22 @@ import json
import re
import socket
import time
import warnings
from datetime import datetime, timezone
from email.utils import parsedate_to_datetime
from html.parser import HTMLParser
from os import path
from queue import PriorityQueue, Queue
from threading import Thread
from typing import (Any, Dict, Generator, List, NamedTuple, Optional, Pattern, Set, Tuple,
Union, cast)
from typing import Any, Dict, Generator, List, NamedTuple, Optional, Tuple, Union, cast
from urllib.parse import unquote, urlparse, urlunparse
from docutils import nodes
from docutils.nodes import Element
from requests import Response
from requests.exceptions import ConnectionError, HTTPError, TooManyRedirects
from sphinx.application import Sphinx
from sphinx.builders.dummy import DummyBuilder
from sphinx.config import Config
from sphinx.deprecation import RemovedInSphinx50Warning
from sphinx.environment import BuildEnvironment
from sphinx.locale import __
from sphinx.transforms.post_transforms import SphinxPostTransform
@ -67,16 +63,6 @@ QUEUE_POLL_SECS = 1
DEFAULT_DELAY = 60.0
def node_line_or_0(node: Element) -> int:
"""
PriorityQueue items must be comparable. The line number is part of the
tuple used by the PriorityQueue, keep an homogeneous type for comparison.
"""
warnings.warn('node_line_or_0() is deprecated.',
RemovedInSphinx50Warning, stacklevel=2)
return get_node_line(node) or 0
class AnchorCheckParser(HTMLParser):
"""Specialized HTML parser that looks for a specific anchor."""
@ -120,114 +106,11 @@ class CheckExternalLinksBuilder(DummyBuilder):
'%(outdir)s/output.txt')
def init(self) -> None:
self.broken_hyperlinks = 0
self.hyperlinks: Dict[str, Hyperlink] = {}
self._good: Set[str] = set()
self._broken: Dict[str, str] = {}
self._redirected: Dict[str, Tuple[str, int]] = {}
# set a timeout for non-responding servers
socket.setdefaulttimeout(5.0)
# create queues and worker threads
self._wqueue: PriorityQueue[CheckRequestType] = PriorityQueue()
self._rqueue: Queue[CheckResult] = Queue()
@property
def anchors_ignore(self) -> List[Pattern]:
warnings.warn(
"%s.%s is deprecated." % (self.__class__.__name__, "anchors_ignore"),
RemovedInSphinx50Warning,
stacklevel=2,
)
return [re.compile(x) for x in self.config.linkcheck_anchors_ignore]
@property
def auth(self) -> List[Tuple[Pattern, Any]]:
warnings.warn(
"%s.%s is deprecated." % (self.__class__.__name__, "auth"),
RemovedInSphinx50Warning,
stacklevel=2,
)
return [(re.compile(pattern), auth_info) for pattern, auth_info
in self.config.linkcheck_auth]
@property
def to_ignore(self) -> List[Pattern]:
warnings.warn(
"%s.%s is deprecated." % (self.__class__.__name__, "to_ignore"),
RemovedInSphinx50Warning,
stacklevel=2,
)
return [re.compile(x) for x in self.config.linkcheck_ignore]
@property
def good(self) -> Set[str]:
warnings.warn(
"%s.%s is deprecated." % (self.__class__.__name__, "good"),
RemovedInSphinx50Warning,
stacklevel=2,
)
return self._good
@property
def broken(self) -> Dict[str, str]:
warnings.warn(
"%s.%s is deprecated." % (self.__class__.__name__, "broken"),
RemovedInSphinx50Warning,
stacklevel=2,
)
return self._broken
@property
def redirected(self) -> Dict[str, Tuple[str, int]]:
warnings.warn(
"%s.%s is deprecated." % (self.__class__.__name__, "redirected"),
RemovedInSphinx50Warning,
stacklevel=2,
)
return self._redirected
def check_thread(self) -> None:
warnings.warn(
"%s.%s is deprecated." % (self.__class__.__name__, "check_thread"),
RemovedInSphinx50Warning,
stacklevel=2,
)
# do nothing.
def limit_rate(self, response: Response) -> Optional[float]:
warnings.warn(
"%s.%s is deprecated." % (self.__class__.__name__, "limit_rate"),
RemovedInSphinx50Warning,
stacklevel=2,
)
worker = HyperlinkAvailabilityCheckWorker(self.env, self.config,
None, None, {})
return worker.limit_rate(response)
def rqueue(self, response: Response) -> Queue:
warnings.warn(
"%s.%s is deprecated." % (self.__class__.__name__, "rqueue"),
RemovedInSphinx50Warning,
stacklevel=2,
)
return self._rqueue
def workers(self, response: Response) -> List[Thread]:
warnings.warn(
"%s.%s is deprecated." % (self.__class__.__name__, "workers"),
RemovedInSphinx50Warning,
stacklevel=2,
)
return []
def wqueue(self, response: Response) -> Queue:
warnings.warn(
"%s.%s is deprecated." % (self.__class__.__name__, "wqueue"),
RemovedInSphinx50Warning,
stacklevel=2,
)
return self._wqueue
def process_result(self, result: CheckResult) -> None:
filename = self.env.doc2path(result.docname, None)
@ -260,6 +143,7 @@ class CheckExternalLinksBuilder(DummyBuilder):
logger.info(red('broken ') + result.uri + red(' - ' + result.message))
self.write_entry('broken', result.docname, filename, result.lineno,
result.uri + ': ' + result.message)
self.broken_hyperlinks += 1
elif result.status == 'redirected':
try:
text, color = {
@ -292,7 +176,7 @@ class CheckExternalLinksBuilder(DummyBuilder):
self.json_outfile.write('\n')
def finish(self) -> None:
checker = HyperlinkAvailabilityChecker(self.env, self.config, self)
checker = HyperlinkAvailabilityChecker(self.env, self.config)
logger.info('')
with open(path.join(self.outdir, 'output.txt'), 'w') as self.txt_outfile,\
@ -300,36 +184,26 @@ class CheckExternalLinksBuilder(DummyBuilder):
for result in checker.check(self.hyperlinks):
self.process_result(result)
if self._broken:
if self.broken_hyperlinks:
self.app.statuscode = 1
class HyperlinkAvailabilityChecker:
def __init__(self, env: BuildEnvironment, config: Config,
builder: CheckExternalLinksBuilder = None) -> None:
# Warning: builder argument will be removed in the sphinx-5.0.
# Don't use it from extensions.
# tag: RemovedInSphinx50Warning
self.builder = builder
def __init__(self, env: BuildEnvironment, config: Config) -> None:
self.config = config
self.env = env
self.rate_limits: Dict[str, RateLimit] = {}
self.rqueue: Queue = Queue()
self.workers: List[Thread] = []
self.wqueue: PriorityQueue = PriorityQueue()
self.to_ignore = [re.compile(x) for x in self.config.linkcheck_ignore]
if builder:
self.rqueue = builder._rqueue
self.wqueue = builder._wqueue
else:
self.rqueue = Queue()
self.wqueue = PriorityQueue()
def invoke_threads(self) -> None:
for i in range(self.config.linkcheck_workers):
thread = HyperlinkAvailabilityCheckWorker(self.env, self.config,
self.rqueue, self.wqueue,
self.rate_limits, self.builder)
self.rate_limits)
thread.start()
self.workers.append(thread)
@ -365,11 +239,7 @@ class HyperlinkAvailabilityCheckWorker(Thread):
"""A worker class for checking the availability of hyperlinks."""
def __init__(self, env: BuildEnvironment, config: Config, rqueue: Queue,
wqueue: Queue, rate_limits: Dict[str, RateLimit],
builder: CheckExternalLinksBuilder = None) -> None:
# Warning: builder argument will be removed in the sphinx-5.0.
# Don't use it from extensions.
# tag: RemovedInSphinx50Warning
wqueue: Queue, rate_limits: Dict[str, RateLimit]) -> None:
self.config = config
self.env = env
self.rate_limits = rate_limits
@ -381,17 +251,6 @@ class HyperlinkAvailabilityCheckWorker(Thread):
self.auth = [(re.compile(pattern), auth_info) for pattern, auth_info
in self.config.linkcheck_auth]
if builder:
# if given, fill the result of checks as cache
self._good = builder._good
self._broken = builder._broken
self._redirected = builder._redirected
else:
# only for compatibility. Will be removed in Sphinx-5.0
self._good = set()
self._broken = {}
self._redirected = {}
super().__init__(daemon=True)
def run(self) -> None:
@ -530,14 +389,7 @@ class HyperlinkAvailabilityCheckWorker(Thread):
if path.exists(path.join(srcdir, uri)):
return 'working', '', 0
else:
self._broken[uri] = ''
return 'broken', '', 0
elif uri in self._good:
return 'working', 'old', 0
elif uri in self._broken:
return 'broken', self._broken[uri], 0
elif uri in self._redirected:
return 'redirected', self._redirected[uri][0], self._redirected[uri][1]
# need to actually check the URI
for _ in range(self.config.linkcheck_retries):
@ -545,13 +397,6 @@ class HyperlinkAvailabilityCheckWorker(Thread):
if status != "broken":
break
if status == "working":
self._good.add(uri)
elif status == "broken":
self._broken[uri] = info
elif status == "redirected":
self._redirected[uri] = (info, code)
return (status, info, code)
while True: