Speed up `test_linkcheck`

This commit is contained in:
Adam Turner
2022-09-10 17:26:41 +01:00
parent 9df3b59e00
commit 97f07ca83c
18 changed files with 192 additions and 120 deletions

View File

@@ -571,7 +571,7 @@ def setup(app: Sphinx) -> dict[str, Any]:
app.add_config_value('linkcheck_auth', [], False) app.add_config_value('linkcheck_auth', [], False)
app.add_config_value('linkcheck_request_headers', {}, False) app.add_config_value('linkcheck_request_headers', {}, False)
app.add_config_value('linkcheck_retries', 1, False) app.add_config_value('linkcheck_retries', 1, False)
app.add_config_value('linkcheck_timeout', None, False, [int]) app.add_config_value('linkcheck_timeout', None, False, [int, float])
app.add_config_value('linkcheck_workers', 5, False) app.add_config_value('linkcheck_workers', 5, False)
app.add_config_value('linkcheck_anchors', True, False) app.add_config_value('linkcheck_anchors', True, False)
# Anchors starting with ! are ignored since they are # Anchors starting with ! are ignored since they are

View File

@@ -0,0 +1,3 @@
exclude_patterns = ['_build']
linkcheck_anchors = True
linkcheck_timeout = 0.02

View File

@@ -0,0 +1,2 @@
* `Example Bar invalid <http://localhost:7777/#!bar>`_
* `Example Bar invalid <http://localhost:7777/#top>`_

View File

@@ -3,3 +3,4 @@ linkcheck_exclude_documents = [
'^broken_link$', '^broken_link$',
'br[0-9]ken_link', 'br[0-9]ken_link',
] ]
linkcheck_timeout = 0.01

View File

@@ -1,2 +1,3 @@
exclude_patterns = ['_build'] exclude_patterns = ['_build']
linkcheck_anchors = True linkcheck_anchors = True
linkcheck_timeout = 0.01

View File

@@ -1 +1,6 @@
import sys
exclude_patterns = ['_build'] exclude_patterns = ['_build']
linkcheck_timeout = 0.01 if sys.platform != 'win32' else 0.05
del sys

View File

@@ -1 +1,2 @@
exclude_patterns = ['_build'] exclude_patterns = ['_build']
linkcheck_timeout = 0.02

View File

@@ -1,2 +1,3 @@
`local server1 <http://localhost:7777/path1>`_ `local server1 <http://localhost:7777/path1>`_
`local server2 <http://localhost:7777/path2>`_ `local server2 <http://localhost:7777/path2>`_

View File

@@ -1 +1,2 @@
exclude_patterns = ['_build'] exclude_patterns = ['_build']
linkcheck_timeout = 0.01

View File

@@ -0,0 +1,2 @@
exclude_patterns = ['_build']
linkcheck_timeout = 0.01

View File

@@ -0,0 +1,2 @@
.. raw:: html
:url: http://localhost:7777/

View File

@@ -0,0 +1,3 @@
exclude_patterns = ['_build']
linkcheck_anchors = True
linkcheck_timeout = 0.01

View File

@@ -0,0 +1 @@
`Non-existing uri with localhost <https://localhost:7777/doesnotexist>`_

View File

@@ -1,4 +1,4 @@
root_doc = 'links' root_doc = 'links'
source_suffix = '.txt'
exclude_patterns = ['_build'] exclude_patterns = ['_build']
linkcheck_anchors = True linkcheck_anchors = True
linkcheck_timeout = 0.01

View File

@@ -0,0 +1,13 @@
Some additional anchors to exercise ignore code
* `Valid url <http://localhost:7777/>`_
* `Bar anchor invalid (trailing slash) <http://localhost:7777/#!bar>`_
* `Bar anchor invalid <http://localhost:7777#!bar>`_ tests that default ignore anchor of #! does not need to be prefixed with /
* `Top anchor invalid <http://localhost:7777/#top>`_
* `'does-not-exist' anchor invalid <http://localhost:7777#does-not-exist>`_
* `Valid local file <conf.py>`_
* `Invalid local file <path/to/notfound>`_
.. image:: http://localhost:7777/image.png
.. figure:: http://localhost:7777/image2.png

View File

@@ -1,22 +0,0 @@
This is from CPython documentation.
* Also, if there is a `default namespace <https://www.w3.org/TR/2006/REC-xml-names-20060816/#defaulting>`__, that full URI gets prepended to all of the non-prefixed tags.
* The URL having anchor: `https://www.sphinx-doc.org/en/master/usage/installation.html#overview`_
Some additional anchors to exercise ignore code
* `Example Bar invalid <https://www.google.com/#!bar>`_
* `Example Bar invalid <https://www.google.com#!bar>`_ tests that default ignore anchor of #! does not need to be prefixed with /
* `Example Bar invalid <https://www.google.com/#top>`_
* `Example anchor invalid <http://www.sphinx-doc.org/en/master/index.html#does-not-exist>`_
* `Complete nonsense <https://localhost:7777/doesnotexist>`_
* `Example valid local file <conf.py>`_
* `Example invalid local file <path/to/notfound>`_
* https://github.com/sphinx-doc/sphinx/blob/master/sphinx/__init__.py#L2
.. image:: https://www.google.com/image.png
.. figure:: https://www.google.com/image2.png
.. raw:: html
:url: https://www.sphinx-doc.org/

View File

@@ -10,6 +10,7 @@ import textwrap
import time import time
import wsgiref.handlers import wsgiref.handlers
from datetime import datetime from datetime import datetime
from os import path
from queue import Queue from queue import Queue
from unittest import mock from unittest import mock
@@ -22,101 +23,137 @@ from sphinx.util.console import strip_colors
from .utils import CERT_FILE, http_server, https_server from .utils import CERT_FILE, http_server, https_server
ts_re = re.compile(r".*\[(?P<ts>.*)\].*") ts_re = re.compile(r".*\[(?P<ts>.*)\].*")
SPHINX_DOCS_INDEX = path.abspath(path.join(__file__, "..", "roots", "test-linkcheck", "sphinx-docs-index.html"))
class DefaultsHandler(http.server.BaseHTTPRequestHandler):
def do_HEAD(self):
if self.path[1:].rstrip() == "":
self.send_response(200, "OK")
self.end_headers()
else:
self.send_response(404, "Not Found")
self.end_headers()
def do_GET(self):
self.do_HEAD()
if self.path[1:].rstrip() == "":
self.wfile.write(b"ok\n\n")
@pytest.mark.sphinx('linkcheck', testroot='linkcheck', freshenv=True) @pytest.mark.sphinx('linkcheck', testroot='linkcheck', freshenv=True)
def test_defaults(app): def test_defaults(app):
app.build() with http_server(DefaultsHandler):
app.build()
# Text output
assert (app.outdir / 'output.txt').exists() assert (app.outdir / 'output.txt').exists()
content = (app.outdir / 'output.txt').read_text(encoding='utf8') content = (app.outdir / 'output.txt').read_text(encoding='utf8')
print(content)
# looking for '#top' and '#does-not-exist' not found should fail # looking for '#top' and '#does-not-exist' not found should fail
assert "Anchor 'top' not found" in content assert "Anchor 'top' not found" in content
assert "Anchor 'does-not-exist' not found" in content assert "Anchor 'does-not-exist' not found" in content
# looking for non-existent URL should fail
assert " Max retries exceeded with url: /doesnotexist" in content
# images should fail # images should fail
assert "Not Found for url: https://www.google.com/image.png" in content assert "Not Found for url: http://localhost:7777/image.png" in content
assert "Not Found for url: https://www.google.com/image2.png" in content assert "Not Found for url: http://localhost:7777/image2.png" in content
# looking for local file should fail # looking for local file should fail
assert "[broken] path/to/notfound" in content assert "[broken] path/to/notfound" in content
assert len(content.splitlines()) == 7 assert len(content.splitlines()) == 5
@pytest.mark.sphinx('linkcheck', testroot='linkcheck', freshenv=True)
def test_defaults_json(app):
app.build()
# JSON output
assert (app.outdir / 'output.json').exists() assert (app.outdir / 'output.json').exists()
content = (app.outdir / 'output.json').read_text(encoding='utf8') content = (app.outdir / 'output.json').read_text(encoding='utf8')
print(content)
rows = [json.loads(x) for x in content.splitlines()] rows = [json.loads(x) for x in content.splitlines()]
row = rows[0] row = rows[0]
for attr in ["filename", "lineno", "status", "code", "uri", for attr in ("filename", "lineno", "status", "code", "uri", "info"):
"info"]:
assert attr in row assert attr in row
assert len(content.splitlines()) == 12 assert len(content.splitlines()) == 9
assert len(rows) == 12 assert len(rows) == 9
# the output order of the rows is not stable # the output order of the rows is not stable
# due to possible variance in network latency # due to possible variance in network latency
rowsby = {row["uri"]: row for row in rows} rowsby = {row["uri"]: row for row in rows}
assert rowsby["https://www.google.com#!bar"] == { assert rowsby["http://localhost:7777#!bar"] == {
'filename': 'links.txt', 'filename': 'links.rst',
'lineno': 10, 'lineno': 5,
'status': 'working', 'status': 'working',
'code': 0, 'code': 0,
'uri': 'https://www.google.com#!bar', 'uri': 'http://localhost:7777#!bar',
'info': '', 'info': '',
} }
# looking for non-existent URL should fail assert rowsby['http://localhost:7777/image2.png'] == {
dnerow = rowsby['https://localhost:7777/doesnotexist'] 'filename': 'links.rst',
assert dnerow['filename'] == 'links.txt' 'lineno': 13,
assert dnerow['lineno'] == 13
assert dnerow['status'] == 'broken'
assert dnerow['code'] == 0
assert dnerow['uri'] == 'https://localhost:7777/doesnotexist'
assert rowsby['https://www.google.com/image2.png'] == {
'filename': 'links.txt',
'lineno': 20,
'status': 'broken', 'status': 'broken',
'code': 0, 'code': 0,
'uri': 'https://www.google.com/image2.png', 'uri': 'http://localhost:7777/image2.png',
'info': '404 Client Error: Not Found for url: https://www.google.com/image2.png', 'info': '404 Client Error: Not Found for url: http://localhost:7777/image2.png',
} }
# looking for '#top' and '#does-not-exist' not found should fail # looking for '#top' and '#does-not-exist' not found should fail
assert rowsby["https://www.google.com/#top"]["info"] == "Anchor 'top' not found" assert rowsby["http://localhost:7777/#top"]["info"] == "Anchor 'top' not found"
assert rowsby["http://www.sphinx-doc.org/en/master/index.html#does-not-exist"]["info"] == "Anchor 'does-not-exist' not found" assert rowsby["http://localhost:7777#does-not-exist"]["info"] == "Anchor 'does-not-exist' not found"
# images should fail # images should fail
assert "Not Found for url: https://www.google.com/image.png" in \ assert "Not Found for url: http://localhost:7777/image.png" in rowsby["http://localhost:7777/image.png"]["info"]
rowsby["https://www.google.com/image.png"]["info"]
@pytest.mark.sphinx('linkcheck', testroot='linkcheck-too-many-retries', freshenv=True)
def test_too_many_retries(app):
app.build()
# Text output
assert (app.outdir / 'output.txt').exists()
content = (app.outdir / 'output.txt').read_text(encoding='utf8')
# looking for non-existent URL should fail
assert " Max retries exceeded with url: /doesnotexist" in content
# JSON output
assert (app.outdir / 'output.json').exists()
content = (app.outdir / 'output.json').read_text(encoding='utf8')
assert len(content.splitlines()) == 1
row = json.loads(content)
# the output order of the rows is not stable
# due to possible variance in network latency
# looking for non-existent URL should fail
assert row['filename'] == 'index.rst'
assert row['lineno'] == 1
assert row['status'] == 'broken'
assert row['code'] == 0
assert row['uri'] == 'https://localhost:7777/doesnotexist'
@pytest.mark.sphinx('linkcheck', testroot='linkcheck-raw-node', freshenv=True)
def test_raw_node(app):
with http_server(OKHandler):
app.build()
# JSON output
assert (app.outdir / 'output.json').exists()
content = (app.outdir / 'output.json').read_text(encoding='utf8')
assert len(content.splitlines()) == 1
row = json.loads(content)
# raw nodes' url should be checked too # raw nodes' url should be checked too
assert rowsby["https://www.sphinx-doc.org/"] == { assert row == {
'filename': 'links.txt', 'filename': 'index.rst',
'lineno': 21, 'lineno': 1,
'status': 'redirected', 'status': 'working',
'code': 302, 'code': 0,
'uri': 'https://www.sphinx-doc.org/', 'uri': 'http://localhost:7777/',
'info': 'https://www.sphinx-doc.org/en/master/', 'info': '',
} }
@pytest.mark.sphinx( @pytest.mark.sphinx(
'linkcheck', testroot='linkcheck', freshenv=True, 'linkcheck', testroot='linkcheck-anchors-ignore', freshenv=True,
confoverrides={'linkcheck_anchors_ignore': ["^!", "^top$"], confoverrides={'linkcheck_anchors_ignore': ["^!", "^top$"]})
'linkcheck_ignore': [
'https://localhost:7777/doesnotexist',
'http://www.sphinx-doc.org/en/master/index.html#',
'https://www.sphinx-doc.org/',
'https://www.google.com/image.png',
'https://www.google.com/image2.png',
'path/to/notfound'],
})
def test_anchors_ignored(app): def test_anchors_ignored(app):
app.build() with http_server(OKHandler):
app.build()
assert (app.outdir / 'output.txt').exists() assert (app.outdir / 'output.txt').exists()
content = (app.outdir / 'output.txt').read_text(encoding='utf8') content = (app.outdir / 'output.txt').read_text(encoding='utf8')
@@ -141,14 +178,16 @@ def test_raises_for_invalid_status(app):
) )
class HeadersDumperHandler(http.server.BaseHTTPRequestHandler): def capture_headers_handler(records):
def do_HEAD(self): class HeadersDumperHandler(http.server.BaseHTTPRequestHandler):
self.do_GET() def do_HEAD(self):
self.do_GET()
def do_GET(self): def do_GET(self):
self.send_response(200, "OK") self.send_response(200, "OK")
self.end_headers() self.end_headers()
print(self.headers.as_string()) records.append(self.headers.as_string())
return HeadersDumperHandler
@pytest.mark.sphinx( @pytest.mark.sphinx(
@@ -158,10 +197,12 @@ class HeadersDumperHandler(http.server.BaseHTTPRequestHandler):
(r'^http://localhost:7777/$', ('user1', 'password')), (r'^http://localhost:7777/$', ('user1', 'password')),
(r'.*local.*', ('user2', 'hunter2')), (r'.*local.*', ('user2', 'hunter2')),
]}) ]})
def test_auth_header_uses_first_match(app, capsys): def test_auth_header_uses_first_match(app):
with http_server(HeadersDumperHandler): records = []
with http_server(capture_headers_handler(records)):
app.build() app.build()
stdout, stderr = capsys.readouterr()
stdout = "\n".join(records)
encoded_auth = base64.b64encode(b'user1:password').decode('ascii') encoded_auth = base64.b64encode(b'user1:password').decode('ascii')
assert f"Authorization: Basic {encoded_auth}\n" in stdout assert f"Authorization: Basic {encoded_auth}\n" in stdout
@@ -169,10 +210,12 @@ def test_auth_header_uses_first_match(app, capsys):
@pytest.mark.sphinx( @pytest.mark.sphinx(
'linkcheck', testroot='linkcheck-localserver', freshenv=True, 'linkcheck', testroot='linkcheck-localserver', freshenv=True,
confoverrides={'linkcheck_auth': [(r'^$', ('user1', 'password'))]}) confoverrides={'linkcheck_auth': [(r'^$', ('user1', 'password'))]})
def test_auth_header_no_match(app, capsys): def test_auth_header_no_match(app):
with http_server(HeadersDumperHandler): records = []
with http_server(capture_headers_handler(records)):
app.build() app.build()
stdout, stderr = capsys.readouterr()
stdout = "\n".join(records)
assert "Authorization" not in stdout assert "Authorization" not in stdout
@@ -186,11 +229,12 @@ def test_auth_header_no_match(app, capsys):
"X-Secret": "open sesami", "X-Secret": "open sesami",
}, },
}}) }})
def test_linkcheck_request_headers(app, capsys): def test_linkcheck_request_headers(app):
with http_server(HeadersDumperHandler): records = []
with http_server(capture_headers_handler(records)):
app.build() app.build()
stdout, _stderr = capsys.readouterr() stdout = "\n".join(records)
assert "Accept: text/html\n" in stdout assert "Accept: text/html\n" in stdout
assert "X-Secret" not in stdout assert "X-Secret" not in stdout
assert "sesami" not in stdout assert "sesami" not in stdout
@@ -202,11 +246,12 @@ def test_linkcheck_request_headers(app, capsys):
"http://localhost:7777": {"Accept": "application/json"}, "http://localhost:7777": {"Accept": "application/json"},
"*": {"X-Secret": "open sesami"}, "*": {"X-Secret": "open sesami"},
}}) }})
def test_linkcheck_request_headers_no_slash(app, capsys): def test_linkcheck_request_headers_no_slash(app):
with http_server(HeadersDumperHandler): records = []
with http_server(capture_headers_handler(records)):
app.build() app.build()
stdout, _stderr = capsys.readouterr() stdout = "\n".join(records)
assert "Accept: application/json\n" in stdout assert "Accept: application/json\n" in stdout
assert "X-Secret" not in stdout assert "X-Secret" not in stdout
assert "sesami" not in stdout assert "sesami" not in stdout
@@ -218,11 +263,12 @@ def test_linkcheck_request_headers_no_slash(app, capsys):
"http://do.not.match.org": {"Accept": "application/json"}, "http://do.not.match.org": {"Accept": "application/json"},
"*": {"X-Secret": "open sesami"}, "*": {"X-Secret": "open sesami"},
}}) }})
def test_linkcheck_request_headers_default(app, capsys): def test_linkcheck_request_headers_default(app):
with http_server(HeadersDumperHandler): records = []
with http_server(capture_headers_handler(records)):
app.build() app.build()
stdout, _stderr = capsys.readouterr() stdout = "\n".join(records)
assert "Accepts: application/json\n" not in stdout assert "Accepts: application/json\n" not in stdout
assert "X-Secret: open sesami\n" in stdout assert "X-Secret: open sesami\n" in stdout
@@ -299,14 +345,21 @@ def test_linkcheck_allowed_redirects(app, warning):
app.build() app.build()
with open(app.outdir / 'output.json', encoding='utf-8') as fp: with open(app.outdir / 'output.json', encoding='utf-8') as fp:
records = [json.loads(l) for l in fp.readlines()] rows = [json.loads(l) for l in fp.readlines()]
assert len(records) == 2 assert len(rows) == 2
result = {r["uri"]: r["status"] for r in records} records = {row["uri"]: row for row in rows}
assert result["http://localhost:7777/path1"] == "working" assert records["http://localhost:7777/path1"]["status"] == "working"
assert result["http://localhost:7777/path2"] == "redirected" assert records["http://localhost:7777/path2"] == {
'filename': 'index.rst',
'lineno': 3,
'status': 'redirected',
'code': 302,
'uri': 'http://localhost:7777/path2',
'info': 'http://localhost:7777/?redirected=1',
}
assert ("index.rst:1: WARNING: redirect http://localhost:7777/path2 - with Found to " assert ("index.rst:3: WARNING: redirect http://localhost:7777/path2 - with Found to "
"http://localhost:7777/?redirected=1\n" in strip_escseq(warning.getvalue())) "http://localhost:7777/?redirected=1\n" in strip_escseq(warning.getvalue()))
assert len(warning.getvalue().splitlines()) == 1 assert len(warning.getvalue().splitlines()) == 1
@@ -422,18 +475,23 @@ def test_connect_to_selfsigned_nonexistent_cert_file(app):
} }
@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver', freshenv=True) class InfiniteRedirectOnHeadHandler(http.server.BaseHTTPRequestHandler):
def test_TooManyRedirects_on_HEAD(app): def do_HEAD(self):
class InfiniteRedirectOnHeadHandler(http.server.BaseHTTPRequestHandler): self.send_response(302, "Found")
def do_HEAD(self): self.send_header("Location", "http://localhost:7777/")
self.send_response(302, "Found") self.end_headers()
self.send_header("Location", "http://localhost:7777/")
self.end_headers()
def do_GET(self): def do_GET(self):
self.send_response(200, "OK") self.send_response(200, "OK")
self.end_headers() self.end_headers()
self.wfile.write(b"ok\n") self.wfile.write(b"ok\n")
@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver', freshenv=True)
def test_TooManyRedirects_on_HEAD(app, monkeypatch):
import requests.sessions
monkeypatch.setattr(requests.sessions, "DEFAULT_REDIRECT_LIMIT", 5)
with http_server(InfiniteRedirectOnHeadHandler): with http_server(InfiniteRedirectOnHeadHandler):
app.build() app.build()
@@ -540,7 +598,7 @@ def test_too_many_requests_retry_after_without_header(app, capsys):
@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver', freshenv=True) @pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver', freshenv=True)
def test_too_many_requests_user_timeout(app, capsys): def test_too_many_requests_user_timeout(app):
app.config.linkcheck_rate_limit_timeout = 0.0 app.config.linkcheck_rate_limit_timeout = 0.0
with http_server(make_retry_after_handler([(429, None)])): with http_server(make_retry_after_handler([(429, None)])):
app.build() app.build()

View File

@@ -16,7 +16,7 @@ class HttpServerThread(threading.Thread):
self.server = http.server.HTTPServer(("localhost", 7777), handler) self.server = http.server.HTTPServer(("localhost", 7777), handler)
def run(self): def run(self):
self.server.serve_forever(poll_interval=0.01) self.server.serve_forever(poll_interval=0.001)
def terminate(self): def terminate(self):
self.server.shutdown() self.server.shutdown()