sphinx/tests/test_build_linkcheck.py
François Freitag d453088b58
linkcheck: Prevent REQUESTS_CA_BUNDLE leak in tests
Test `test_connect_to_selfsigned_nonexistent_cert_file` serves two
purposes:
- verify the behavior when the CA bundle file path is incorrect
- flag a leak of REQUESTS_CA_BUNDLE

Assumes that test runs after
`test_connect_to_selfsigned_with_requests_env_var`. Not great, but
better than no testing.
2020-11-14 19:58:02 +01:00

376 lines
13 KiB
Python

"""
test_build_linkcheck
~~~~~~~~~~~~~~~~~~~~
Test the build process with manpage builder with the test root.
:copyright: Copyright 2007-2020 by the Sphinx team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
import http.server
import json
import re
import textwrap
from unittest import mock
import pytest
import requests
from utils import CERT_FILE, http_server, https_server, modify_env
@pytest.mark.sphinx('linkcheck', testroot='linkcheck', freshenv=True)
def test_defaults(app):
app.builder.build_all()
assert (app.outdir / 'output.txt').exists()
content = (app.outdir / 'output.txt').read_text()
print(content)
# looking for '#top' and '#does-not-exist' not found should fail
assert "Anchor 'top' not found" in content
assert "Anchor 'does-not-exist' not found" in content
# looking for non-existent URL should fail
assert " Max retries exceeded with url: /doesnotexist" in content
# images should fail
assert "Not Found for url: https://www.google.com/image.png" in content
assert "Not Found for url: https://www.google.com/image2.png" in content
# looking for local file should fail
assert "[broken] path/to/notfound" in content
assert len(content.splitlines()) == 6
@pytest.mark.sphinx('linkcheck', testroot='linkcheck', freshenv=True)
def test_defaults_json(app):
app.builder.build_all()
assert (app.outdir / 'output.json').exists()
content = (app.outdir / 'output.json').read_text()
print(content)
rows = [json.loads(x) for x in content.splitlines()]
row = rows[0]
for attr in ["filename", "lineno", "status", "code", "uri",
"info"]:
assert attr in row
assert len(content.splitlines()) == 10
assert len(rows) == 10
# the output order of the rows is not stable
# due to possible variance in network latency
rowsby = {row["uri"]:row for row in rows}
assert rowsby["https://www.google.com#!bar"] == {
'filename': 'links.txt',
'lineno': 10,
'status': 'working',
'code': 0,
'uri': 'https://www.google.com#!bar',
'info': ''
}
# looking for non-existent URL should fail
dnerow = rowsby['https://localhost:7777/doesnotexist']
assert dnerow['filename'] == 'links.txt'
assert dnerow['lineno'] == 13
assert dnerow['status'] == 'broken'
assert dnerow['code'] == 0
assert dnerow['uri'] == 'https://localhost:7777/doesnotexist'
assert rowsby['https://www.google.com/image2.png'] == {
'filename': 'links.txt',
'lineno': 18,
'status': 'broken',
'code': 0,
'uri': 'https://www.google.com/image2.png',
'info': '404 Client Error: Not Found for url: https://www.google.com/image2.png'
}
# looking for '#top' and '#does-not-exist' not found should fail
assert "Anchor 'top' not found" == \
rowsby["https://www.google.com/#top"]["info"]
assert "Anchor 'does-not-exist' not found" == \
rowsby["http://www.sphinx-doc.org/en/1.7/intro.html#does-not-exist"]["info"]
# images should fail
assert "Not Found for url: https://www.google.com/image.png" in \
rowsby["https://www.google.com/image.png"]["info"]
@pytest.mark.sphinx(
'linkcheck', testroot='linkcheck', freshenv=True,
confoverrides={'linkcheck_anchors_ignore': ["^!", "^top$"],
'linkcheck_ignore': [
'https://localhost:7777/doesnotexist',
'http://www.sphinx-doc.org/en/1.7/intro.html#',
'https://www.google.com/image.png',
'https://www.google.com/image2.png',
'path/to/notfound']
})
def test_anchors_ignored(app):
app.builder.build_all()
assert (app.outdir / 'output.txt').exists()
content = (app.outdir / 'output.txt').read_text()
# expect all ok when excluding #top
assert not content
@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver-anchor', freshenv=True)
def test_raises_for_invalid_status(app):
class InternalServerErrorHandler(http.server.BaseHTTPRequestHandler):
def do_GET(self):
self.send_error(500, "Internal Server Error")
with http_server(InternalServerErrorHandler):
app.builder.build_all()
content = (app.outdir / 'output.txt').read_text()
assert content == (
"index.rst:1: [broken] http://localhost:7777/#anchor: "
"500 Server Error: Internal Server Error "
"for url: http://localhost:7777/\n"
)
class HeadersDumperHandler(http.server.BaseHTTPRequestHandler):
def do_HEAD(self):
self.do_GET()
def do_GET(self):
self.send_response(200, "OK")
self.end_headers()
print(self.headers.as_string())
@pytest.mark.sphinx(
'linkcheck', testroot='linkcheck-localserver', freshenv=True,
confoverrides={'linkcheck_auth': [
(r'^$', ('no', 'match')),
(r'^http://localhost:7777/$', ('user1', 'password')),
(r'.*local.*', ('user2', 'hunter2')),
]})
def test_auth_header_uses_first_match(app, capsys):
with http_server(HeadersDumperHandler):
app.builder.build_all()
stdout, stderr = capsys.readouterr()
auth = requests.auth._basic_auth_str('user1', 'password')
assert "Authorization: %s\n" % auth in stdout
@pytest.mark.sphinx(
'linkcheck', testroot='linkcheck-localserver', freshenv=True,
confoverrides={'linkcheck_auth': [(r'^$', ('user1', 'password'))]})
def test_auth_header_no_match(app, capsys):
with http_server(HeadersDumperHandler):
app.builder.build_all()
stdout, stderr = capsys.readouterr()
assert "Authorization" not in stdout
@pytest.mark.sphinx(
'linkcheck', testroot='linkcheck-localserver', freshenv=True,
confoverrides={'linkcheck_request_headers': {
"http://localhost:7777/": {
"Accept": "text/html",
},
"*": {
"X-Secret": "open sesami",
}
}})
def test_linkcheck_request_headers(app, capsys):
with http_server(HeadersDumperHandler):
app.builder.build_all()
stdout, _stderr = capsys.readouterr()
assert "Accept: text/html\n" in stdout
assert "X-Secret" not in stdout
assert "sesami" not in stdout
@pytest.mark.sphinx(
'linkcheck', testroot='linkcheck-localserver', freshenv=True,
confoverrides={'linkcheck_request_headers': {
"http://localhost:7777": {"Accept": "application/json"},
"*": {"X-Secret": "open sesami"}
}})
def test_linkcheck_request_headers_no_slash(app, capsys):
with http_server(HeadersDumperHandler):
app.builder.build_all()
stdout, _stderr = capsys.readouterr()
assert "Accept: application/json\n" in stdout
assert "X-Secret" not in stdout
assert "sesami" not in stdout
@pytest.mark.sphinx(
'linkcheck', testroot='linkcheck-localserver', freshenv=True,
confoverrides={'linkcheck_request_headers': {
"http://do.not.match.org": {"Accept": "application/json"},
"*": {"X-Secret": "open sesami"}
}})
def test_linkcheck_request_headers_default(app, capsys):
with http_server(HeadersDumperHandler):
app.builder.build_all()
stdout, _stderr = capsys.readouterr()
assert "Accepts: application/json\n" not in stdout
assert "X-Secret: open sesami\n" in stdout
def make_redirect_handler(*, support_head):
class RedirectOnceHandler(http.server.BaseHTTPRequestHandler):
def do_HEAD(self):
if support_head:
self.do_GET()
else:
self.send_response(405, "Method Not Allowed")
self.end_headers()
def do_GET(self):
if self.path == "/?redirected=1":
self.send_response(204, "No content")
else:
self.send_response(302, "Found")
self.send_header("Location", "http://localhost:7777/?redirected=1")
self.end_headers()
def log_date_time_string(self):
"""Strip date and time from logged messages for assertions."""
return ""
return RedirectOnceHandler
@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver', freshenv=True)
def test_follows_redirects_on_HEAD(app, capsys):
with http_server(make_redirect_handler(support_head=True)):
app.builder.build_all()
stdout, stderr = capsys.readouterr()
content = (app.outdir / 'output.txt').read_text()
assert content == (
"index.rst:1: [redirected with Found] "
"http://localhost:7777/ to http://localhost:7777/?redirected=1\n"
)
assert stderr == textwrap.dedent(
"""\
127.0.0.1 - - [] "HEAD / HTTP/1.1" 302 -
127.0.0.1 - - [] "HEAD /?redirected=1 HTTP/1.1" 204 -
"""
)
@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver', freshenv=True)
def test_follows_redirects_on_GET(app, capsys):
with http_server(make_redirect_handler(support_head=False)):
app.builder.build_all()
stdout, stderr = capsys.readouterr()
content = (app.outdir / 'output.txt').read_text()
assert content == (
"index.rst:1: [redirected with Found] "
"http://localhost:7777/ to http://localhost:7777/?redirected=1\n"
)
assert stderr == textwrap.dedent(
"""\
127.0.0.1 - - [] "HEAD / HTTP/1.1" 405 -
127.0.0.1 - - [] "GET / HTTP/1.1" 302 -
127.0.0.1 - - [] "GET /?redirected=1 HTTP/1.1" 204 -
"""
)
class OKHandler(http.server.BaseHTTPRequestHandler):
def do_HEAD(self):
self.send_response(200, "OK")
self.end_headers()
def do_GET(self):
self.do_HEAD()
self.wfile.write(b"ok\n")
@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver-https', freshenv=True)
def test_invalid_ssl(app):
# Link indicates SSL should be used (https) but the server does not handle it.
with http_server(OKHandler):
app.builder.build_all()
with open(app.outdir / 'output.json') as fp:
content = json.load(fp)
assert content["status"] == "broken"
assert content["filename"] == "index.rst"
assert content["lineno"] == 1
assert content["uri"] == "https://localhost:7777/"
assert "SSLError" in content["info"]
@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver-https', freshenv=True)
def test_connect_to_selfsigned_fails(app):
with https_server(OKHandler):
app.builder.build_all()
with open(app.outdir / 'output.json') as fp:
content = json.load(fp)
assert content["status"] == "broken"
assert content["filename"] == "index.rst"
assert content["lineno"] == 1
assert content["uri"] == "https://localhost:7777/"
assert "[SSL: CERTIFICATE_VERIFY_FAILED]" in content["info"]
@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver-https', freshenv=True)
def test_connect_to_selfsigned_with_tls_verify_false(app):
app.config.tls_verify = False
with https_server(OKHandler):
app.builder.build_all()
with open(app.outdir / 'output.json') as fp:
content = json.load(fp)
assert content == {
"code": 0,
"status": "working",
"filename": "index.rst",
"lineno": 1,
"uri": "https://localhost:7777/",
"info": "",
}
@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver-https', freshenv=True)
def test_connect_to_selfsigned_with_tls_cacerts(app):
app.config.tls_cacerts = CERT_FILE
with https_server(OKHandler):
app.builder.build_all()
with open(app.outdir / 'output.json') as fp:
content = json.load(fp)
assert content == {
"code": 0,
"status": "working",
"filename": "index.rst",
"lineno": 1,
"uri": "https://localhost:7777/",
"info": "",
}
@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver-https', freshenv=True)
def test_connect_to_selfsigned_with_requests_env_var(app):
with modify_env(REQUESTS_CA_BUNDLE=CERT_FILE), https_server(OKHandler):
app.builder.build_all()
with open(app.outdir / 'output.json') as fp:
content = json.load(fp)
assert content == {
"code": 0,
"status": "working",
"filename": "index.rst",
"lineno": 1,
"uri": "https://localhost:7777/",
"info": "",
}
@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver-https', freshenv=True)
def test_connect_to_selfsigned_nonexistent_cert_file(app):
app.config.tls_cacerts = "does/not/exist"
with https_server(OKHandler):
app.builder.build_all()
with open(app.outdir / 'output.json') as fp:
content = json.load(fp)
assert content == {
"code": 0,
"status": "broken",
"filename": "index.rst",
"lineno": 1,
"uri": "https://localhost:7777/",
"info": "Could not find a suitable TLS CA certificate bundle, invalid path: does/not/exist",
}