"""
    test_build_linkcheck
    ~~~~~~~~~~~~~~~~~~~~

    Test the build process with the linkcheck builder with the test root.

    :copyright: Copyright 2007-2020 by the Sphinx team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import http.server
import json
import textwrap

import pytest
import requests

from .utils import CERT_FILE, http_server, https_server, modify_env


@pytest.mark.sphinx('linkcheck', testroot='linkcheck', freshenv=True)
def test_defaults(app):
    """Check the plain-text report written for the 'linkcheck' test root."""
    app.builder.build_all()

    assert (app.outdir / 'output.txt').exists()
    content = (app.outdir / 'output.txt').read_text()

    # Echo the report so it shows up in pytest's captured output on failure.
    print(content)
    # looking for '#top' and '#does-not-exist' not found should fail
    assert "Anchor 'top' not found" in content
    assert "Anchor 'does-not-exist' not found" in content
    # looking for non-existent URL should fail
    assert " Max retries exceeded with url: /doesnotexist" in content
    # images should fail
    assert "Not Found for url: https://www.google.com/image.png" in content
    assert "Not Found for url: https://www.google.com/image2.png" in content
    # looking for local file should fail
    assert "[broken] path/to/notfound" in content
    assert len(content.splitlines()) == 6


@pytest.mark.sphinx('linkcheck', testroot='linkcheck', freshenv=True)
def test_defaults_json(app):
    """Check the JSON-lines report written for the 'linkcheck' test root."""
    app.builder.build_all()

    assert (app.outdir / 'output.json').exists()
    content = (app.outdir / 'output.json').read_text()
    print(content)

    # output.json holds one JSON document per line.
    lines = content.splitlines()
    rows = [json.loads(line) for line in lines]
    row = rows[0]
    for attr in ["filename", "lineno", "status", "code", "uri", "info"]:
        assert attr in row

    assert len(lines) == 10
    assert len(rows) == 10
    # the output order of the rows is not stable
    # due to possible variance in network latency
    rowsby = {row["uri"]: row for row in rows}
    assert rowsby["https://www.google.com#!bar"] == {
        'filename': 'links.txt',
        'lineno': 10,
        'status': 'working',
        'code': 0,
        'uri': 'https://www.google.com#!bar',
        'info': '',
    }
    # looking for non-existent URL should fail
    dnerow = rowsby['https://localhost:7777/doesnotexist']
    assert dnerow['filename'] == 'links.txt'
    assert dnerow['lineno'] == 13
    assert dnerow['status'] == 'broken'
    assert dnerow['code'] == 0
    assert dnerow['uri'] == 'https://localhost:7777/doesnotexist'
    assert rowsby['https://www.google.com/image2.png'] == {
        'filename': 'links.txt',
        'lineno': 18,
        'status': 'broken',
        'code': 0,
        'uri': 'https://www.google.com/image2.png',
        'info': '404 Client Error: Not Found for url: https://www.google.com/image2.png',
    }
    # looking for '#top' and '#does-not-exist' not found should fail
    assert rowsby["https://www.google.com/#top"]["info"] == \
        "Anchor 'top' not found"
    assert rowsby["http://www.sphinx-doc.org/en/1.7/intro.html#does-not-exist"]["info"] == \
        "Anchor 'does-not-exist' not found"
    # images should fail
    assert "Not Found for url: https://www.google.com/image.png" in \
        rowsby["https://www.google.com/image.png"]["info"]


@pytest.mark.sphinx(
    'linkcheck', testroot='linkcheck', freshenv=True,
    confoverrides={'linkcheck_anchors_ignore': ["^!", "^top$"],
                   'linkcheck_ignore': [
                       'https://localhost:7777/doesnotexist',
                       'http://www.sphinx-doc.org/en/1.7/intro.html#',
                       'https://www.google.com/image.png',
                       'https://www.google.com/image2.png',
                       'path/to/notfound']
                   })
def test_anchors_ignored(app):
    """With the ignore settings above, the text report must be empty."""
    app.builder.build_all()

    assert (app.outdir / 'output.txt').exists()
    content = (app.outdir / 'output.txt').read_text()

    # expect all ok when excluding #top
    assert not content


@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver-anchor', freshenv=True)
def test_raises_for_invalid_status(app):
    """A 500 response to an anchored link is reported as broken."""
    class InternalServerErrorHandler(http.server.BaseHTTPRequestHandler):
        def do_GET(self):
            self.send_error(500, "Internal Server Error")

    with http_server(InternalServerErrorHandler):
        app.builder.build_all()
    content = (app.outdir / 'output.txt').read_text()
    assert content == (
        "index.rst:1: [broken] http://localhost:7777/#anchor: "
        "500 Server Error: Internal Server Error "
        "for url: http://localhost:7777/\n"
    )


class HeadersDumperHandler(http.server.BaseHTTPRequestHandler):
    """Answer HEAD and GET with 200 and print the request headers to stdout.

    Tests read the printed headers back through ``capsys``.
    """

    def do_HEAD(self):
        self.do_GET()

    def do_GET(self):
        self.send_response(200, "OK")
        self.end_headers()
        print(self.headers.as_string())


@pytest.mark.sphinx(
    'linkcheck', testroot='linkcheck-localserver', freshenv=True,
    confoverrides={'linkcheck_auth': [
        (r'^$', ('no', 'match')),
        (r'^http://localhost:7777/$', ('user1', 'password')),
        (r'.*local.*', ('user2', 'hunter2')),
    ]})
def test_auth_header_uses_first_match(app, capsys):
    """The Authorization header carries the 'user1' credentials.

    Both the second and third ``linkcheck_auth`` patterns match the URL;
    the assertion pins the credentials of the earlier one.
    """
    with http_server(HeadersDumperHandler):
        app.builder.build_all()
    stdout, _stderr = capsys.readouterr()
    auth = requests.auth._basic_auth_str('user1', 'password')
    assert "Authorization: %s\n" % auth in stdout


@pytest.mark.sphinx(
    'linkcheck', testroot='linkcheck-localserver', freshenv=True,
    confoverrides={'linkcheck_auth': [(r'^$', ('user1', 'password'))]})
def test_auth_header_no_match(app, capsys):
    """No Authorization header appears when no auth pattern matches."""
    with http_server(HeadersDumperHandler):
        app.builder.build_all()
    stdout, _stderr = capsys.readouterr()
    assert "Authorization" not in stdout


@pytest.mark.sphinx(
    'linkcheck', testroot='linkcheck-localserver', freshenv=True,
    confoverrides={'linkcheck_request_headers': {
        "http://localhost:7777/": {
            "Accept": "text/html",
        },
        "*": {
            "X-Secret": "open sesami",
        }
    }})
def test_linkcheck_request_headers(app, capsys):
    """Headers configured for the exact URL are sent; the '*' ones are not."""
    with http_server(HeadersDumperHandler):
        app.builder.build_all()

    stdout, _stderr = capsys.readouterr()
    assert "Accept: text/html\n" in stdout
    assert "X-Secret" not in stdout
    assert "sesami" not in stdout


@pytest.mark.sphinx(
    'linkcheck', testroot='linkcheck-localserver', freshenv=True,
    confoverrides={'linkcheck_request_headers': {
        "http://localhost:7777": {"Accept": "application/json"},
        "*": {"X-Secret": "open sesami"}
    }})
def test_linkcheck_request_headers_no_slash(app, capsys):
    """A header key without the trailing slash still applies to the URL."""
    with http_server(HeadersDumperHandler):
        app.builder.build_all()

    stdout, _stderr = capsys.readouterr()
    assert "Accept: application/json\n" in stdout
    assert "X-Secret" not in stdout
    assert "sesami" not in stdout


@pytest.mark.sphinx(
    'linkcheck', testroot='linkcheck-localserver', freshenv=True,
    confoverrides={'linkcheck_request_headers': {
        "http://do.not.match.org": {"Accept": "application/json"},
        "*": {"X-Secret": "open sesami"}
    }})
def test_linkcheck_request_headers_default(app, capsys):
    """Only the '*' headers are sent when no URL-specific key matches."""
    with http_server(HeadersDumperHandler):
        app.builder.build_all()

    stdout, _stderr = capsys.readouterr()
    # The header name is "Accept"; checking for "Accepts" could never fail.
    assert "Accept: application/json\n" not in stdout
    assert "X-Secret: open sesami\n" in stdout


def make_redirect_handler(*, support_head):
    """Build a handler class that redirects once to ``/?redirected=1``.

    :param support_head: when true, HEAD is served exactly like GET;
        otherwise HEAD is rejected with 405 Method Not Allowed.
    :returns: a ``BaseHTTPRequestHandler`` subclass.
    """
    class RedirectOnceHandler(http.server.BaseHTTPRequestHandler):
        def do_HEAD(self):
            if support_head:
                self.do_GET()
            else:
                self.send_response(405, "Method Not Allowed")
                self.end_headers()

        def do_GET(self):
            if self.path == "/?redirected=1":
                self.send_response(204, "No content")
            else:
                self.send_response(302, "Found")
                self.send_header("Location", "http://localhost:7777/?redirected=1")
            self.end_headers()

        def log_date_time_string(self):
            """Strip date and time from logged messages for assertions."""
            return ""

    return RedirectOnceHandler


@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver', freshenv=True)
def test_follows_redirects_on_HEAD(app, capsys):
    """A redirect answered to HEAD is followed with another HEAD request."""
    with http_server(make_redirect_handler(support_head=True)):
        app.builder.build_all()
    _stdout, stderr = capsys.readouterr()
    content = (app.outdir / 'output.txt').read_text()
    assert content == (
        "index.rst:1: [redirected with Found] "
        "http://localhost:7777/ to http://localhost:7777/?redirected=1\n"
    )
    # The server's request log on stderr shows which methods were used.
    assert stderr == textwrap.dedent(
        """\
        127.0.0.1 - - [] "HEAD / HTTP/1.1" 302 -
        127.0.0.1 - - [] "HEAD /?redirected=1 HTTP/1.1" 204 -
        """
    )


@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver', freshenv=True)
def test_follows_redirects_on_GET(app, capsys):
    """After HEAD is rejected with 405, the redirect is followed using GET."""
    with http_server(make_redirect_handler(support_head=False)):
        app.builder.build_all()
    _stdout, stderr = capsys.readouterr()
    content = (app.outdir / 'output.txt').read_text()
    assert content == (
        "index.rst:1: [redirected with Found] "
        "http://localhost:7777/ to http://localhost:7777/?redirected=1\n"
    )
    # The server's request log on stderr shows which methods were used.
    assert stderr == textwrap.dedent(
        """\
        127.0.0.1 - - [] "HEAD / HTTP/1.1" 405 -
        127.0.0.1 - - [] "GET / HTTP/1.1" 302 -
        127.0.0.1 - - [] "GET /?redirected=1 HTTP/1.1" 204 -
        """
    )


def _load_json_output(app):
    """Return ``output.json`` from the build directory parsed as one document.

    ``json.load`` accepts a single JSON document, so this only works when the
    report contains exactly one result row.
    """
    with open(app.outdir / 'output.json') as fp:
        return json.load(fp)


class OKHandler(http.server.BaseHTTPRequestHandler):
    """Answer HEAD with a bare 200 and GET with 200 plus a short body."""

    def do_HEAD(self):
        self.send_response(200, "OK")
        self.end_headers()

    def do_GET(self):
        self.do_HEAD()
        self.wfile.write(b"ok\n")


@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver-https', freshenv=True)
def test_invalid_ssl(app):
    # Link indicates SSL should be used (https) but the server does not handle it.
    with http_server(OKHandler):
        app.builder.build_all()

    content = _load_json_output(app)
    assert content["status"] == "broken"
    assert content["filename"] == "index.rst"
    assert content["lineno"] == 1
    assert content["uri"] == "https://localhost:7777/"
    assert "SSLError" in content["info"]


@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver-https', freshenv=True)
def test_connect_to_selfsigned_fails(app):
    """With default TLS settings the link is broken by a verify failure."""
    with https_server(OKHandler):
        app.builder.build_all()

    content = _load_json_output(app)
    assert content["status"] == "broken"
    assert content["filename"] == "index.rst"
    assert content["lineno"] == 1
    assert content["uri"] == "https://localhost:7777/"
    assert "[SSL: CERTIFICATE_VERIFY_FAILED]" in content["info"]


@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver-https', freshenv=True)
def test_connect_to_selfsigned_with_tls_verify_false(app):
    """Setting ``tls_verify = False`` makes the HTTPS link report as working."""
    app.config.tls_verify = False
    with https_server(OKHandler):
        app.builder.build_all()

    content = _load_json_output(app)
    assert content == {
        "code": 0,
        "status": "working",
        "filename": "index.rst",
        "lineno": 1,
        "uri": "https://localhost:7777/",
        "info": "",
    }


@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver-https', freshenv=True)
def test_connect_to_selfsigned_with_tls_cacerts(app):
    """Pointing ``tls_cacerts`` at CERT_FILE makes the link report as working."""
    app.config.tls_cacerts = CERT_FILE
    with https_server(OKHandler):
        app.builder.build_all()

    content = _load_json_output(app)
    assert content == {
        "code": 0,
        "status": "working",
        "filename": "index.rst",
        "lineno": 1,
        "uri": "https://localhost:7777/",
        "info": "",
    }


@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver-https', freshenv=True)
def test_connect_to_selfsigned_with_requests_env_var(app):
    """Setting REQUESTS_CA_BUNDLE to CERT_FILE makes the link report as working."""
    with modify_env(REQUESTS_CA_BUNDLE=CERT_FILE), https_server(OKHandler):
        app.builder.build_all()

    content = _load_json_output(app)
    assert content == {
        "code": 0,
        "status": "working",
        "filename": "index.rst",
        "lineno": 1,
        "uri": "https://localhost:7777/",
        "info": "",
    }


@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver-https', freshenv=True)
def test_connect_to_selfsigned_nonexistent_cert_file(app):
    """A ``tls_cacerts`` path that does not exist yields a broken link."""
    app.config.tls_cacerts = "does/not/exist"
    with https_server(OKHandler):
        app.builder.build_all()

    content = _load_json_output(app)
    assert content == {
        "code": 0,
        "status": "broken",
        "filename": "index.rst",
        "lineno": 1,
        "uri": "https://localhost:7777/",
        "info": "Could not find a suitable TLS CA certificate bundle, invalid path: does/not/exist",
    }


@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver', freshenv=True)
def test_TooManyRedirects_on_HEAD(app):
    """Endless redirects on HEAD still end in 'working' because GET returns 200."""
    class InfiniteRedirectOnHeadHandler(http.server.BaseHTTPRequestHandler):
        def do_HEAD(self):
            # Redirect back to the same URL, so HEAD can never succeed.
            self.send_response(302, "Found")
            self.send_header("Location", "http://localhost:7777/")
            self.end_headers()

        def do_GET(self):
            self.send_response(200, "OK")
            self.end_headers()
            self.wfile.write(b"ok\n")

    with http_server(InfiniteRedirectOnHeadHandler):
        app.builder.build_all()

    content = _load_json_output(app)
    assert content == {
        "code": 0,
        "status": "working",
        "filename": "index.rst",
        "lineno": 1,
        "uri": "http://localhost:7777/",
        "info": "",
    }