diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py
index 1b0dd4011..9fe689ec9 100644
--- a/sphinx/builders/linkcheck.py
+++ b/sphinx/builders/linkcheck.py
@@ -8,6 +8,7 @@
     :license: BSD, see LICENSE for details.
 """
 
+import json
 import queue
 import re
 import socket
@@ -90,6 +91,8 @@ class CheckExternalLinksBuilder(Builder):
         socket.setdefaulttimeout(5.0)
         # create output file
         open(path.join(self.outdir, 'output.txt'), 'w').close()
+        # create JSON output file
+        open(path.join(self.outdir, 'output.json'), 'w').close()
 
         # create queues and worker threads
         self.wqueue = queue.Queue()  # type: queue.Queue
@@ -225,9 +228,16 @@ class CheckExternalLinksBuilder(Builder):
 
     def process_result(self, result: Tuple[str, str, int, str, str, int]) -> None:
         uri, docname, lineno, status, info, code = result
+
+        filename = self.env.doc2path(docname, None)
+        linkstat = dict(filename=filename, lineno=lineno,
+                        status=status, code=code, uri=uri,
+                        info=info)
         if status == 'unchecked':
+            self.write_linkstat(linkstat)
             return
         if status == 'working' and info == 'old':
+            self.write_linkstat(linkstat)
             return
         if lineno:
             logger.info('(line %4d) ', lineno, nonl=True)
@@ -236,18 +246,22 @@ class CheckExternalLinksBuilder(Builder):
                 logger.info(darkgray('-ignored- ') + uri + ': ' + info)
             else:
                 logger.info(darkgray('-ignored- ') + uri)
+            self.write_linkstat(linkstat)
         elif status == 'local':
             logger.info(darkgray('-local- ') + uri)
-            self.write_entry('local', docname, lineno, uri)
+            self.write_entry('local', docname, filename, lineno, uri)
+            self.write_linkstat(linkstat)
         elif status == 'working':
             logger.info(darkgreen('ok ') + uri + info)
+            self.write_linkstat(linkstat)
         elif status == 'broken':
-            self.write_entry('broken', docname, lineno, uri + ': ' + info)
             if self.app.quiet or self.app.warningiserror:
                 logger.warning(__('broken link: %s (%s)'), uri, info,
-                               location=(self.env.doc2path(docname), lineno))
+                               location=(filename, lineno))
             else:
                 logger.info(red('broken ') + uri + red(' - ' + info))
+            self.write_entry('broken', docname, filename, lineno, uri + ': ' + info)
+            self.write_linkstat(linkstat)
         elif status == 'redirected':
             try:
                 text, color = {
@@ -259,9 +273,11 @@ class CheckExternalLinksBuilder(Builder):
                 }[code]
             except KeyError:
                 text, color = ('with unknown code', purple)
-            self.write_entry('redirected ' + text, docname, lineno,
-                             uri + ' to ' + info)
+            linkstat['text'] = text
             logger.info(color('redirect ') + uri + color(' - ' + text + ' to ' + info))
+            self.write_entry('redirected ' + text, docname, filename,
+                             lineno, uri + ' to ' + info)
+            self.write_linkstat(linkstat)
 
     def get_target_uri(self, docname: str, typ: str = None) -> str:
         return ''
@@ -301,10 +317,23 @@ class CheckExternalLinksBuilder(Builder):
         if self.broken:
             self.app.statuscode = 1
 
-    def write_entry(self, what: str, docname: str, line: int, uri: str) -> None:
-        with open(path.join(self.outdir, 'output.txt'), 'a', encoding='utf-8') as output:
-            output.write("%s:%s: [%s] %s\n" % (self.env.doc2path(docname, None),
-                                               line, what, uri))
+    def write_entry(self, what: str, docname: str, filename: str, line: int,
+                    uri: str) -> None:
+        """Append one ``filename:line: [what] uri`` record to output.txt.
+
+        *docname* is not used here; the caller passes the resolved *filename*.
+        """
+        # keep the explicit UTF-8 so non-ASCII URIs don't depend on the locale
+        with open(path.join(self.outdir, 'output.txt'), 'a',
+                  encoding='utf-8') as output:
+            output.write("%s:%s: [%s] %s\n" % (filename, line, what, uri))
+
+    def write_linkstat(self, data: dict) -> None:
+        """Append *data* to output.json as one JSON object per line."""
+        with open(path.join(self.outdir, 'output.json'), 'a',
+                  encoding='utf-8') as output:
+            output.write(json.dumps(data))
+            output.write('\n')
 
     def finish(self) -> None:
         for worker in self.workers:
diff --git a/tests/test_build_linkcheck.py b/tests/test_build_linkcheck.py
index 7a01fb590..5220b480e 100644
--- a/tests/test_build_linkcheck.py
+++ b/tests/test_build_linkcheck.py
@@ -8,6 +8,8 @@
     :license: BSD, see LICENSE for details.
 """
 
+import json
+import re
 from unittest import mock
 
 import pytest
@@ -20,7 +22,7 @@ def test_defaults(app, status, warning):
     content = (app.outdir / 'output.txt').read_text()
 
     print(content)
-    # looking for '#top' and 'does-not-exist' not found should fail
+    # looking for '#top' and '#does-not-exist' not found should fail
     assert "Anchor 'top' not found" in content
     assert "Anchor 'does-not-exist' not found" in content
     # looking for non-existent URL should fail
@@ -31,6 +33,61 @@ def test_defaults(app, status, warning):
     assert len(content.splitlines()) == 5
 
 
+@pytest.mark.sphinx('linkcheck', testroot='linkcheck', freshenv=True)
+def test_defaults_json(app, status, warning):
+    """Check the per-link JSON records that linkcheck writes to output.json."""
+    app.builder.build_all()
+
+    assert (app.outdir / 'output.json').exists()
+    content = (app.outdir / 'output.json').read_text()
+    print(content)
+
+    rows = [json.loads(x) for x in content.splitlines()]
+    row = rows[0]
+    for attr in ["filename", "lineno", "status", "code", "uri",
+                 "info"]:
+        assert attr in row
+
+    assert len(content.splitlines()) == 8
+    assert len(rows) == 8
+    # the output order of the rows is not stable
+    # due to possible variance in network latency
+    rowsby = {row["uri"]: row for row in rows}
+    assert rowsby["https://www.google.com#!bar"] == {
+        'filename': 'links.txt',
+        'lineno': 10,
+        'status': 'working',
+        'code': 0,
+        'uri': 'https://www.google.com#!bar',
+        'info': ''
+    }
+    # looking for non-existent URL should fail
+    dnerow = rowsby['https://localhost:7777/doesnotexist']
+    assert dnerow['filename'] == 'links.txt'
+    assert dnerow['lineno'] == 13
+    assert dnerow['status'] == 'broken'
+    assert dnerow['code'] == 0
+    assert dnerow['uri'] == 'https://localhost:7777/doesnotexist'
+    # 'info' is not asserted for this row: the connection-refused message
+    # differs by platform (e.g. "[Errno 111]" vs. "[WinError 10061]")
+    assert rowsby['https://www.google.com/image2.png'] == {
+        'filename': 'links.txt',
+        'lineno': 16,
+        'status': 'broken',
+        'code': 0,
+        'uri': 'https://www.google.com/image2.png',
+        'info': '404 Client Error: Not Found for url: https://www.google.com/image2.png'
+    }
+    # looking for '#top' and '#does-not-exist' not found should fail
+    assert "Anchor 'top' not found" == \
+        rowsby["https://www.google.com/#top"]["info"]
+    assert "Anchor 'does-not-exist' not found" == \
+        rowsby["http://www.sphinx-doc.org/en/1.7/intro.html#does-not-exist"]["info"]
+    # images should fail
+    assert "Not Found for url: https://www.google.com/image.png" in \
+        rowsby["https://www.google.com/image.png"]["info"]
+
+
 @pytest.mark.sphinx(
     'linkcheck', testroot='linkcheck', freshenv=True,
     confoverrides={'linkcheck_anchors_ignore': ["^!", "^top$"],