Merge pull request #7103 from westurner/linkcheck_json_output

ENH: linkcheck: also write all links to output.json
This commit is contained in:
Takeshi KOMIYA 2020-03-01 17:43:03 +09:00 committed by GitHub
commit 64d51a17aa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 88 additions and 10 deletions

View File

@ -8,6 +8,7 @@
:license: BSD, see LICENSE for details.
"""
import json
import queue
import re
import socket
@ -90,6 +91,8 @@ class CheckExternalLinksBuilder(Builder):
socket.setdefaulttimeout(5.0)
# create output file
open(path.join(self.outdir, 'output.txt'), 'w').close()
# create JSON output file
open(path.join(self.outdir, 'output.json'), 'w').close()
# create queues and worker threads
self.wqueue = queue.Queue() # type: queue.Queue
@ -225,9 +228,16 @@ class CheckExternalLinksBuilder(Builder):
def process_result(self, result: Tuple[str, str, int, str, str, int]) -> None:
uri, docname, lineno, status, info, code = result
filename = self.env.doc2path(docname, None)
linkstat = dict(filename=filename, lineno=lineno,
status=status, code=code, uri=uri,
info=info)
if status == 'unchecked':
self.write_linkstat(linkstat)
return
if status == 'working' and info == 'old':
self.write_linkstat(linkstat)
return
if lineno:
logger.info('(line %4d) ', lineno, nonl=True)
@ -236,18 +246,22 @@ class CheckExternalLinksBuilder(Builder):
logger.info(darkgray('-ignored- ') + uri + ': ' + info)
else:
logger.info(darkgray('-ignored- ') + uri)
self.write_linkstat(linkstat)
elif status == 'local':
logger.info(darkgray('-local- ') + uri)
self.write_entry('local', docname, lineno, uri)
self.write_entry('local', docname, filename, lineno, uri)
self.write_linkstat(linkstat)
elif status == 'working':
logger.info(darkgreen('ok ') + uri + info)
self.write_linkstat(linkstat)
elif status == 'broken':
self.write_entry('broken', docname, lineno, uri + ': ' + info)
if self.app.quiet or self.app.warningiserror:
logger.warning(__('broken link: %s (%s)'), uri, info,
location=(self.env.doc2path(docname), lineno))
location=(filename, lineno))
else:
logger.info(red('broken ') + uri + red(' - ' + info))
self.write_entry('broken', docname, filename, lineno, uri + ': ' + info)
self.write_linkstat(linkstat)
elif status == 'redirected':
try:
text, color = {
@ -259,9 +273,11 @@ class CheckExternalLinksBuilder(Builder):
}[code]
except KeyError:
text, color = ('with unknown code', purple)
self.write_entry('redirected ' + text, docname, lineno,
uri + ' to ' + info)
linkstat['text'] = text
logger.info(color('redirect ') + uri + color(' - ' + text + ' to ' + info))
self.write_entry('redirected ' + text, docname, filename,
lineno, uri + ' to ' + info)
self.write_linkstat(linkstat)
def get_target_uri(self, docname: str, typ: str = None) -> str:
    """Return an empty string as the target URI.

    Neither *docname* nor *typ* influences the result.
    """
    return ''
@ -301,10 +317,15 @@ class CheckExternalLinksBuilder(Builder):
if self.broken:
self.app.statuscode = 1
def write_entry(self, what: str, docname: str, line: int, uri: str) -> None:
    """Append one ``<file>:<line>: [<what>] <uri>`` line to ``output.txt``.

    The file name written is whatever ``self.env.doc2path(docname, None)``
    returns; the report lives in ``self.outdir`` and is opened in append
    mode as UTF-8.
    """
    report_path = path.join(self.outdir, 'output.txt')
    with open(report_path, 'a', encoding='utf-8') as report:
        source_file = self.env.doc2path(docname, None)
        report.write("%s:%s: [%s] %s\n" % (source_file, line, what, uri))
def write_entry(self, what: str, docname: str, filename: str, line: int,
                uri: str) -> None:
    """Append one result line to ``output.txt`` in the output directory.

    The line has the form ``<filename>:<line>: [<what>] <uri>``.

    :param what: label written between the square brackets
    :param docname: accepted for the caller's convenience; it is not part
                    of the written line (*filename* is used instead)
    :param filename: file name written at the start of the line
    :param line: line number written after the file name
    :param uri: text written after the label
    """
    # Open explicitly as UTF-8: the text may contain non-ASCII characters,
    # and the locale's default encoding is not guaranteed to be able to
    # encode them.
    with open(path.join(self.outdir, 'output.txt'), 'a', encoding='utf-8') as output:
        output.write("%s:%s: [%s] %s\n" % (filename, line, what, uri))
def write_linkstat(self, data: dict) -> None:
    """Append *data* to ``output.json`` as one JSON document on its own line.

    The file lives in ``self.outdir`` and is opened in append mode, so each
    call adds exactly one newline-terminated record.
    """
    json_path = path.join(self.outdir, 'output.json')
    with open(json_path, 'a') as stream:
        stream.write(json.dumps(data) + '\n')
def finish(self) -> None:
for worker in self.workers:

View File

@ -8,6 +8,8 @@
:license: BSD, see LICENSE for details.
"""
import json
import re
from unittest import mock
import pytest
@ -20,7 +22,7 @@ def test_defaults(app, status, warning):
content = (app.outdir / 'output.txt').read_text()
print(content)
# looking for '#top' and 'does-not-exist' not found should fail
# looking for '#top' and '#does-not-exist' not found should fail
assert "Anchor 'top' not found" in content
assert "Anchor 'does-not-exist' not found" in content
# looking for non-existent URL should fail
@ -31,6 +33,61 @@ def test_defaults(app, status, warning):
assert len(content.splitlines()) == 5
@pytest.mark.sphinx('linkcheck', testroot='linkcheck', freshenv=True)
def test_defaults_json(app, status, warning):
    """Build the ``linkcheck`` test root and inspect the rows of ``output.json``."""
    app.builder.build_all()

    json_report = app.outdir / 'output.json'
    assert json_report.exists()
    content = json_report.read_text()
    print(content)

    # The report holds one JSON document per line.
    rows = [json.loads(line) for line in content.splitlines()]
    first = rows[0]
    for key in ("filename", "lineno", "status", "code", "uri", "info"):
        assert key in first

    assert len(content.splitlines()) == 8
    assert len(rows) == 8

    # Row order is not stable (network latency varies between links), so
    # look rows up by URI rather than by position.
    by_uri = {}
    for row in rows:
        by_uri[row["uri"]] = row

    assert by_uri["https://www.google.com#!bar"] == {
        'filename': 'links.txt',
        'lineno': 10,
        'status': 'working',
        'code': 0,
        'uri': 'https://www.google.com#!bar',
        'info': '',
    }

    # A non-existent URL must be reported as broken.  Its 'info' text is
    # deliberately not compared: the connection-refused message presumably
    # differs between platforms (an errno on Linux, a WinError on Windows).
    missing = by_uri['https://localhost:7777/doesnotexist']
    assert missing['filename'] == 'links.txt'
    assert missing['lineno'] == 13
    assert missing['status'] == 'broken'
    assert missing['code'] == 0
    assert missing['uri'] == 'https://localhost:7777/doesnotexist'

    assert by_uri['https://www.google.com/image2.png'] == {
        'filename': 'links.txt',
        'lineno': 16,
        'status': 'broken',
        'code': 0,
        'uri': 'https://www.google.com/image2.png',
        'info': '404 Client Error: Not Found for url: https://www.google.com/image2.png',
    }

    # Anchors '#top' and '#does-not-exist' must be reported as not found.
    top_info = by_uri["https://www.google.com/#top"]["info"]
    assert top_info == "Anchor 'top' not found"
    anchor_info = by_uri[
        "http://www.sphinx-doc.org/en/1.7/intro.html#does-not-exist"
    ]["info"]
    assert anchor_info == "Anchor 'does-not-exist' not found"

    # The missing image must fail as well.
    image_info = by_uri["https://www.google.com/image.png"]["info"]
    assert "Not Found for url: https://www.google.com/image.png" in image_info
@pytest.mark.sphinx(
'linkcheck', testroot='linkcheck', freshenv=True,
confoverrides={'linkcheck_anchors_ignore': ["^!", "^top$"],