mirror of
https://github.com/sphinx-doc/sphinx.git
synced 2025-02-25 18:55:22 -06:00
Merge pull request #7103 from westurner/linkcheck_json_output
ENH: linkcheck: also write all links to output.json
This commit is contained in:
commit
64d51a17aa
@ -8,6 +8,7 @@
|
|||||||
:license: BSD, see LICENSE for details.
|
:license: BSD, see LICENSE for details.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
import queue
|
import queue
|
||||||
import re
|
import re
|
||||||
import socket
|
import socket
|
||||||
@ -90,6 +91,8 @@ class CheckExternalLinksBuilder(Builder):
|
|||||||
socket.setdefaulttimeout(5.0)
|
socket.setdefaulttimeout(5.0)
|
||||||
# create output file
|
# create output file
|
||||||
open(path.join(self.outdir, 'output.txt'), 'w').close()
|
open(path.join(self.outdir, 'output.txt'), 'w').close()
|
||||||
|
# create JSON output file
|
||||||
|
open(path.join(self.outdir, 'output.json'), 'w').close()
|
||||||
|
|
||||||
# create queues and worker threads
|
# create queues and worker threads
|
||||||
self.wqueue = queue.Queue() # type: queue.Queue
|
self.wqueue = queue.Queue() # type: queue.Queue
|
||||||
@ -225,9 +228,16 @@ class CheckExternalLinksBuilder(Builder):
|
|||||||
|
|
||||||
def process_result(self, result: Tuple[str, str, int, str, str, int]) -> None:
|
def process_result(self, result: Tuple[str, str, int, str, str, int]) -> None:
|
||||||
uri, docname, lineno, status, info, code = result
|
uri, docname, lineno, status, info, code = result
|
||||||
|
|
||||||
|
filename = self.env.doc2path(docname, None)
|
||||||
|
linkstat = dict(filename=filename, lineno=lineno,
|
||||||
|
status=status, code=code, uri=uri,
|
||||||
|
info=info)
|
||||||
if status == 'unchecked':
|
if status == 'unchecked':
|
||||||
|
self.write_linkstat(linkstat)
|
||||||
return
|
return
|
||||||
if status == 'working' and info == 'old':
|
if status == 'working' and info == 'old':
|
||||||
|
self.write_linkstat(linkstat)
|
||||||
return
|
return
|
||||||
if lineno:
|
if lineno:
|
||||||
logger.info('(line %4d) ', lineno, nonl=True)
|
logger.info('(line %4d) ', lineno, nonl=True)
|
||||||
@ -236,18 +246,22 @@ class CheckExternalLinksBuilder(Builder):
|
|||||||
logger.info(darkgray('-ignored- ') + uri + ': ' + info)
|
logger.info(darkgray('-ignored- ') + uri + ': ' + info)
|
||||||
else:
|
else:
|
||||||
logger.info(darkgray('-ignored- ') + uri)
|
logger.info(darkgray('-ignored- ') + uri)
|
||||||
|
self.write_linkstat(linkstat)
|
||||||
elif status == 'local':
|
elif status == 'local':
|
||||||
logger.info(darkgray('-local- ') + uri)
|
logger.info(darkgray('-local- ') + uri)
|
||||||
self.write_entry('local', docname, lineno, uri)
|
self.write_entry('local', docname, filename, lineno, uri)
|
||||||
|
self.write_linkstat(linkstat)
|
||||||
elif status == 'working':
|
elif status == 'working':
|
||||||
logger.info(darkgreen('ok ') + uri + info)
|
logger.info(darkgreen('ok ') + uri + info)
|
||||||
|
self.write_linkstat(linkstat)
|
||||||
elif status == 'broken':
|
elif status == 'broken':
|
||||||
self.write_entry('broken', docname, lineno, uri + ': ' + info)
|
|
||||||
if self.app.quiet or self.app.warningiserror:
|
if self.app.quiet or self.app.warningiserror:
|
||||||
logger.warning(__('broken link: %s (%s)'), uri, info,
|
logger.warning(__('broken link: %s (%s)'), uri, info,
|
||||||
location=(self.env.doc2path(docname), lineno))
|
location=(filename, lineno))
|
||||||
else:
|
else:
|
||||||
logger.info(red('broken ') + uri + red(' - ' + info))
|
logger.info(red('broken ') + uri + red(' - ' + info))
|
||||||
|
self.write_entry('broken', docname, filename, lineno, uri + ': ' + info)
|
||||||
|
self.write_linkstat(linkstat)
|
||||||
elif status == 'redirected':
|
elif status == 'redirected':
|
||||||
try:
|
try:
|
||||||
text, color = {
|
text, color = {
|
||||||
@ -259,9 +273,11 @@ class CheckExternalLinksBuilder(Builder):
|
|||||||
}[code]
|
}[code]
|
||||||
except KeyError:
|
except KeyError:
|
||||||
text, color = ('with unknown code', purple)
|
text, color = ('with unknown code', purple)
|
||||||
self.write_entry('redirected ' + text, docname, lineno,
|
linkstat['text'] = text
|
||||||
uri + ' to ' + info)
|
|
||||||
logger.info(color('redirect ') + uri + color(' - ' + text + ' to ' + info))
|
logger.info(color('redirect ') + uri + color(' - ' + text + ' to ' + info))
|
||||||
|
self.write_entry('redirected ' + text, docname, filename,
|
||||||
|
lineno, uri + ' to ' + info)
|
||||||
|
self.write_linkstat(linkstat)
|
||||||
|
|
||||||
def get_target_uri(self, docname: str, typ: str = None) -> str:
    """Return the target URI for *docname*.

    Both arguments are ignored; the result is always the empty string.
    """
    empty_uri = ''
    return empty_uri
|
||||||
@ -301,10 +317,15 @@ class CheckExternalLinksBuilder(Builder):
|
|||||||
if self.broken:
|
if self.broken:
|
||||||
self.app.statuscode = 1
|
self.app.statuscode = 1
|
||||||
|
|
||||||
def write_entry(self, what: str, docname: str, filename: str, line: int,
                uri: str) -> None:
    """Append one plain-text result line to ``output.txt`` in ``self.outdir``.

    The written line has the form ``<filename>:<line>: [<what>] <uri>``.

    :param what: label placed between the square brackets (e.g. ``'broken'``)
    :param docname: name of the document containing the link; it is not
                    part of the written line
    :param filename: source file path written at the start of the line
    :param line: line number of the link within *filename*
    :param uri: the link text, optionally followed by extra detail supplied
                by the caller
    """
    report_path = path.join(self.outdir, 'output.txt')
    # Open with an explicit UTF-8 encoding: URIs and error messages may
    # contain non-ASCII characters, and relying on the locale's default
    # encoding can raise UnicodeEncodeError or yield a platform-dependent file.
    with open(report_path, 'a', encoding='utf-8') as output:
        output.write("%s:%s: [%s] %s\n" % (filename, line, what, uri))
|
||||||
|
|
||||||
|
def write_linkstat(self, data: dict) -> None:
    """Append *data* to ``output.json`` in ``self.outdir``.

    Each call adds a single JSON document followed by a newline, so the
    file holds one JSON object per line.

    :param data: mapping describing one checked link; it must be
                 serialisable by :func:`json.dumps`
    """
    json_path = path.join(self.outdir, 'output.json')
    with open(json_path, 'a') as stream:
        stream.write(json.dumps(data) + '\n')
|
||||||
|
|
||||||
def finish(self) -> None:
|
def finish(self) -> None:
|
||||||
for worker in self.workers:
|
for worker in self.workers:
|
||||||
|
@ -8,6 +8,8 @@
|
|||||||
:license: BSD, see LICENSE for details.
|
:license: BSD, see LICENSE for details.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
from unittest import mock
|
from unittest import mock
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
@ -20,7 +22,7 @@ def test_defaults(app, status, warning):
|
|||||||
content = (app.outdir / 'output.txt').read_text()
|
content = (app.outdir / 'output.txt').read_text()
|
||||||
|
|
||||||
print(content)
|
print(content)
|
||||||
# looking for '#top' and 'does-not-exist' not found should fail
|
# looking for '#top' and '#does-not-exist' not found should fail
|
||||||
assert "Anchor 'top' not found" in content
|
assert "Anchor 'top' not found" in content
|
||||||
assert "Anchor 'does-not-exist' not found" in content
|
assert "Anchor 'does-not-exist' not found" in content
|
||||||
# looking for non-existent URL should fail
|
# looking for non-existent URL should fail
|
||||||
@ -31,6 +33,61 @@ def test_defaults(app, status, warning):
|
|||||||
assert len(content.splitlines()) == 5
|
assert len(content.splitlines()) == 5
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.sphinx('linkcheck', testroot='linkcheck', freshenv=True)
def test_defaults_json(app, status, warning):
    """Check the JSON report that the linkcheck build leaves in ``output.json``.

    The file is read as one JSON object per line; rows are then looked up
    by their ``uri`` because the order in which they are written can vary.
    """
    app.builder.build_all()

    json_file = app.outdir / 'output.json'
    assert json_file.exists()
    content = json_file.read_text()
    print(content)

    lines = content.splitlines()
    rows = [json.loads(line) for line in lines]

    # every record is expected to carry the same set of keys; spot-check the first
    first_row = rows[0]
    for attr in ("filename", "lineno", "status", "code", "uri", "info"):
        assert attr in first_row

    assert len(lines) == 8
    assert len(rows) == 8

    # the output order of the rows is not stable
    # due to possible variance in network latency
    by_uri = {entry["uri"]: entry for entry in rows}

    assert by_uri["https://www.google.com#!bar"] == {
        'filename': 'links.txt',
        'lineno': 10,
        'status': 'working',
        'code': 0,
        'uri': 'https://www.google.com#!bar',
        'info': '',
    }

    # looking for non-existent URL should fail
    dnerow = by_uri['https://localhost:7777/doesnotexist']
    assert dnerow['filename'] == 'links.txt'
    assert dnerow['lineno'] == 13
    assert dnerow['status'] == 'broken'
    assert dnerow['code'] == 0
    assert dnerow['uri'] == 'https://localhost:7777/doesnotexist'
    # 'info' is deliberately not compared for this row: the connection error
    # text embeds an object address and differs by OS (e.g. "[Errno 111]
    # Connection refused" vs. "[WinError 10061] No connection could be made
    # because the target machine actively refused it").

    assert by_uri['https://www.google.com/image2.png'] == {
        'filename': 'links.txt',
        'lineno': 16,
        'status': 'broken',
        'code': 0,
        'uri': 'https://www.google.com/image2.png',
        'info': '404 Client Error: Not Found for url: https://www.google.com/image2.png',
    }

    # looking for '#top' and '#does-not-exist' not found should fail
    top_info = by_uri["https://www.google.com/#top"]["info"]
    assert top_info == "Anchor 'top' not found"
    missing_info = by_uri[
        "http://www.sphinx-doc.org/en/1.7/intro.html#does-not-exist"]["info"]
    assert missing_info == "Anchor 'does-not-exist' not found"

    # images should fail
    image_info = by_uri["https://www.google.com/image.png"]["info"]
    assert "Not Found for url: https://www.google.com/image.png" in image_info
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.sphinx(
|
@pytest.mark.sphinx(
|
||||||
'linkcheck', testroot='linkcheck', freshenv=True,
|
'linkcheck', testroot='linkcheck', freshenv=True,
|
||||||
confoverrides={'linkcheck_anchors_ignore': ["^!", "^top$"],
|
confoverrides={'linkcheck_anchors_ignore': ["^!", "^top$"],
|
||||||
|
Loading…
Reference in New Issue
Block a user