mirror of
https://github.com/sphinx-doc/sphinx.git
synced 2025-02-25 18:55:22 -06:00
Move link checker to its own file. Use different user-agent to enable Wikipedia lookup.
This commit is contained in:
parent
f2d713c577
commit
e886f2c5ae
@ -5,7 +5,7 @@
|
||||
|
||||
Builder classes for different output formats.
|
||||
|
||||
:copyright: 2007-2008 by Georg Brandl, Thomas Lamb.
|
||||
:copyright: 2007-2008 by Georg Brandl.
|
||||
:license: BSD.
|
||||
"""
|
||||
|
||||
@ -13,11 +13,9 @@ import os
|
||||
import time
|
||||
import codecs
|
||||
import shutil
|
||||
import socket
|
||||
import cPickle as pickle
|
||||
from os import path
|
||||
from cgi import escape
|
||||
from urllib2 import urlopen, HTTPError
|
||||
|
||||
from docutils import nodes
|
||||
from docutils.io import StringOutput, FileOutput, DocTreeInput
|
||||
@ -891,108 +889,7 @@ class ChangesBuilder(Builder):
|
||||
pass
|
||||
|
||||
|
||||
class CheckExternalLinksBuilder(Builder):
|
||||
"""
|
||||
Checks for broken external links.
|
||||
"""
|
||||
name = 'linkcheck'
|
||||
|
||||
def init(self):
|
||||
self.good = set()
|
||||
self.broken = {}
|
||||
self.redirected = {}
|
||||
# set a timeout for non-responding servers
|
||||
socket.setdefaulttimeout(5.0)
|
||||
# create output file
|
||||
open(path.join(self.outdir, 'output.txt'), 'w').close()
|
||||
|
||||
def get_target_uri(self, docname, typ=None):
|
||||
return ''
|
||||
|
||||
def get_outdated_docs(self):
|
||||
return self.env.all_docs
|
||||
|
||||
def prepare_writing(self, docnames):
|
||||
return
|
||||
|
||||
def write_doc(self, docname, doctree):
|
||||
self.info()
|
||||
for node in doctree.traverse(nodes.reference):
|
||||
try:
|
||||
self.check(node, docname)
|
||||
except KeyError:
|
||||
continue
|
||||
|
||||
def check(self, node, docname):
|
||||
uri = node['refuri']
|
||||
|
||||
if '#' in uri:
|
||||
uri = uri.split('#')[0]
|
||||
|
||||
if uri in self.good:
|
||||
return
|
||||
|
||||
if uri[0:5] == 'http:' or uri[0:6] == 'https:':
|
||||
self.info(uri, nonl=1)
|
||||
lineno = None
|
||||
while lineno is None and node:
|
||||
node = node.parent
|
||||
lineno = node.line
|
||||
|
||||
if uri in self.broken:
|
||||
(r, s) = self.broken[uri]
|
||||
elif uri in self.redirected:
|
||||
(r, s) = self.redirected[uri]
|
||||
else:
|
||||
(r, s) = self.resolve(uri)
|
||||
|
||||
if r == 0:
|
||||
self.info(' - ' + darkgreen('working'))
|
||||
self.good.add(uri)
|
||||
elif r == 2:
|
||||
self.info(' - ' + red('broken: ') + s)
|
||||
self.broken[uri] = (r, s)
|
||||
self.write_entry('broken', docname, lineno, uri + ': ' + s)
|
||||
else:
|
||||
self.info(' - ' + purple('redirected') + ' to ' + s)
|
||||
self.redirected[uri] = (r, s)
|
||||
self.write_entry('redirected', docname, lineno, uri + ' to ' + s)
|
||||
|
||||
elif len(uri) == 0 or uri[0:7] == 'mailto:' or uri[0:4] == 'ftp:':
|
||||
return
|
||||
else:
|
||||
self.info(uri + ' - ' + red('malformed!'))
|
||||
self.write_entry('malformed', docname, lineno, uri)
|
||||
|
||||
return
|
||||
|
||||
def write_entry(self, what, docname, line, uri):
|
||||
output = open(path.join(self.outdir, 'output.txt'), 'a')
|
||||
output.write("%s:%s [%s] %s\n" % (self.env.doc2path(docname, None),
|
||||
line, what, uri))
|
||||
output.close()
|
||||
|
||||
def resolve(self, uri):
|
||||
try:
|
||||
f = urlopen(uri)
|
||||
f.close()
|
||||
except HTTPError, err:
|
||||
if err.code == 403 and uri.startswith('http://en.wikipedia.org/'):
|
||||
# Wikipedia blocks requests from urllib User-Agent
|
||||
return (0, 0)
|
||||
return (2, str(err))
|
||||
except Exception, err:
|
||||
return (2, str(err))
|
||||
if f.url.rstrip('/') == uri.rstrip('/'):
|
||||
return (0, 0)
|
||||
else:
|
||||
return (1, f.url)
|
||||
|
||||
def finish(self):
|
||||
return
|
||||
|
||||
|
||||
|
||||
from sphinx.linkcheck import CheckExternalLinksBuilder
|
||||
|
||||
builtin_builders = {
|
||||
'html': StandaloneHTMLBuilder,
|
||||
|
124
sphinx/linkcheck.py
Normal file
124
sphinx/linkcheck.py
Normal file
@ -0,0 +1,124 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
sphinx.linkcheck
|
||||
~~~~~~~~~~~~~~~~
|
||||
|
||||
The CheckExternalLinksBuilder class.
|
||||
|
||||
:copyright: 2008 by Georg Brandl, Thomas Lamb.
|
||||
:license: BSD.
|
||||
"""
|
||||
|
||||
import socket
|
||||
from os import path
|
||||
from urllib2 import build_opener, HTTPError
|
||||
|
||||
from docutils import nodes
|
||||
|
||||
from sphinx.builder import Builder
|
||||
from sphinx.util.console import bold, purple, red, darkgreen
|
||||
|
||||
# Simulate a browser User-Agent: some servers (e.g. Wikipedia) answer
# HTTP 403 to requests carrying the default urllib2 User-Agent.
opener = build_opener()
opener.addheaders = [('User-agent', 'Mozilla/5.0')]
|
||||
|
||||
|
||||
class CheckExternalLinksBuilder(Builder):
|
||||
"""
|
||||
Checks for broken external links.
|
||||
"""
|
||||
name = 'linkcheck'
|
||||
|
||||
def init(self):
|
||||
self.good = set()
|
||||
self.broken = {}
|
||||
self.redirected = {}
|
||||
# set a timeout for non-responding servers
|
||||
socket.setdefaulttimeout(5.0)
|
||||
# create output file
|
||||
open(path.join(self.outdir, 'output.txt'), 'w').close()
|
||||
|
||||
def get_target_uri(self, docname, typ=None):
|
||||
return ''
|
||||
|
||||
def get_outdated_docs(self):
|
||||
return self.env.all_docs
|
||||
|
||||
def prepare_writing(self, docnames):
|
||||
return
|
||||
|
||||
def write_doc(self, docname, doctree):
|
||||
self.info()
|
||||
for node in doctree.traverse(nodes.reference):
|
||||
try:
|
||||
self.check(node, docname)
|
||||
except KeyError:
|
||||
continue
|
||||
|
||||
def check(self, node, docname):
|
||||
uri = node['refuri']
|
||||
|
||||
if '#' in uri:
|
||||
uri = uri.split('#')[0]
|
||||
|
||||
if uri in self.good:
|
||||
return
|
||||
|
||||
if uri[0:5] == 'http:' or uri[0:6] == 'https:':
|
||||
self.info(uri, nonl=1)
|
||||
lineno = None
|
||||
while lineno is None and node:
|
||||
node = node.parent
|
||||
lineno = node.line
|
||||
|
||||
if uri in self.broken:
|
||||
(r, s) = self.broken[uri]
|
||||
elif uri in self.redirected:
|
||||
(r, s) = self.redirected[uri]
|
||||
else:
|
||||
(r, s) = self.resolve(uri)
|
||||
|
||||
if r == 0:
|
||||
self.info(' - ' + darkgreen('working'))
|
||||
self.good.add(uri)
|
||||
elif r == 2:
|
||||
self.info(' - ' + red('broken: ') + s)
|
||||
self.broken[uri] = (r, s)
|
||||
self.write_entry('broken', docname, lineno, uri + ': ' + s)
|
||||
else:
|
||||
self.info(' - ' + purple('redirected') + ' to ' + s)
|
||||
self.redirected[uri] = (r, s)
|
||||
self.write_entry('redirected', docname, lineno, uri + ' to ' + s)
|
||||
|
||||
elif len(uri) == 0 or uri[0:7] == 'mailto:' or uri[0:4] == 'ftp:':
|
||||
return
|
||||
else:
|
||||
self.info(uri + ' - ' + red('malformed!'))
|
||||
self.write_entry('malformed', docname, lineno, uri)
|
||||
|
||||
return
|
||||
|
||||
def write_entry(self, what, docname, line, uri):
|
||||
output = open(path.join(self.outdir, 'output.txt'), 'a')
|
||||
output.write("%s:%s [%s] %s\n" % (self.env.doc2path(docname, None),
|
||||
line, what, uri))
|
||||
output.close()
|
||||
|
||||
def resolve(self, uri):
|
||||
try:
|
||||
f = opener.open(uri)
|
||||
f.close()
|
||||
except HTTPError, err:
|
||||
#if err.code == 403 and uri.startswith('http://en.wikipedia.org/'):
|
||||
# # Wikipedia blocks requests from urllib User-Agent
|
||||
# return (0, 0)
|
||||
return (2, str(err))
|
||||
except Exception, err:
|
||||
return (2, str(err))
|
||||
if f.url.rstrip('/') == uri.rstrip('/'):
|
||||
return (0, 0)
|
||||
else:
|
||||
return (1, f.url)
|
||||
|
||||
def finish(self):
|
||||
return
|
Loading…
Reference in New Issue
Block a user