2008-03-14 18:47:30 -05:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
"""
|
2008-11-29 12:56:58 -06:00
|
|
|
sphinx.builders.linkcheck
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~
|
2008-03-14 18:47:30 -05:00
|
|
|
|
|
|
|
The CheckExternalLinksBuilder class.
|
|
|
|
|
2010-01-01 07:09:13 -06:00
|
|
|
:copyright: Copyright 2007-2010 by the Sphinx team, see AUTHORS.
|
2008-12-27 05:19:17 -06:00
|
|
|
:license: BSD, see LICENSE for details.
|
2008-03-14 18:47:30 -05:00
|
|
|
"""
|
|
|
|
|
2010-10-22 04:40:38 -05:00
|
|
|
import re
|
2008-03-14 18:47:30 -05:00
|
|
|
import socket
|
|
|
|
from os import path
|
|
|
|
from urllib2 import build_opener, HTTPError
|
|
|
|
|
|
|
|
from docutils import nodes
|
|
|
|
|
2008-11-29 12:56:58 -06:00
|
|
|
from sphinx.builders import Builder
|
2010-07-11 04:00:07 -05:00
|
|
|
from sphinx.util.console import purple, red, darkgreen, darkgray
|
2008-03-14 18:47:30 -05:00
|
|
|
|
|
|
|
# Module-level URL opener used for every link check; it sends a
# browser-like User-agent header instead of the library default.
opener = build_opener()
opener.addheaders = [
    ('User-agent', 'Mozilla/5.0'),
]
|
|
|
|
|
|
|
|
|
|
|
|
class CheckExternalLinksBuilder(Builder):
    """
    Checks for broken external links.

    Every reference node of every document is examined.  ``http:`` and
    ``https:`` URIs are fetched through the module-level ``opener``; the
    outcome is printed with ``self.info()`` and entries for broken,
    redirected and local links are appended to ``output.txt`` in the
    output directory.
    """
    name = 'linkcheck'

    def init(self):
        """
        Prepare ignore patterns, result caches and the output file.
        """
        # A real list rather than a lazy ``map`` object: the patterns are
        # iterated again for every checked URI.
        self.to_ignore = [re.compile(pattern)
                          for pattern in self.app.config.linkcheck_ignore]
        # Result caches keyed by URI, so each distinct URI is fetched once.
        self.good = set()
        self.broken = {}      # uri -> (2, error message)
        self.redirected = {}  # uri -> (1, final URL)
        # set a timeout for non-responding servers (process-wide default)
        socket.setdefaulttimeout(5.0)
        # create (or truncate) the output file
        open(path.join(self.outdir, 'output.txt'), 'w').close()

    def get_target_uri(self, docname, typ=None):
        """
        Return the target URI for *docname*; always the empty string here.
        """
        return ''

    def get_outdated_docs(self):
        """
        Return the documents to process: all of ``self.env.found_docs``.
        """
        return self.env.found_docs

    def prepare_writing(self, docnames):
        """
        Nothing to prepare for this builder.
        """
        return

    def write_doc(self, docname, doctree):
        """
        Check every reference node found in *doctree*.

        A ``KeyError`` raised while checking a node (presumably a reference
        without a ``refuri`` attribute) skips that node.
        """
        self.info()
        for node in doctree.traverse(nodes.reference):
            try:
                self.check(node, docname)
            except KeyError:
                continue

    def check(self, node, docname):
        """
        Check the URI of one reference node and report the result.

        Empty, ``mailto:`` and ``ftp:`` URIs and URIs already known to work
        are skipped silently.  URIs matching one of ``self.to_ignore`` are
        reported as ignored.  ``http:``/``https:`` URIs are resolved (or
        taken from the caches) and reported as working, broken or
        redirected; anything else is reported as local.  Sets
        ``self.app.statuscode`` to 1 once any broken link is known.
        """
        uri = node['refuri']

        # only the part before the first '#' is checked
        if '#' in uri:
            uri = uri.split('#')[0]

        if uri in self.good:
            return

        # walk up the ancestors until one of them carries a line number
        lineno = None
        while lineno is None:
            node = node.parent
            if node is None:
                break
            lineno = node.line

        if not uri or uri.startswith(('mailto:', 'ftp:')):
            return

        if lineno:
            self.info('(line %3d) ' % lineno, nonl=1)
        for rex in self.to_ignore:
            if rex.match(uri):
                self.info(uri + ' - ' + darkgray('ignored'))
                return
        if uri.startswith(('http:', 'https:')):
            self.info(uri, nonl=1)

            # reuse an earlier verdict for this URI when there is one
            if uri in self.broken:
                status, detail = self.broken[uri]
            elif uri in self.redirected:
                status, detail = self.redirected[uri]
            else:
                status, detail = self.resolve(uri)

            if status == 0:
                self.info(' - ' + darkgreen('working'))
                self.good.add(uri)
            elif status == 2:
                self.info(' - ' + red('broken: ') + detail)
                self.write_entry('broken', docname, lineno,
                                 uri + ': ' + detail)
                self.broken[uri] = (status, detail)
                # in quiet mode the info() output is assumed to be hidden,
                # so broken links are additionally emitted as warnings
                if self.app.quiet:
                    self.warn('broken link: %s' % uri,
                              '%s:%s' % (self.env.doc2path(docname), lineno))
            else:
                self.info(' - ' + purple('redirected') + ' to ' + detail)
                self.write_entry('redirected', docname,
                                 lineno, uri + ' to ' + detail)
                self.redirected[uri] = (status, detail)
        else:
            self.info(uri + ' - ' + darkgray('local'))
            self.write_entry('local', docname, lineno, uri)

        if self.broken:
            self.app.statuscode = 1

    def write_entry(self, what, docname, line, uri):
        """
        Append one ``<path>:<line>: [<what>] <uri>`` line to ``output.txt``.
        """
        # Format first so that a failure here cannot leave the file open.
        entry = "%s:%s: [%s] %s\n" % (self.env.doc2path(docname, None),
                                      line, what, uri)
        with open(path.join(self.outdir, 'output.txt'), 'a') as output:
            output.write(entry)

    def resolve(self, uri):
        """
        Fetch *uri* and classify the result.

        Returns a ``(status, detail)`` tuple:

        * ``(0, 0)`` -- the URI opened and the final URL equals *uri*
          (ignoring trailing slashes);
        * ``(1, final_url)`` -- the URI opened but ended up at another URL;
        * ``(2, message)`` -- opening failed; *message* is ``str()`` of the
          exception.
        """
        try:
            response = opener.open(uri)
            final_url = response.url
            response.close()
        except Exception as err:
            # HTTPError is a subclass of Exception and was handled
            # identically, so one handler covers both cases.
            return (2, str(err))
        if final_url.rstrip('/') == uri.rstrip('/'):
            return (0, 0)
        return (1, final_url)

    def finish(self):
        """
        Nothing to finalize for this builder.
        """
        return
|