Merged in intgr/sphinx (pull request #45)

2025-02-25 18:55:22 -06:00 · 2012-03-10 18:07:16 +01:00 · 2012-03-10 18:07:16 +01:00 · 94f8dda758
commit 94f8dda758
parent 30b392d865 fd70920456
4 changed files with 81 additions and 17 deletions
--- a/doc/config.rst
+++ b/doc/config.rst
@ -1318,6 +1318,14 @@ Options for the linkcheck builder

   .. versionadded:: 1.1

+.. confval:: linkcheck_anchors
+
+   If true, check that the ``#anchor`` part of a link exists in the target
+   document. Since this requires downloading the whole document, checking is
+   considerably slower when enabled. Default is ``True``.
+
+   .. versionadded:: 1.2
+

 .. rubric:: Footnotes

--- a/doc/rest.rst
+++ b/doc/rest.rst
@ -265,8 +265,9 @@ Docutils supports the following directives:

 * Admonitions: :dudir:`attention`, :dudir:`caution`, :dudir:`danger`,
  :dudir:`error`, :dudir:`hint`, :dudir:`important`, :dudir:`note`,
-  :dudir:`tip`, :dudir:`warning` and the generic :dudir:`admonition`.
-  (Most themes style only "note" and "warning" specially.)
+  :dudir:`tip`, :dudir:`warning` and the generic
+  :dudir:`admonition <admonitions>`.  (Most themes style only "note" and
+  "warning" specially.)

 * Images:

@ -285,7 +286,7 @@ Docutils supports the following directives:
  - :dudir:`epigraph` (a block quote with optional attribution line)
  - :dudir:`highlights`, :dudir:`pull-quote` (block quotes with their own
    class attribute)
-  - :dudir:`compound` (a compound paragraph)
+  - :dudir:`compound <compound-paragraph>` (a compound paragraph)

 * Special tables:

@ -295,7 +296,7 @@ Docutils supports the following directives:

 * Special directives:

-  - :dudir:`raw` (include raw target-format markup)
+  - :dudir:`raw <raw-data-pass-through>` (include raw target-format markup)
  - :dudir:`include` (include reStructuredText from another file)
    -- in Sphinx, when given an absolute include file path, this directive takes
    it as relative to the source directory
@ -304,7 +305,7 @@ Docutils supports the following directives:
 * HTML specifics:

  - :dudir:`meta` (generation of HTML ``<meta>`` tags)
-  - :dudir:`title` (override document title)
+  - :dudir:`title <metadata-document-title>` (override document title)

 * Influencing markup:

@ -472,9 +473,8 @@ There are some problems one commonly runs into while authoring reST documents:

 * **Separation of inline markup:** As said above, inline markup spans must be
  separated from the surrounding text by non-word characters, you have to use a
-  backslash-escaped space to get around that.  See `the reference
-  <http://docutils.sf.net/docs/ref/rst/restructuredtext.html#inline-markup>`_
-  for the details.
+  backslash-escaped space to get around that.  See
+  :duref:`the reference <inline-markup>` for the details.

 * **No nested inline markup:** Something like ``*see :func:`foo`*`` is not
  possible.
--- a/sphinx/builders/linkcheck.py
+++ b/sphinx/builders/linkcheck.py
@ -15,7 +15,8 @@ import Queue
 import socket
 import threading
 from os import path
-from urllib2 import build_opener, Request
+from urllib2 import build_opener, unquote, Request
+from HTMLParser import HTMLParser, HTMLParseError

 from docutils import nodes

@ -33,6 +34,42 @@ class HeadRequest(Request):
        return 'HEAD'


+class AnchorCheckParser(HTMLParser):  # scans HTML for a single anchor name
+    def __init__(self, search_anchor):
+        HTMLParser.__init__(self)
+        # search_anchor is the name to look for; found becomes True on a match
+        self.search_anchor = search_anchor
+        self.found = False
+    # HTMLParser callback for each start tag; attrs holds (key, value) pairs
+    def handle_starttag(self, tag, attrs):
+        for key, value in attrs:
+            if key in ('id', 'name') and value == self.search_anchor:
+                self.found = True
+
+def check_anchor(f, hash):
+    """Reads HTML data from a filelike object 'f' searching for anchor 'hash'.
+
+    Returns True if anchor was found, False otherwise"""
+    # The parser records in its 'found' flag whether the anchor was seen
+    parser = AnchorCheckParser(hash)
+
+    try:
+        # Read the file in chunks of 8192 bytes. Once a matching anchor is
+        # found, stop reading so the rest of the document is not downloaded.
+
+        chunk = f.read(8192)
+        while chunk and not parser.found:
+            parser.feed(chunk)
+            chunk = f.read(8192)
+        # Make the parser process whatever data it still has buffered
+        parser.close()
+    except HTMLParseError:
+        # HTMLParser is usually pretty good with sloppy HTML, but it tends to
+        # choke on EOF. The parse error is ignored; parser.found still applies.
+        pass
+
+    return parser.found
+
 class CheckExternalLinksBuilder(Builder):
    """
    Checks for broken external links.
@ -66,7 +103,7 @@ class CheckExternalLinksBuilder(Builder):

        def check():
            # check for various conditions without bothering the network
-            if len(uri) == 0 or uri[0:7] == 'mailto:' or uri[0:4] == 'ftp:':
+            if len(uri) == 0 or uri[0] == '#' or uri[0:7] == 'mailto:' or uri[0:4] == 'ftp:':
                return 'unchecked', ''
            elif not (uri[0:5] == 'http:' or uri[0:6] == 'https:'):
                return 'local', ''
@ -80,19 +117,39 @@ class CheckExternalLinksBuilder(Builder):
                if rex.match(uri):
                    return 'ignored', ''

+            if '#' in uri:
+                req_url, hash = uri.split('#', 1)
+            else:
+                req_url = uri
+                hash = None
+
            # need to actually check the URI
            try:
-                f = opener.open(HeadRequest(uri), **kwargs)
+                if hash and self.app.config.linkcheck_anchors:
+                    # Read the whole document and see if #hash exists
+                    f = opener.open(Request(req_url), **kwargs)
+                    found = check_anchor(f, unquote(hash))
                    f.close()
+
+                    if not found:
+                        raise Exception("Anchor '%s' not found" % hash)
+                else:
+                    f = opener.open(HeadRequest(req_url), **kwargs)
+                    f.close()
+
            except Exception, err:
                self.broken[uri] = str(err)
                return 'broken', str(err)
-            if f.url.rstrip('/') == uri.rstrip('/'):
+            if f.url.rstrip('/') == req_url.rstrip('/'):
                self.good.add(uri)
                return 'working', 'new'
            else:
-                self.redirected[uri] = f.url
-                return 'redirected', f.url
+                new_url = f.url
+                if hash:
+                    new_url += '#' + hash
+
+                self.redirected[uri] = new_url
+                return 'redirected', new_url

        while True:
            uri, docname, lineno = self.wqueue.get()
@ -142,8 +199,6 @@ class CheckExternalLinksBuilder(Builder):
            if 'refuri' not in node:
                continue
            uri = node['refuri']
-            if '#' in uri:
-                uri = uri.split('#')[0]
            lineno = None
            while lineno is None:
                node = node.parent
--- a/sphinx/config.py
+++ b/sphinx/config.py
@ -179,6 +179,7 @@ class Config(object):
        linkcheck_ignore = ([], None),
        linkcheck_timeout = (None, None),
        linkcheck_workers = (5, None),
+        linkcheck_anchors = (True, None),

        # gettext options
        gettext_compact = (True, 'gettext'),