Add testcase for encoding URIs in linkcheck builder

This commit is contained in:
Takeshi KOMIYA
2015-12-19 22:49:14 +09:00
parent 209e5f29b1
commit 788c0ebffc
3 changed files with 35 additions and 10 deletions

View File

@@ -17,7 +17,7 @@ from os import path
from six.moves import queue
from six.moves.urllib.request import build_opener, Request, HTTPRedirectHandler
from six.moves.urllib.parse import unquote, urlsplit, quote
from six.moves.urllib.parse import unquote
from six.moves.urllib.error import HTTPError
from six.moves.html_parser import HTMLParser
from docutils import nodes
@@ -33,6 +33,7 @@ except ImportError:
pass
from sphinx.builders import Builder
from sphinx.util import encode_uri
from sphinx.util.console import purple, red, darkgreen, darkgray, \
darkred, turquoise
from sphinx.util.pycompat import TextIOWrapper
@@ -153,15 +154,7 @@ class CheckExternalLinksBuilder(Builder):
try:
req_url.encode('ascii')
except UnicodeError:
split = urlsplit(req_url)
req_url = (split[0].encode() + '://' + # scheme
split[1].encode('idna') + # netloc
quote(split[2].encode('utf-8'))) # path
if split[3]: # query
req_url += '?' + quote(split[3].encode('utf-8'))
# go back to Unicode strings which is required by Python 3
# (but now all parts are pure ascii)
req_url = req_url.decode('ascii')
req_url = encode_uri(req_url)
# need to actually check the URI
try:

View File

@@ -23,6 +23,7 @@ from collections import deque
from six import iteritems, text_type, binary_type
from six.moves import range
from six.moves.urllib.parse import urlsplit, quote
import docutils
from docutils.utils import relative_path
@@ -523,3 +524,15 @@ def import_object(objname, source=None):
raise ExtensionError('Could not find %s' % objname +
(source and ' (needed for %s)' % source or ''),
err)
def encode_uri(uri):
    """Return *uri* as a pure-ASCII text string.

    The URI is split with ``urlsplit`` and rebuilt from three parts:

    * the network location is converted with the ``idna`` codec,
    * the path is encoded as UTF-8 and percent-quoted,
    * the query (if any) is encoded as UTF-8 and percent-quoted, keeping
      the ``=`` and ``&`` delimiters literal so that the key/value
      structure of the query survives.

    The fragment is not included in the result, and the scheme is always
    followed by ``://``.

    :param uri: the URI to encode, as a text string
    :return: the encoded URI, as a text string containing only ASCII
    """
    split = urlsplit(uri)
    # Decode the IDNA result immediately so that only text strings are
    # concatenated; mixing bytes and text raises TypeError on Python 3.
    netloc = split[1].encode('idna').decode('ascii')
    encoded = split[0] + '://' + netloc + quote(split[2].encode('utf-8'))
    if split[3]:
        # Quoting "=" and "&" would merge all parameters into one opaque
        # token, so they are kept as safe characters.
        encoded += '?' + quote(split[3].encode('utf-8'), safe='/=&')
    return encoded

19
tests/test_util.py Normal file
View File

@@ -0,0 +1,19 @@
# -*- coding: utf-8 -*-
"""
test_util
~~~~~~~~~~~~~~~
Tests util functions.
:copyright: Copyright 2007-2015 by the Sphinx team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
from sphinx.util import encode_uri
def test_encode_uri():
    """Check that encode_uri() percent-encodes a non-ASCII path as UTF-8."""
    expected = (u'https://ru.wikipedia.org/wiki/%D0%A1%D0%B8%D1%81%D1%82%D0%B5%D0%BC%D0%B0_'
                u'%D1%83%D0%BF%D1%80%D0%B0%D0%B2%D0%BB%D0%B5%D0%BD%D0%B8%D1%8F_'
                u'%D0%B1%D0%B0%D0%B7%D0%B0%D0%BC%D0%B8_%D0%B4%D0%B0%D0%BD%D0%BD%D1%8B%D1%85')
    # Use a unicode literal, like ``expected``, so the input is text on
    # every supported Python version.
    uri = u'https://ru.wikipedia.org/wiki/Система_управления_базами_данных'
    # Compare explicitly: ``assert expected, msg`` only checks that
    # ``expected`` is truthy and could never fail.
    assert expected == encode_uri(uri)