Fix #2171: cannot linkcheck url with unicode

This commit is contained in:
Takeshi KOMIYA 2015-12-19 22:58:53 +09:00
parent 788c0ebffc
commit 9669f2a48b
3 changed files with 16 additions and 11 deletions

View File

@ -12,6 +12,7 @@ Bugs fixed
* #967: Fix SVG inheritance diagram is not hyperlinked (clickable)
* #1237: Fix footnotes not working in definition list in LaTeX
* #2168: Fix raw directive does not work for text writer
* #2171: Fix cannot linkcheck url with unicode
Release 1.3.3 (released Dec 2, 2015)

View File

@ -23,7 +23,7 @@ from collections import deque
from six import iteritems, text_type, binary_type
from six.moves import range
from six.moves.urllib.parse import urlsplit, quote
from six.moves.urllib.parse import urlsplit, urlunsplit, quote_plus, parse_qsl, urlencode
import docutils
from docutils.utils import relative_path
@ -527,12 +527,10 @@ def import_object(objname, source=None):
def encode_uri(uri):
    """Return *uri* as a pure-ASCII text string that can be sent in a request.

    The URI is split with ``urlsplit`` and each part is encoded separately:

    * the network location is converted with the ``idna`` codec,
    * the path is UTF-8 encoded and percent-escaped, keeping ``/`` intact,
    * the query string is parsed into key/value pairs and re-serialised
      with ``urlencode`` so that every pair is escaped exactly once.

    The scheme and fragment are passed through unchanged.

    :param uri: the (possibly non-ASCII) URI as a text string
    :return: the encoded URI as a text string
    """
    def as_text(value):
        # quote_plus() and urlencode() return a byte string on Python 2 but
        # a text string on Python 3, where calling .decode() would fail.
        return value.decode('ascii') if isinstance(value, bytes) else value

    parts = list(urlsplit(uri))
    parts[1] = parts[1].encode('idna').decode('ascii')
    parts[2] = as_text(quote_plus(parts[2].encode('utf-8'), '/'))
    # Hand urlencode() the raw UTF-8 bytes: it does the percent-escaping
    # itself, so quoting the values beforehand would escape them twice
    # ('%E2' would turn into '%25E2').
    query = [(key.encode('utf-8'), value.encode('utf-8'))
             for (key, value) in parse_qsl(parts[3])]
    parts[3] = as_text(urlencode(query))
    return urlunsplit(parts)

View File

@ -15,5 +15,11 @@ def test_encode_uri():
# Non-ASCII path segments must come back percent-encoded as UTF-8.
expected = (u'https://ru.wikipedia.org/wiki/%D0%A1%D0%B8%D1%81%D1%82%D0%B5%D0%BC%D0%B0_'
            u'%D1%83%D0%BF%D1%80%D0%B0%D0%B2%D0%BB%D0%B5%D0%BD%D0%B8%D1%8F_'
            u'%D0%B1%D0%B0%D0%B7%D0%B0%D0%BC%D0%B8_%D0%B4%D0%B0%D0%BD%D0%BD%D1%8B%D1%85')
uri = u'https://ru.wikipedia.org/wiki/Система_управления_базами_данных'
# Compare with ``==``: in the two-argument form ``assert a, b`` the second
# expression is only the failure message, so encode_uri() was never checked.
assert expected == encode_uri(uri)

# A non-ASCII query value is escaped, and already-escaped values are kept
# as they are instead of being escaped a second time.
expected = (u'https://github.com/search?utf8=%E2%9C%93&q=is%3Aissue+is%3Aopen+is%3A'
            u'sprint-friendly+user%3Ajupyter&type=Issues&ref=searchresults')
uri = (u'https://github.com/search?utf8=✓&q=is%3Aissue+is%3Aopen+is%3A'
       u'sprint-friendly+user%3Ajupyter&type=Issues&ref=searchresults')
assert expected == encode_uri(uri)