From 9669f2a48b3c91ba4fa9398a0a607a5f75f0b5ae Mon Sep 17 00:00:00 2001 From: Takeshi KOMIYA Date: Sat, 19 Dec 2015 22:58:53 +0900 Subject: [PATCH] Fix #2171: cannot linkcheck url with unicode --- CHANGES | 1 + sphinx/util/__init__.py | 18 ++++++++---------- tests/test_util.py | 8 +++++++- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/CHANGES b/CHANGES index cc132f465..ef7083505 100644 --- a/CHANGES +++ b/CHANGES @@ -12,6 +12,7 @@ Bugs fixed * #967: Fix SVG inheritance diagram is not hyperlinked (clickable) * #1237: Fix footnotes not working in definition list in LaTeX * #2168: Fix raw directive does not work for text writer +* #2171: Fix cannot linkcheck url with unicode Release 1.3.3 (released Dec 2, 2015) diff --git a/sphinx/util/__init__.py b/sphinx/util/__init__.py index 11af02f82..bd7f564f7 100644 --- a/sphinx/util/__init__.py +++ b/sphinx/util/__init__.py @@ -23,7 +23,7 @@ from collections import deque from six import iteritems, text_type, binary_type from six.moves import range -from six.moves.urllib.parse import urlsplit, quote +from six.moves.urllib.parse import urlsplit, urlunsplit, quote_plus, parse_qsl, urlencode import docutils from docutils.utils import relative_path @@ -527,12 +527,10 @@ def import_object(objname, source=None): def encode_uri(uri): - split = urlsplit(uri) - req_url = (split[0].encode() + '://' + # scheme - split[1].encode('idna') + # netloc - quote(split[2].encode('utf-8'))) # path - if split[3]: # query - req_url += '?' + quote(split[3].encode('utf-8')) - # go back to Unicode strings which is required by Python 3 - # (but now all parts are pure ascii) - return req_url.decode('ascii') + split = list(urlsplit(uri)) + split[1] = split[1].encode('idna').decode('ascii') + split[2] = quote_plus(split[2].encode('utf-8'), '/').decode('ascii') + query = list((q, quote_plus(v.encode('utf-8'))) + for (q, v) in parse_qsl(split[3])) + split[3] = urlencode(query).decode('ascii') + return urlunsplit(split) diff --git a/tests/test_util.py b/tests/test_util.py index 1e2eee5a0..3d30b6fe0 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -15,5 +15,11 @@ def test_encode_uri(): expected = (u'https://ru.wikipedia.org/wiki/%D0%A1%D0%B8%D1%81%D1%82%D0%B5%D0%BC%D0%B0_' u'%D1%83%D0%BF%D1%80%D0%B0%D0%B2%D0%BB%D0%B5%D0%BD%D0%B8%D1%8F_' u'%D0%B1%D0%B0%D0%B7%D0%B0%D0%BC%D0%B8_%D0%B4%D0%B0%D0%BD%D0%BD%D1%8B%D1%85') - uri = 'https://ru.wikipedia.org/wiki/Система_управления_базами_данных' + uri = u'https://ru.wikipedia.org/wiki/Система_управления_базами_данных' + assert expected, encode_uri(uri) + + expected = (u'https://github.com/search?utf8=%E2%9C%93&q=is%3Aissue+is%3Aopen+is%3A' + u'sprint-friendly+user%3Ajupyter&type=Issues&ref=searchresults') + uri = (u'https://github.com/search?utf8=✓&q=is%3Aissue+is%3Aopen+is%3A' + u'sprint-friendly+user%3Ajupyter&type=Issues&ref=searchresults') assert expected, encode_uri(uri)