mirror of
https://github.com/sphinx-doc/sphinx.git
synced 2025-02-25 18:55:22 -06:00
Add testcase for encoding URIs in linkcheck builder
This commit is contained in:
@@ -17,7 +17,7 @@ from os import path
|
||||
|
||||
from six.moves import queue
|
||||
from six.moves.urllib.request import build_opener, Request, HTTPRedirectHandler
|
||||
from six.moves.urllib.parse import unquote, urlsplit, quote
|
||||
from six.moves.urllib.parse import unquote
|
||||
from six.moves.urllib.error import HTTPError
|
||||
from six.moves.html_parser import HTMLParser
|
||||
from docutils import nodes
|
||||
@@ -33,6 +33,7 @@ except ImportError:
|
||||
pass
|
||||
|
||||
from sphinx.builders import Builder
|
||||
from sphinx.util import encode_uri
|
||||
from sphinx.util.console import purple, red, darkgreen, darkgray, \
|
||||
darkred, turquoise
|
||||
from sphinx.util.pycompat import TextIOWrapper
|
||||
@@ -153,15 +154,7 @@ class CheckExternalLinksBuilder(Builder):
|
||||
try:
|
||||
req_url.encode('ascii')
|
||||
except UnicodeError:
|
||||
split = urlsplit(req_url)
|
||||
req_url = (split[0].encode() + '://' + # scheme
|
||||
split[1].encode('idna') + # netloc
|
||||
quote(split[2].encode('utf-8'))) # path
|
||||
if split[3]: # query
|
||||
req_url += '?' + quote(split[3].encode('utf-8'))
|
||||
# go back to Unicode strings which is required by Python 3
|
||||
# (but now all parts are pure ascii)
|
||||
req_url = req_url.decode('ascii')
|
||||
req_url = encode_uri(req_url)
|
||||
|
||||
# need to actually check the URI
|
||||
try:
|
||||
|
||||
@@ -23,6 +23,7 @@ from collections import deque
|
||||
|
||||
from six import iteritems, text_type, binary_type
|
||||
from six.moves import range
|
||||
from six.moves.urllib.parse import urlsplit, quote
|
||||
import docutils
|
||||
from docutils.utils import relative_path
|
||||
|
||||
@@ -523,3 +524,15 @@ def import_object(objname, source=None):
|
||||
raise ExtensionError('Could not find %s' % objname +
|
||||
(source and ' (needed for %s)' % source or ''),
|
||||
err)
|
||||
|
||||
|
||||
def encode_uri(uri):
    """Return *uri* as a pure-ASCII text string.

    The host name is IDNA-encoded, and the path and query string are
    UTF-8 encoded and then percent-quoted.  Only the scheme, netloc,
    path and query components are used; a fragment, if present, is not
    carried over into the result.

    :param uri: the URI to encode, possibly containing non-ASCII characters
    :returns: the encoded URI as a text (unicode) string
    """
    split = urlsplit(uri)

    # Convert each component back to text right away instead of building
    # a byte string and decoding at the end: on Python 3 bytes and text
    # cannot be concatenated, so the mixed form raises TypeError.
    netloc = split[1].encode('idna').decode('ascii')
    path = quote(split[2].encode('utf-8'))

    encoded = split[0] + '://' + netloc + path
    if split[3]:
        # Keep '=' and '&' unescaped so the key/value structure of the
        # query string survives the quoting.
        encoded += '?' + quote(split[3].encode('utf-8'), safe='/=&')

    # On Python 2 an all-byte-string input yields a byte string here;
    # normalise so the caller always gets text (everything is ASCII now).
    if isinstance(encoded, bytes):
        encoded = encoded.decode('ascii')
    return encoded
|
||||
|
||||
19
tests/test_util.py
Normal file
19
tests/test_util.py
Normal file
@@ -0,0 +1,19 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
test_util
|
||||
~~~~~~~~~~~~~~~
|
||||
|
||||
Tests util functions.
|
||||
|
||||
:copyright: Copyright 2007-2015 by the Sphinx team, see AUTHORS.
|
||||
:license: BSD, see LICENSE for details.
|
||||
"""
|
||||
from sphinx.util import encode_uri
|
||||
|
||||
|
||||
def test_encode_uri():
    """encode_uri() percent-encodes a non-ASCII path as UTF-8."""
    expected = (u'https://ru.wikipedia.org/wiki/%D0%A1%D0%B8%D1%81%D1%82%D0%B5%D0%BC%D0%B0_'
                u'%D1%83%D0%BF%D1%80%D0%B0%D0%B2%D0%BB%D0%B5%D0%BD%D0%B8%D1%8F_'
                u'%D0%B1%D0%B0%D0%B7%D0%B0%D0%BC%D0%B8_%D0%B4%D0%B0%D0%BD%D0%BD%D1%8B%D1%85')
    # Text literal (u prefix) so the input is unicode on Python 2 as well,
    # matching the type of the expected value.
    uri = u'https://ru.wikipedia.org/wiki/Система_управления_базами_данных'
    # Compare explicitly: ``assert expected, value`` only checks that
    # ``expected`` is truthy and uses ``value`` as the failure message.
    assert expected == encode_uri(uri)
|
||||
Reference in New Issue
Block a user