mirror of
https://github.com/sphinx-doc/sphinx.git
synced 2025-02-25 18:55:22 -06:00
Merge pull request #2884 from tk0miya/refactor_intersphinx
Refactor intersphinx: Use requests package
This commit is contained in:
commit
07e30b279b
@ -14,10 +14,7 @@ import socket
|
||||
import codecs
|
||||
import threading
|
||||
from os import path
|
||||
import warnings
|
||||
|
||||
import pkg_resources
|
||||
import requests
|
||||
from requests.exceptions import HTTPError
|
||||
from six.moves import queue
|
||||
from six.moves.urllib.parse import unquote
|
||||
@ -38,32 +35,7 @@ from sphinx.builders import Builder
|
||||
from sphinx.util import encode_uri
|
||||
from sphinx.util.console import purple, red, darkgreen, darkgray, \
|
||||
darkred, turquoise
|
||||
|
||||
try:
|
||||
pkg_resources.require(['requests[security]'])
|
||||
except pkg_resources.DistributionNotFound:
|
||||
import ssl
|
||||
if not getattr(ssl, 'HAS_SNI', False):
|
||||
# don't complain on each url processed about the SSL issue
|
||||
requests.packages.urllib3.disable_warnings(
|
||||
requests.packages.urllib3.exceptions.InsecurePlatformWarning)
|
||||
warnings.warn(
|
||||
'Some links may return broken results due to being unable to '
|
||||
'check the Server Name Indication (SNI) in the returned SSL cert '
|
||||
'against the hostname in the url requested. Recommended to '
|
||||
'install "requests[security]" as a dependency or upgrade to '
|
||||
'a python version with SNI support (Python 3 and Python 2.7.9+).'
|
||||
)
|
||||
except pkg_resources.UnknownExtra:
|
||||
warnings.warn(
|
||||
'Some links may return broken results due to being unable to '
|
||||
'check the Server Name Indication (SNI) in the returned SSL cert '
|
||||
'against the hostname in the url requested. Recommended to '
|
||||
'install requests-2.4.1+.'
|
||||
)
|
||||
|
||||
requests_user_agent = [('User-agent', 'Mozilla/5.0 (X11; Linux x86_64; rv:25.0) '
|
||||
'Gecko/20100101 Firefox/25.0')]
|
||||
from sphinx.util.requests import requests, useragent_header
|
||||
|
||||
|
||||
class AnchorCheckParser(HTMLParser):
|
||||
@ -118,7 +90,7 @@ class CheckExternalLinksBuilder(Builder):
|
||||
open(path.join(self.outdir, 'output.txt'), 'w').close()
|
||||
|
||||
self.session = requests.Session()
|
||||
self.session.headers = dict(requests_user_agent)
|
||||
self.session.headers = dict(useragent_header)
|
||||
|
||||
# create queues and worker threads
|
||||
self.wqueue = queue.Queue()
|
||||
|
@ -34,7 +34,6 @@ from os import path
|
||||
import re
|
||||
|
||||
from six import iteritems, string_types
|
||||
from six.moves.urllib import request
|
||||
from six.moves.urllib.parse import urlsplit, urlunsplit
|
||||
from docutils import nodes
|
||||
from docutils.utils import relative_path
|
||||
@ -42,17 +41,9 @@ from docutils.utils import relative_path
|
||||
import sphinx
|
||||
from sphinx.locale import _
|
||||
from sphinx.builders.html import INVENTORY_FILENAME
|
||||
from sphinx.util.requests import requests, useragent_header
|
||||
|
||||
|
||||
default_handlers = [request.ProxyHandler(), request.HTTPRedirectHandler(),
|
||||
request.HTTPHandler()]
|
||||
try:
|
||||
default_handlers.append(request.HTTPSHandler)
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
default_opener = request.build_opener(*default_handlers)
|
||||
|
||||
UTF8StreamReader = codecs.lookup('utf-8')[2]
|
||||
|
||||
|
||||
@ -125,6 +116,14 @@ def read_inventory_v2(f, uri, join, bufsize=16*1024):
|
||||
return invdata
|
||||
|
||||
|
||||
def read_inventory(f, uri, join, bufsize=16*1024):
    """Read a Sphinx inventory from *f*, dispatching on its header line.

    The first line of *f* is consumed, stripped of trailing whitespace and
    decoded as UTF-8 to determine the inventory format.  The rest of the
    stream is handed to the matching version-specific reader.

    :param f: file-like object yielding bytes, positioned at the header line
    :param uri: base URI passed through to the version-specific reader
    :param join: path-joining callable passed through to the reader
    :param bufsize: buffer size forwarded to ``read_inventory_v2``
    :return: whatever the selected version-specific reader returns
    :raises ValueError: if the header line names no supported version
    """
    line = f.readline().rstrip().decode('utf-8')
    if line == '# Sphinx inventory version 1':
        return read_inventory_v1(f, uri, join)
    if line == '# Sphinx inventory version 2':
        return read_inventory_v2(f, uri, join, bufsize=bufsize)
    # fetch_inventory() catches ValueError to report an unsupported inventory;
    # falling through here would silently return None instead.
    raise ValueError('unknown or unsupported inventory version: %r' % line)
|
||||
|
||||
|
||||
def _strip_basic_auth(url):
|
||||
"""Returns *url* with basic auth credentials removed. Also returns the
|
||||
basic auth username and password if they're present in *url*.
|
||||
@ -136,27 +135,14 @@ def _strip_basic_auth(url):
|
||||
:param url: url which may or may not contain basic auth credentials
|
||||
:type url: ``str``
|
||||
|
||||
:return: 3-``tuple`` of:
|
||||
|
||||
* (``str``) -- *url* with any basic auth creds removed
|
||||
* (``str`` or ``NoneType``) -- basic auth username or ``None`` if basic
|
||||
auth username not given
|
||||
* (``str`` or ``NoneType``) -- basic auth password or ``None`` if basic
|
||||
auth password not given
|
||||
|
||||
:rtype: ``tuple``
|
||||
:return: *url* with any basic auth creds removed
|
||||
:rtype: ``str``
|
||||
"""
|
||||
url_parts = urlsplit(url)
|
||||
username = url_parts.username
|
||||
password = url_parts.password
|
||||
frags = list(url_parts)
|
||||
frags = list(urlsplit(url))
|
||||
# swap out "user[:pass]@hostname" for "hostname"
|
||||
if url_parts.port:
|
||||
frags[1] = "%s:%s" % (url_parts.hostname, url_parts.port)
|
||||
else:
|
||||
frags[1] = url_parts.hostname
|
||||
url = urlunsplit(frags)
|
||||
return (url, username, password)
|
||||
if '@' in frags[1]:
|
||||
frags[1] = frags[1].split('@')[1]
|
||||
return urlunsplit(frags)
|
||||
|
||||
|
||||
def _read_from_url(url):
|
||||
@ -175,48 +161,35 @@ def _read_from_url(url):
|
||||
:return: data read from resource described by *url*
|
||||
:rtype: ``file``-like object
|
||||
"""
|
||||
url, username, password = _strip_basic_auth(url)
|
||||
if username is not None and password is not None:
|
||||
# case: url contains basic auth creds
|
||||
password_mgr = request.HTTPPasswordMgrWithDefaultRealm()
|
||||
password_mgr.add_password(None, url, username, password)
|
||||
handler = request.HTTPBasicAuthHandler(password_mgr)
|
||||
opener = request.build_opener(*(default_handlers + [handler]))
|
||||
else:
|
||||
opener = default_opener
|
||||
|
||||
return opener.open(url)
|
||||
r = requests.get(url, stream=True, headers=dict(useragent_header))
|
||||
r.raise_for_status()
|
||||
r.raw.url = r.url
|
||||
return r.raw
|
||||
|
||||
|
||||
def _get_safe_url(url):
|
||||
"""Gets version of *url* with basic auth passwords obscured. This function
|
||||
returns results suitable for printing and logging.
|
||||
|
||||
E.g.: https://user:12345@example.com => https://user:********@example.com
|
||||
|
||||
.. note::
|
||||
|
||||
The number of asterisks is invariant in the length of the basic auth
|
||||
password, so minimal information is leaked.
|
||||
E.g.: https://user:12345@example.com => https://user@example.com
|
||||
|
||||
:param url: a url
|
||||
:type url: ``str``
|
||||
|
||||
:return: *url* with password obscured
|
||||
:return: *url* with password removed
|
||||
:rtype: ``str``
|
||||
"""
|
||||
safe_url = url
|
||||
url, username, _ = _strip_basic_auth(url)
|
||||
if username is not None:
|
||||
# case: url contained basic auth creds; obscure password
|
||||
url_parts = urlsplit(url)
|
||||
safe_netloc = '{0}@{1}'.format(username, url_parts.hostname)
|
||||
# replace original netloc w/ obscured version
|
||||
frags = list(url_parts)
|
||||
frags[1] = safe_netloc
|
||||
safe_url = urlunsplit(frags)
|
||||
parts = urlsplit(url)
|
||||
if parts.username is None:
|
||||
return url
|
||||
else:
|
||||
frags = list(parts)
|
||||
if parts.port:
|
||||
frags[1] = '{0}@{1}:{2}'.format(parts.username, parts.hostname, parts.port)
|
||||
else:
|
||||
frags[1] = '{0}@{1}'.format(parts.username, parts.hostname)
|
||||
|
||||
return safe_url
|
||||
return urlunsplit(frags)
|
||||
|
||||
|
||||
def fetch_inventory(app, uri, inv):
|
||||
@ -226,8 +199,7 @@ def fetch_inventory(app, uri, inv):
|
||||
localuri = '://' not in uri
|
||||
if not localuri:
|
||||
# case: inv URI points to remote resource; strip any existing auth
|
||||
uri, _, _ = _strip_basic_auth(uri)
|
||||
join = localuri and path.join or posixpath.join
|
||||
uri = _strip_basic_auth(uri)
|
||||
try:
|
||||
if '://' in inv:
|
||||
f = _read_from_url(inv)
|
||||
@ -238,25 +210,19 @@ def fetch_inventory(app, uri, inv):
|
||||
'%s: %s' % (inv, err.__class__, err))
|
||||
return
|
||||
try:
|
||||
if hasattr(f, 'geturl'):
|
||||
newinv = f.geturl()
|
||||
if hasattr(f, 'url'):
|
||||
newinv = f.url
|
||||
if inv != newinv:
|
||||
app.info('intersphinx inventory has moved: %s -> %s' % (inv, newinv))
|
||||
|
||||
if uri in (inv, path.dirname(inv), path.dirname(inv) + '/'):
|
||||
uri = path.dirname(newinv)
|
||||
line = f.readline().rstrip().decode('utf-8')
|
||||
try:
|
||||
if line == '# Sphinx inventory version 1':
|
||||
invdata = read_inventory_v1(f, uri, join)
|
||||
elif line == '# Sphinx inventory version 2':
|
||||
invdata = read_inventory_v2(f, uri, join)
|
||||
else:
|
||||
raise ValueError
|
||||
f.close()
|
||||
except ValueError:
|
||||
f.close()
|
||||
raise ValueError('unknown or unsupported inventory version')
|
||||
with f:
|
||||
try:
|
||||
join = localuri and path.join or posixpath.join
|
||||
invdata = read_inventory(f, uri, join)
|
||||
except ValueError:
|
||||
raise ValueError('unknown or unsupported inventory version')
|
||||
except Exception as err:
|
||||
app.warn('intersphinx inventory %r not readable due to '
|
||||
'%s: %s' % (inv, err.__class__.__name__, err))
|
||||
|
43
sphinx/util/requests.py
Normal file
43
sphinx/util/requests.py
Normal file
@ -0,0 +1,43 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
sphinx.util.requests
|
||||
~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Simple requests package loader
|
||||
|
||||
:copyright: Copyright 2007-2016 by the Sphinx team, see AUTHORS.
|
||||
:license: BSD, see LICENSE for details.
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
import requests
|
||||
import warnings
|
||||
import pkg_resources
|
||||
|
||||
# try to load requests[security]
#
# This runs once at import time.  If the "security" extra cannot be required,
# a single warning is emitted up front rather than one per processed URL.
try:
    pkg_resources.require(['requests[security]'])
except (pkg_resources.DistributionNotFound,
        pkg_resources.VersionConflict):
    # The extra's dependencies are either missing or installed at a version
    # that does not satisfy its requirements.  Both cases leave us relying on
    # the interpreter's own ssl module, so check whether that supports SNI.
    import ssl
    if not getattr(ssl, 'HAS_SNI', False):
        # don't complain on each url processed about the SSL issue
        requests.packages.urllib3.disable_warnings(
            requests.packages.urllib3.exceptions.InsecurePlatformWarning)
        warnings.warn(
            'Some links may return broken results due to being unable to '
            'check the Server Name Indication (SNI) in the returned SSL cert '
            'against the hostname in the url requested. Recommended to '
            'install "requests[security]" as a dependency or upgrade to '
            'a python version with SNI support (Python 3 and Python 2.7.9+).'
        )
except pkg_resources.UnknownExtra:
    # The installed requests distribution does not define a "security" extra
    # (the message below points at requests-2.4.1+ as the remedy).
    warnings.warn(
        'Some links may return broken results due to being unable to '
        'check the Server Name Indication (SNI) in the returned SSL cert '
        'against the hostname in the url requested. Recommended to '
        'install requests-2.4.1+.'
    )
|
||||
|
||||
useragent_header = [('User-agent',
|
||||
'Mozilla/5.0 (X11; Linux x86_64; rv:25.0) Gecko/20100101 Firefox/25.0')]
|
@ -17,7 +17,7 @@ from six import BytesIO
|
||||
from docutils import nodes
|
||||
|
||||
from sphinx import addnodes
|
||||
from sphinx.ext.intersphinx import read_inventory_v1, read_inventory_v2, \
|
||||
from sphinx.ext.intersphinx import read_inventory, \
|
||||
load_mappings, missing_reference, _strip_basic_auth, _read_from_url, \
|
||||
_get_safe_url, fetch_inventory, INVENTORY_FILENAME
|
||||
|
||||
@ -49,8 +49,7 @@ a term including:colon std:term -1 glossary.html#term-a-term-including-colon -
|
||||
|
||||
def test_read_inventory_v1():
|
||||
f = BytesIO(inventory_v1)
|
||||
f.readline()
|
||||
invdata = read_inventory_v1(f, '/util', posixpath.join)
|
||||
invdata = read_inventory(f, '/util', posixpath.join)
|
||||
assert invdata['py:module']['module'] == \
|
||||
('foo', '1.0', '/util/foo.html#module-module', '-')
|
||||
assert invdata['py:class']['module.cls'] == \
|
||||
@ -59,13 +58,11 @@ def test_read_inventory_v1():
|
||||
|
||||
def test_read_inventory_v2():
|
||||
f = BytesIO(inventory_v2)
|
||||
f.readline()
|
||||
invdata1 = read_inventory_v2(f, '/util', posixpath.join)
|
||||
invdata1 = read_inventory(f, '/util', posixpath.join)
|
||||
|
||||
# try again with a small buffer size to test the chunking algorithm
|
||||
f = BytesIO(inventory_v2)
|
||||
f.readline()
|
||||
invdata2 = read_inventory_v2(f, '/util', posixpath.join, bufsize=5)
|
||||
invdata2 = read_inventory(f, '/util', posixpath.join, bufsize=5)
|
||||
|
||||
assert invdata1 == invdata2
|
||||
|
||||
@ -84,47 +81,47 @@ def test_read_inventory_v2():
|
||||
|
||||
|
||||
@with_app()
|
||||
@mock.patch('sphinx.ext.intersphinx.read_inventory_v2')
|
||||
@mock.patch('sphinx.ext.intersphinx.read_inventory')
|
||||
@mock.patch('sphinx.ext.intersphinx._read_from_url')
|
||||
def test_fetch_inventory_redirection(app, status, warning, _read_from_url, read_inventory_v2):
|
||||
def test_fetch_inventory_redirection(app, status, warning, _read_from_url, read_inventory):
|
||||
_read_from_url().readline.return_value = '# Sphinx inventory version 2'.encode('utf-8')
|
||||
|
||||
# same uri and inv, not redirected
|
||||
_read_from_url().geturl.return_value = 'http://hostname/' + INVENTORY_FILENAME
|
||||
_read_from_url().url = 'http://hostname/' + INVENTORY_FILENAME
|
||||
fetch_inventory(app, 'http://hostname/', 'http://hostname/' + INVENTORY_FILENAME)
|
||||
assert 'intersphinx inventory has moved' not in status.getvalue()
|
||||
assert read_inventory_v2.call_args[0][1] == 'http://hostname/'
|
||||
assert read_inventory.call_args[0][1] == 'http://hostname/'
|
||||
|
||||
# same uri and inv, redirected
|
||||
status.seek(0)
|
||||
status.truncate(0)
|
||||
_read_from_url().geturl.return_value = 'http://hostname/new/' + INVENTORY_FILENAME
|
||||
_read_from_url().url = 'http://hostname/new/' + INVENTORY_FILENAME
|
||||
|
||||
fetch_inventory(app, 'http://hostname/', 'http://hostname/' + INVENTORY_FILENAME)
|
||||
assert status.getvalue() == ('intersphinx inventory has moved: '
|
||||
'http://hostname/%s -> http://hostname/new/%s\n' %
|
||||
(INVENTORY_FILENAME, INVENTORY_FILENAME))
|
||||
assert read_inventory_v2.call_args[0][1] == 'http://hostname/new'
|
||||
assert read_inventory.call_args[0][1] == 'http://hostname/new'
|
||||
|
||||
# different uri and inv, not redirected
|
||||
status.seek(0)
|
||||
status.truncate(0)
|
||||
_read_from_url().geturl.return_value = 'http://hostname/new/' + INVENTORY_FILENAME
|
||||
_read_from_url().url = 'http://hostname/new/' + INVENTORY_FILENAME
|
||||
|
||||
fetch_inventory(app, 'http://hostname/', 'http://hostname/new/' + INVENTORY_FILENAME)
|
||||
assert 'intersphinx inventory has moved' not in status.getvalue()
|
||||
assert read_inventory_v2.call_args[0][1] == 'http://hostname/'
|
||||
assert read_inventory.call_args[0][1] == 'http://hostname/'
|
||||
|
||||
# different uri and inv, redirected
|
||||
status.seek(0)
|
||||
status.truncate(0)
|
||||
_read_from_url().geturl.return_value = 'http://hostname/other/' + INVENTORY_FILENAME
|
||||
_read_from_url().url = 'http://hostname/other/' + INVENTORY_FILENAME
|
||||
|
||||
fetch_inventory(app, 'http://hostname/', 'http://hostname/new/' + INVENTORY_FILENAME)
|
||||
assert status.getvalue() == ('intersphinx inventory has moved: '
|
||||
'http://hostname/new/%s -> http://hostname/other/%s\n' %
|
||||
(INVENTORY_FILENAME, INVENTORY_FILENAME))
|
||||
assert read_inventory_v2.call_args[0][1] == 'http://hostname/'
|
||||
assert read_inventory.call_args[0][1] == 'http://hostname/'
|
||||
|
||||
|
||||
@with_app()
|
||||
@ -233,64 +230,23 @@ class TestStripBasicAuth(unittest.TestCase):
|
||||
"""basic auth creds stripped from URL containing creds"""
|
||||
url = 'https://user:12345@domain.com/project/objects.inv'
|
||||
expected = 'https://domain.com/project/objects.inv'
|
||||
actual_url, actual_username, actual_password = _strip_basic_auth(url)
|
||||
self.assertEqual(expected, actual_url)
|
||||
self.assertEqual('user', actual_username)
|
||||
self.assertEqual('12345', actual_password)
|
||||
actual = _strip_basic_auth(url)
|
||||
self.assertEqual(expected, actual)
|
||||
|
||||
def test_no_auth(self):
|
||||
"""url unchanged if param doesn't contain basic auth creds"""
|
||||
url = 'https://domain.com/project/objects.inv'
|
||||
expected = 'https://domain.com/project/objects.inv'
|
||||
actual_url, actual_username, actual_password = _strip_basic_auth(url)
|
||||
self.assertEqual(expected, actual_url)
|
||||
self.assertEqual(None, actual_username)
|
||||
self.assertEqual(None, actual_password)
|
||||
actual = _strip_basic_auth(url)
|
||||
self.assertEqual(expected, actual)
|
||||
|
||||
def test_having_port(self):
|
||||
"""basic auth creds correctly stripped from URL containing creds even if URL
|
||||
contains port"""
|
||||
url = 'https://user:12345@domain.com:8080/project/objects.inv'
|
||||
expected = 'https://domain.com:8080/project/objects.inv'
|
||||
actual_url, actual_username, actual_password = _strip_basic_auth(url)
|
||||
self.assertEqual(expected, actual_url)
|
||||
self.assertEqual('user', actual_username)
|
||||
self.assertEqual('12345', actual_password)
|
||||
|
||||
|
||||
@mock.patch('six.moves.urllib.request.HTTPBasicAuthHandler')
|
||||
@mock.patch('six.moves.urllib.request.HTTPPasswordMgrWithDefaultRealm')
|
||||
@mock.patch('six.moves.urllib.request.build_opener')
|
||||
def test_readfromurl_authed(m_build_opener, m_HTTPPasswordMgrWithDefaultRealm,
|
||||
m_HTTPBasicAuthHandler):
|
||||
# read from URL containing basic auth creds
|
||||
password_mgr = mock.Mock()
|
||||
m_HTTPPasswordMgrWithDefaultRealm.return_value = password_mgr
|
||||
|
||||
url = 'https://user:12345@domain.com/project/objects.inv'
|
||||
_read_from_url(url)
|
||||
|
||||
m_HTTPPasswordMgrWithDefaultRealm.assert_called_once_with()
|
||||
password_mgr.add_password.assert_called_with(
|
||||
None, 'https://domain.com/project/objects.inv', 'user', '12345')
|
||||
|
||||
|
||||
@mock.patch('six.moves.urllib.request.HTTPBasicAuthHandler')
|
||||
@mock.patch('six.moves.urllib.request.HTTPPasswordMgrWithDefaultRealm')
|
||||
@mock.patch('sphinx.ext.intersphinx.default_opener')
|
||||
def test_readfromurl_unauthed(m_default_opener, m_HTTPPasswordMgrWithDefaultRealm,
|
||||
m_HTTPBasicAuthHandler):
|
||||
# read from URL without auth creds
|
||||
password_mgr = mock.Mock()
|
||||
m_HTTPPasswordMgrWithDefaultRealm.return_value = password_mgr
|
||||
|
||||
url = 'https://domain.com/project/objects.inv'
|
||||
_read_from_url(url)
|
||||
|
||||
# assert password manager not created
|
||||
assert m_HTTPPasswordMgrWithDefaultRealm.call_args is None
|
||||
# assert no password added to the password manager
|
||||
assert password_mgr.add_password.call_args is None
|
||||
actual = _strip_basic_auth(url)
|
||||
self.assertEqual(expected, actual)
|
||||
|
||||
|
||||
def test_getsafeurl_authed():
|
||||
@ -301,6 +257,14 @@ def test_getsafeurl_authed():
|
||||
assert expected == actual
|
||||
|
||||
|
||||
def test_getsafeurl_authed_having_port():
    """_get_safe_url() drops the password but keeps user name and port"""
    safe = _get_safe_url('https://user:12345@domain.com:8080/project/objects.inv')
    assert safe == 'https://user@domain.com:8080/project/objects.inv'
|
||||
|
||||
|
||||
def test_getsafeurl_unauthed():
|
||||
"""_get_safe_url() with a url without basic auth"""
|
||||
url = 'https://domain.com/project/objects.inv'
|
||||
|
Loading…
Reference in New Issue
Block a user