Intersphinx: inventory support for URLs w/ basic auth

This commit is contained in:
Michael Wilson
2015-10-15 16:37:55 -05:00
parent 5f09ecd4cf
commit 564ce3b91d
4 changed files with 182 additions and 6 deletions

View File

@@ -57,6 +57,7 @@ Other contributors, listed alphabetically, are:
* John Waltman -- Texinfo builder
* Barry Warsaw -- setup command improvements
* Sebastian Wiesner -- image handling, distutils support
* Michael Wilson -- Intersphinx HTTP basic auth support
* Joel Wurtz -- cellspanning support in LaTeX
Many thanks for all contributions!

View File

@@ -14,6 +14,8 @@ Features added
* C++ type alias support (e.g., ``.. type:: T = int``)
* C++ template support for classes, functions, type aliases, and variables (#1729, #1314).
* C++, added new scope management directives ``namespace-push`` and ``namespace-pop``.
* Intersphinx: Added support for fetching Intersphinx inventories with URLs
using HTTP basic auth
Bugs fixed
----------

View File

@@ -34,7 +34,7 @@ from os import path
import re
from six import iteritems
from six.moves.urllib import request
from six.moves.urllib import parse, request
from docutils import nodes
from docutils.utils import relative_path
@@ -124,15 +124,110 @@ def read_inventory_v2(f, uri, join, bufsize=16*1024):
return invdata
def _strip_basic_auth(url):
"""Returns *url* with basic auth credentials removed. Also returns the
basic auth username and password if they're present in *url*.
E.g.: https://user:pass@example.com => https://example.com
*url* need not include basic auth credentials.
:param url: url which may or may not contain basic auth credentials
:type url: ``str``
:return: 3-``tuple`` of:
* (``str``) -- *url* with any basic auth creds removed
* (``str`` or ``NoneType``) -- basic auth username or ``None`` if basic
auth username not given
* (``str`` or ``NoneType``) -- basic auth password or ``None`` if basic
auth password not given
:rtype: ``tuple``
"""
url_parts = parse.urlsplit(url)
username = url_parts.username
password = url_parts.password
frags = list(url_parts)
# swap out "user[:pass]@hostname" for "hostname"
frags[1] = url_parts.hostname
url = parse.urlunsplit(frags)
return (url, username, password)
def _read_from_url(url):
    """Open *url* with an HTTP *GET* and return the response object.

    Basic HTTP auth credentials embedded in *url* (as laid out by RFC1738
    § 3.1; see § 5 for the URL grammar) are removed from the URL that is
    actually requested and registered with a basic auth handler instead.

    .. seealso::

       https://www.ietf.org/rfc/rfc1738.txt

    :param url: URL of an HTTP resource
    :type url: ``str``

    :return: data read from resource described by *url*
    :rtype: ``file``-like object
    """
    bare_url, user, secret = _strip_basic_auth(url)

    if user is None or secret is None:
        # case: no complete credential pair in the url; use a no-op handler
        auth_handler = request.BaseHandler()
    else:
        # case: url contains basic auth creds
        manager = request.HTTPPasswordMgrWithDefaultRealm()
        manager.add_password(None, bare_url, user, secret)
        auth_handler = request.HTTPBasicAuthHandler(manager)

    return request.build_opener(auth_handler).open(bare_url)
def _get_safe_url(url):
"""Gets version of *url* with basic auth passwords obscured. This function
returns results suitable for printing and logging.
E.g.: https://user:12345@example.com => https://user:********@example.com
.. note::
The number of astrisks is invariant in the length of the basic auth
password, so minimal information is leaked.
:param url: a url
:type url: ``str``
:return: *url* with password obscured
:rtype: ``str``
"""
safe_url = url
url, username, _ = _strip_basic_auth(url)
if username is not None:
# case: url contained basic auth creds; obscure password
url_parts = parse.urlsplit(url)
safe_netloc = '{0}:********@{1}'.format(username, url_parts.hostname)
# replace original netloc w/ obscured version
frags = list(url_parts)
frags[1] = safe_netloc
safe_url = parse.urlunsplit(frags)
return safe_url
def fetch_inventory(app, uri, inv):
"""Fetch, parse and return an intersphinx inventory file."""
# both *uri* (base URI of the links to generate) and *inv* (actual
# location of the inventory file) can be local or remote URIs
localuri = uri.find('://') == -1
if localuri is False:
# case: inv URI points to remote resource; strip any existing auth
uri, _, _ = _strip_basic_auth(uri)
join = localuri and path.join or posixpath.join
try:
if inv.find('://') != -1:
f = request.urlopen(inv)
f = _read_from_url(inv)
else:
f = open(path.join(app.srcdir, inv), 'rb')
except Exception as err:
@@ -194,7 +289,9 @@ def load_mappings(app):
# files; remote ones only if the cache time is expired
if '://' not in inv or uri not in cache \
or cache[uri][1] < cache_time:
app.info('loading intersphinx inventory from %s...' % inv)
safe_inv_url = _get_safe_url(inv)
app.info(
'loading intersphinx inventory from %s...' % safe_inv_url)
invdata = fetch_inventory(app, uri, inv)
if invdata:
cache[uri] = (name, now, invdata)

View File

@@ -9,17 +9,19 @@
:license: BSD, see LICENSE for details.
"""
import zlib
import posixpath
import unittest
import zlib
from six import BytesIO
from docutils import nodes
from sphinx import addnodes
from sphinx.ext.intersphinx import read_inventory_v1, read_inventory_v2, \
load_mappings, missing_reference
load_mappings, missing_reference, _strip_basic_auth, _read_from_url, \
_get_safe_url
from util import with_app, with_tempdir
from util import with_app, with_tempdir, mock
inventory_v1 = '''\
@@ -175,3 +177,77 @@ def test_load_mappings_warnings(tempdir, app, status, warning):
# load the inventory and check if it's done correctly
load_mappings(app)
assert warning.getvalue().count('\n') == 2
class TestStripBasicAuth(unittest.TestCase):
    """Tests for sphinx.ext.intersphinx._strip_basic_auth()"""

    def test_auth_stripped(self):
        """creds are split off a URL that carries basic auth creds"""
        stripped, user, secret = _strip_basic_auth(
            'https://user:12345@domain.com/project/objects.inv')
        self.assertEqual('https://domain.com/project/objects.inv', stripped)
        self.assertEqual('user', user)
        self.assertEqual('12345', secret)

    def test_no_auth(self):
        """a URL without basic auth creds comes back untouched"""
        plain = 'https://domain.com/project/objects.inv'
        stripped, user, secret = _strip_basic_auth(plain)
        self.assertEqual('https://domain.com/project/objects.inv', stripped)
        self.assertEqual(None, user)
        self.assertEqual(None, secret)
class TestReadFromUrl(unittest.TestCase):
    """Tests for sphinx.ext.intersphinx._read_from_url()"""

    @mock.patch('six.moves.urllib.request.HTTPBasicAuthHandler')
    @mock.patch('six.moves.urllib.request.HTTPPasswordMgrWithDefaultRealm')
    @mock.patch('six.moves.urllib.request.build_opener')
    def test_authed(self, m_build_opener, m_HTTPPasswordMgrWithDefaultRealm,
                    m_HTTPBasicAuthHandler):
        """creds found in the URL are registered with the password manager"""
        manager = mock.Mock()
        m_HTTPPasswordMgrWithDefaultRealm.return_value = manager

        _read_from_url('https://user:12345@domain.com/project/objects.inv')

        m_HTTPPasswordMgrWithDefaultRealm.assert_called_once_with()
        # the password is registered against the credential-free URL
        manager.add_password.assert_called_with(
            None, 'https://domain.com/project/objects.inv', 'user', '12345')

    @mock.patch('six.moves.urllib.request.HTTPBasicAuthHandler')
    @mock.patch('six.moves.urllib.request.HTTPPasswordMgrWithDefaultRealm')
    @mock.patch('six.moves.urllib.request.build_opener')
    def test_unauthed(self, m_build_opener, m_HTTPPasswordMgrWithDefaultRealm,
                      m_HTTPBasicAuthHandler):
        """no password manager is involved for a URL without creds"""
        manager = mock.Mock()
        m_HTTPPasswordMgrWithDefaultRealm.return_value = manager

        _read_from_url('https://domain.com/project/objects.inv')

        # assert password manager not created
        self.assertFalse(m_HTTPPasswordMgrWithDefaultRealm.called)
        # assert no password added to the password manager
        self.assertFalse(manager.add_password.called)
class TestGetSafeUrl(unittest.TestCase):
    """Tests for sphinx.ext.intersphinx._get_safe_url()"""

    def test_authed(self):
        """the password of a url with basic auth is replaced by asterisks"""
        obscured = _get_safe_url(
            'https://user:12345@domain.com/project/objects.inv')
        self.assertEqual(
            'https://user:********@domain.com/project/objects.inv', obscured)

    def test_unauthed(self):
        """a url without basic auth is passed through as is"""
        obscured = _get_safe_url('https://domain.com/project/objects.inv')
        self.assertEqual('https://domain.com/project/objects.inv', obscured)