From dcdd401d655ffa0fe9ec22a8bf0c571611417589 Mon Sep 17 00:00:00 2001 From: Takeshi KOMIYA Date: Thu, 18 Aug 2016 00:56:40 +0900 Subject: [PATCH 1/7] intersphinx: Add read_inventory() --- sphinx/ext/intersphinx.py | 27 ++++++++++++++------------- tests/test_ext_intersphinx.py | 11 ++++------- 2 files changed, 18 insertions(+), 20 deletions(-) diff --git a/sphinx/ext/intersphinx.py b/sphinx/ext/intersphinx.py index 16a487ef8..5e035ece1 100644 --- a/sphinx/ext/intersphinx.py +++ b/sphinx/ext/intersphinx.py @@ -125,6 +125,14 @@ def read_inventory_v2(f, uri, join, bufsize=16*1024): return invdata +def read_inventory(f, uri, join, bufsize=16*1024): + line = f.readline().rstrip().decode('utf-8') + if line == '# Sphinx inventory version 1': + return read_inventory_v1(f, uri, join) + elif line == '# Sphinx inventory version 2': + return read_inventory_v2(f, uri, join, bufsize=bufsize) + + def _strip_basic_auth(url): """Returns *url* with basic auth credentials removed. Also returns the basic auth username and password if they're present in *url*. @@ -227,7 +235,6 @@ def fetch_inventory(app, uri, inv): if not localuri: # case: inv URI points to remote resource; strip any existing auth uri, _, _ = _strip_basic_auth(uri) - join = localuri and path.join or posixpath.join try: if '://' in inv: f = _read_from_url(inv) @@ -245,18 +252,12 @@ def fetch_inventory(app, uri, inv): if uri in (inv, path.dirname(inv), path.dirname(inv) + '/'): uri = path.dirname(newinv) - line = f.readline().rstrip().decode('utf-8') - try: - if line == '# Sphinx inventory version 1': - invdata = read_inventory_v1(f, uri, join) - elif line == '# Sphinx inventory version 2': - invdata = read_inventory_v2(f, uri, join) - else: - raise ValueError - f.close() - except ValueError: - f.close() - raise ValueError('unknown or unsupported inventory version') + with f: + try: + join = localuri and path.join or posixpath.join + invdata = read_inventory(f, uri, join) + except ValueError: + raise ValueError('unknown or unsupported inventory version') except Exception as err: app.warn('intersphinx inventory %r not readable due to ' '%s: %s' % (inv, err.__class__.__name__, err)) diff --git a/tests/test_ext_intersphinx.py b/tests/test_ext_intersphinx.py index 334d53971..ebbf76fca 100644 --- a/tests/test_ext_intersphinx.py +++ b/tests/test_ext_intersphinx.py @@ -17,7 +17,7 @@ from six import BytesIO from docutils import nodes from sphinx import addnodes -from sphinx.ext.intersphinx import read_inventory_v1, read_inventory_v2, \ +from sphinx.ext.intersphinx import read_inventory, \ load_mappings, missing_reference, _strip_basic_auth, _read_from_url, \ _get_safe_url, fetch_inventory, INVENTORY_FILENAME @@ -49,8 +49,7 @@ a term including:colon std:term -1 glossary.html#term-a-term-including-colon - def test_read_inventory_v1(): f = BytesIO(inventory_v1) - f.readline() - invdata = read_inventory_v1(f, '/util', posixpath.join) + invdata = read_inventory(f, '/util', posixpath.join) assert invdata['py:module']['module'] == \ ('foo', '1.0', '/util/foo.html#module-module', '-') assert invdata['py:class']['module.cls'] == \ @@ -59,13 +58,11 @@ def test_read_inventory_v1(): def test_read_inventory_v2(): f = BytesIO(inventory_v2) - f.readline() - invdata1 = read_inventory_v2(f, '/util', posixpath.join) + invdata1 = read_inventory(f, '/util', posixpath.join) # try again with a small buffer size to test the chunking algorithm f = BytesIO(inventory_v2) - f.readline() - invdata2 = read_inventory_v2(f, '/util', posixpath.join, bufsize=5) + invdata2 = read_inventory(f, '/util', posixpath.join, bufsize=5) assert invdata1 == invdata2 From 847efaf541e0a6e2c1c542d8140f8f00990d81ff Mon Sep 17 00:00:00 2001 From: Takeshi KOMIYA Date: Thu, 18 Aug 2016 00:58:17 +0900 Subject: [PATCH 2/7] Update comment --- sphinx/ext/intersphinx.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/sphinx/ext/intersphinx.py b/sphinx/ext/intersphinx.py index 5e035ece1..912da100e 100644 --- a/sphinx/ext/intersphinx.py +++ b/sphinx/ext/intersphinx.py @@ -200,17 +200,12 @@ def _get_safe_url(url): """Gets version of *url* with basic auth passwords obscured. This function returns results suitable for printing and logging. - E.g.: https://user:12345@example.com => https://user:********@example.com - - .. note:: - - The number of astrisks is invariant in the length of the basic auth - password, so minimal information is leaked. + E.g.: https://user:12345@example.com => https://user@example.com :param url: a url :type url: ``str`` - :return: *url* with password obscured + :return: *url* with password removed :rtype: ``str`` """ safe_url = url From 609ae0394715efee38a2dfaeaf40625d3fc506fc Mon Sep 17 00:00:00 2001 From: Takeshi KOMIYA Date: Thu, 18 Aug 2016 01:25:29 +0900 Subject: [PATCH 3/7] intersphinx: Refactor _get_safe_url() --- sphinx/ext/intersphinx.py | 21 ++++++++++----------- tests/test_ext_intersphinx.py | 8 ++++++++ 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/sphinx/ext/intersphinx.py b/sphinx/ext/intersphinx.py index 912da100e..40e8418e7 100644 --- a/sphinx/ext/intersphinx.py +++ b/sphinx/ext/intersphinx.py @@ -208,18 +208,17 @@ def _get_safe_url(url): :return: *url* with password removed :rtype: ``str`` """ - safe_url = url - url, username, _ = _strip_basic_auth(url) - if username is not None: - # case: url contained basic auth creds; obscure password - url_parts = urlsplit(url) - safe_netloc = '{0}@{1}'.format(username, url_parts.hostname) - # replace original netloc w/ obscured version - frags = list(url_parts) - frags[1] = safe_netloc - safe_url = urlunsplit(frags) + parts = urlsplit(url) + if parts.username is None: + return url + else: + frags = list(parts) + if parts.port: + frags[1] = '{0}@{1}:{2}'.format(parts.username, parts.hostname, parts.port) + else: + frags[1] = '{0}@{1}'.format(parts.username, parts.hostname) - return safe_url + return urlunsplit(frags) def fetch_inventory(app, uri, inv): diff --git a/tests/test_ext_intersphinx.py b/tests/test_ext_intersphinx.py index ebbf76fca..5b018032c 100644 --- a/tests/test_ext_intersphinx.py +++ b/tests/test_ext_intersphinx.py @@ -298,6 +298,14 @@ def test_getsafeurl_authed(): assert expected == actual +def test_getsafeurl_authed_having_port(): + """_get_safe_url() with a url with basic auth having port""" + url = 'https://user:12345@domain.com:8080/project/objects.inv' + expected = 'https://user@domain.com:8080/project/objects.inv' + actual = _get_safe_url(url) + assert expected == actual + + def test_getsafeurl_unauthed(): """_get_safe_url() with a url without basic auth""" url = 'https://domain.com/project/objects.inv' From fe1e5f7a7dd3518a4739c45c05510e01c6780640 Mon Sep 17 00:00:00 2001 From: Takeshi KOMIYA Date: Thu, 18 Aug 2016 11:35:52 +0900 Subject: [PATCH 4/7] linkcheck: Move requests package loader to sphinx.util.requests --- sphinx/builders/linkcheck.py | 32 ++------------------------- sphinx/util/requests.py | 43 ++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 30 deletions(-) create mode 100644 sphinx/util/requests.py diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index dfaeb5bba..d4b260425 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -14,10 +14,7 @@ import socket import codecs import threading from os import path -import warnings -import pkg_resources -import requests from requests.exceptions import HTTPError from six.moves import queue from six.moves.urllib.parse import unquote @@ -38,32 +35,7 @@ from sphinx.builders import Builder from sphinx.util import encode_uri from sphinx.util.console import purple, red, darkgreen, darkgray, \ darkred, turquoise - -try: - pkg_resources.require(['requests[security]']) -except pkg_resources.DistributionNotFound: - import ssl - if not getattr(ssl, 'HAS_SNI', False): - # don't complain on each url processed about the SSL issue - requests.packages.urllib3.disable_warnings( - requests.packages.urllib3.exceptions.InsecurePlatformWarning) - warnings.warn( - 'Some links may return broken results due to being unable to ' - 'check the Server Name Indication (SNI) in the returned SSL cert ' - 'against the hostname in the url requested. Recommended to ' - 'install "requests[security]" as a dependency or upgrade to ' - 'a python version with SNI support (Python 3 and Python 2.7.9+).' - ) -except pkg_resources.UnknownExtra: - warnings.warn( - 'Some links may return broken results due to being unable to ' - 'check the Server Name Indication (SNI) in the returned SSL cert ' - 'against the hostname in the url requested. Recommended to ' - 'install requests-2.4.1+.' - ) - -requests_user_agent = [('User-agent', 'Mozilla/5.0 (X11; Linux x86_64; rv:25.0) ' - 'Gecko/20100101 Firefox/25.0')] +from sphinx.util.requests import requests, useragent_header class AnchorCheckParser(HTMLParser): @@ -118,7 +90,7 @@ class CheckExternalLinksBuilder(Builder): open(path.join(self.outdir, 'output.txt'), 'w').close() self.session = requests.Session() - self.session.headers = dict(requests_user_agent) + self.session.headers = dict(useragent_header) # create queues and worker threads self.wqueue = queue.Queue() diff --git a/sphinx/util/requests.py b/sphinx/util/requests.py new file mode 100644 index 000000000..095bf33e6 --- /dev/null +++ b/sphinx/util/requests.py @@ -0,0 +1,43 @@ +# -*- coding: utf-8 -*- +""" + sphinx.util.requests + ~~~~~~~~~~~~~~~~~~~~ + + Simple requests package loader + + :copyright: Copyright 2007-2016 by the Sphinx team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +from __future__ import absolute_import + +import requests +import warnings +import pkg_resources + +# try to load requests[security] +try: + pkg_resources.require(['requests[security]']) +except pkg_resources.DistributionNotFound: + import ssl + if not getattr(ssl, 'HAS_SNI', False): + # don't complain on each url processed about the SSL issue + requests.packages.urllib3.disable_warnings( + requests.packages.urllib3.exceptions.InsecurePlatformWarning) + warnings.warn( + 'Some links may return broken results due to being unable to ' + 'check the Server Name Indication (SNI) in the returned SSL cert ' + 'against the hostname in the url requested. Recommended to ' + 'install "requests[security]" as a dependency or upgrade to ' + 'a python version with SNI support (Python 3 and Python 2.7.9+).' + ) +except pkg_resources.UnknownExtra: + warnings.warn( + 'Some links may return broken results due to being unable to ' + 'check the Server Name Indication (SNI) in the returned SSL cert ' + 'against the hostname in the url requested. Recommended to ' + 'install requests-2.4.1+.' + ) + +useragent_header = [('User-agent', + 'Mozilla/5.0 (X11; Linux x86_64; rv:25.0) Gecko/20100101 Firefox/25.0')] From 35232a3dd63c0dacf56159abb1830baadc143656 Mon Sep 17 00:00:00 2001 From: Takeshi KOMIYA Date: Thu, 18 Aug 2016 12:33:28 +0900 Subject: [PATCH 5/7] intersphinx: Use requests package --- sphinx/ext/intersphinx.py | 30 +++++-------------- tests/test_ext_intersphinx.py | 55 +++++++---------------------------- 2 files changed, 17 insertions(+), 68 deletions(-) diff --git a/sphinx/ext/intersphinx.py b/sphinx/ext/intersphinx.py index 40e8418e7..6c7b4aa11 100644 --- a/sphinx/ext/intersphinx.py +++ b/sphinx/ext/intersphinx.py @@ -34,7 +34,6 @@ from os import path import re from six import iteritems, string_types -from six.moves.urllib import request from six.moves.urllib.parse import urlsplit, urlunsplit from docutils import nodes from docutils.utils import relative_path @@ -42,17 +41,9 @@ from docutils.utils import relative_path import sphinx from sphinx.locale import _ from sphinx.builders.html import INVENTORY_FILENAME +from sphinx.util.requests import requests -default_handlers = [request.ProxyHandler(), request.HTTPRedirectHandler(), - request.HTTPHandler()] -try: - default_handlers.append(request.HTTPSHandler) -except AttributeError: - pass - -default_opener = request.build_opener(*default_handlers) - UTF8StreamReader = codecs.lookup('utf-8')[2] @@ -183,17 +174,10 @@ def _read_from_url(url): :return: data read from resource described by *url* :rtype: ``file``-like object """ - url, username, password = _strip_basic_auth(url) - if username is not None and password is not None: - # case: url contains basic auth creds - password_mgr = request.HTTPPasswordMgrWithDefaultRealm() - password_mgr.add_password(None, url, username, password) - handler = request.HTTPBasicAuthHandler(password_mgr) - opener = request.build_opener(*(default_handlers + [handler])) - else: - opener = default_opener - - return opener.open(url) + r = requests.get(url, stream=True) + r.raise_for_status() + r.raw.url = r.url + return r.raw def _get_safe_url(url): @@ -239,8 +223,8 @@ def fetch_inventory(app, uri, inv): '%s: %s' % (inv, err.__class__, err)) return try: - if hasattr(f, 'geturl'): - newinv = f.geturl() + if hasattr(f, 'url'): + newinv = f.url if inv != newinv: app.info('intersphinx inventory has moved: %s -> %s' % (inv, newinv)) diff --git a/tests/test_ext_intersphinx.py b/tests/test_ext_intersphinx.py index 5b018032c..64cc03d75 100644 --- a/tests/test_ext_intersphinx.py +++ b/tests/test_ext_intersphinx.py @@ -81,47 +81,47 @@ def test_read_inventory_v2(): @with_app() -@mock.patch('sphinx.ext.intersphinx.read_inventory_v2') +@mock.patch('sphinx.ext.intersphinx.read_inventory') @mock.patch('sphinx.ext.intersphinx._read_from_url') -def test_fetch_inventory_redirection(app, status, warning, _read_from_url, read_inventory_v2): +def test_fetch_inventory_redirection(app, status, warning, _read_from_url, read_inventory): _read_from_url().readline.return_value = '# Sphinx inventory version 2'.encode('utf-8') # same uri and inv, not redirected - _read_from_url().geturl.return_value = 'http://hostname/' + INVENTORY_FILENAME + _read_from_url().url = 'http://hostname/' + INVENTORY_FILENAME fetch_inventory(app, 'http://hostname/', 'http://hostname/' + INVENTORY_FILENAME) assert 'intersphinx inventory has moved' not in status.getvalue() - assert read_inventory_v2.call_args[0][1] == 'http://hostname/' + assert read_inventory.call_args[0][1] == 'http://hostname/' # same uri and inv, redirected status.seek(0) status.truncate(0) - _read_from_url().geturl.return_value = 'http://hostname/new/' + INVENTORY_FILENAME + _read_from_url().url = 'http://hostname/new/' + INVENTORY_FILENAME fetch_inventory(app, 'http://hostname/', 'http://hostname/' + INVENTORY_FILENAME) assert status.getvalue() == ('intersphinx inventory has moved: ' 'http://hostname/%s -> http://hostname/new/%s\n' % (INVENTORY_FILENAME, INVENTORY_FILENAME)) - assert read_inventory_v2.call_args[0][1] == 'http://hostname/new' + assert read_inventory.call_args[0][1] == 'http://hostname/new' # different uri and inv, not redirected status.seek(0) status.truncate(0) - _read_from_url().geturl.return_value = 'http://hostname/new/' + INVENTORY_FILENAME + _read_from_url().url = 'http://hostname/new/' + INVENTORY_FILENAME fetch_inventory(app, 'http://hostname/', 'http://hostname/new/' + INVENTORY_FILENAME) assert 'intersphinx inventory has moved' not in status.getvalue() - assert read_inventory_v2.call_args[0][1] == 'http://hostname/' + assert read_inventory.call_args[0][1] == 'http://hostname/' # different uri and inv, redirected status.seek(0) status.truncate(0) - _read_from_url().geturl.return_value = 'http://hostname/other/' + INVENTORY_FILENAME + _read_from_url().url = 'http://hostname/other/' + INVENTORY_FILENAME fetch_inventory(app, 'http://hostname/', 'http://hostname/new/' + INVENTORY_FILENAME) assert status.getvalue() == ('intersphinx inventory has moved: ' 'http://hostname/new/%s -> http://hostname/other/%s\n' % (INVENTORY_FILENAME, INVENTORY_FILENAME)) - assert read_inventory_v2.call_args[0][1] == 'http://hostname/' + assert read_inventory.call_args[0][1] == 'http://hostname/' @with_app() @@ -255,41 +255,6 @@ class TestStripBasicAuth(unittest.TestCase): self.assertEqual('12345', actual_password) -@mock.patch('six.moves.urllib.request.HTTPBasicAuthHandler') -@mock.patch('six.moves.urllib.request.HTTPPasswordMgrWithDefaultRealm') -@mock.patch('six.moves.urllib.request.build_opener') -def test_readfromurl_authed(m_build_opener, m_HTTPPasswordMgrWithDefaultRealm, - m_HTTPBasicAuthHandler): - # read from URL containing basic auth creds - password_mgr = mock.Mock() - m_HTTPPasswordMgrWithDefaultRealm.return_value = password_mgr - - url = 'https://user:12345@domain.com/project/objects.inv' - _read_from_url(url) - - m_HTTPPasswordMgrWithDefaultRealm.assert_called_once_with() - password_mgr.add_password.assert_called_with( - None, 'https://domain.com/project/objects.inv', 'user', '12345') - - -@mock.patch('six.moves.urllib.request.HTTPBasicAuthHandler') -@mock.patch('six.moves.urllib.request.HTTPPasswordMgrWithDefaultRealm') -@mock.patch('sphinx.ext.intersphinx.default_opener') -def test_readfromurl_unauthed(m_default_opener, m_HTTPPasswordMgrWithDefaultRealm, - m_HTTPBasicAuthHandler): - # read from URL without auth creds - password_mgr = mock.Mock() - m_HTTPPasswordMgrWithDefaultRealm.return_value = password_mgr - - url = 'https://domain.com/project/objects.inv' - _read_from_url(url) - - # assert password manager not created - assert m_HTTPPasswordMgrWithDefaultRealm.call_args is None - # assert no password added to the password manager - assert password_mgr.add_password.call_args is None - - def test_getsafeurl_authed(): """_get_safe_url() with a url with basic auth""" url = 'https://user:12345@domain.com/project/objects.inv' From e7ff996d133aa36eb2325445a9fca742db5e66cf Mon Sep 17 00:00:00 2001 From: Takeshi KOMIYA Date: Thu, 18 Aug 2016 12:41:38 +0900 Subject: [PATCH 6/7] intersphinx: _strip_basic_auth() only removes credentials from URL --- sphinx/ext/intersphinx.py | 27 +++++++-------------------- tests/test_ext_intersphinx.py | 18 ++++++------------ 2 files changed, 13 insertions(+), 32 deletions(-) diff --git a/sphinx/ext/intersphinx.py b/sphinx/ext/intersphinx.py index 6c7b4aa11..4433f61ff 100644 --- a/sphinx/ext/intersphinx.py +++ b/sphinx/ext/intersphinx.py @@ -135,27 +135,14 @@ def _strip_basic_auth(url): :param url: url which may or may not contain basic auth credentials :type url: ``str`` - :return: 3-``tuple`` of: - - * (``str``) -- *url* with any basic auth creds removed - * (``str`` or ``NoneType``) -- basic auth username or ``None`` if basic - auth username not given - * (``str`` or ``NoneType``) -- basic auth password or ``None`` if basic - auth password not given - - :rtype: ``tuple`` + :return: *url* with any basic auth creds removed + :rtype: ``str`` """ - url_parts = urlsplit(url) - username = url_parts.username - password = url_parts.password - frags = list(url_parts) + frags = list(urlsplit(url)) # swap out "user[:pass]@hostname" for "hostname" - if url_parts.port: - frags[1] = "%s:%s" % (url_parts.hostname, url_parts.port) - else: - frags[1] = url_parts.hostname - url = urlunsplit(frags) - return (url, username, password) + if '@' in frags[1]: + frags[1] = frags[1].split('@')[1] + return urlunsplit(frags) def _read_from_url(url): @@ -212,7 +199,7 @@ def fetch_inventory(app, uri, inv): localuri = '://' not in uri if not localuri: # case: inv URI points to remote resource; strip any existing auth - uri, _, _ = _strip_basic_auth(uri) + uri = _strip_basic_auth(uri) try: if '://' in inv: f = _read_from_url(inv) diff --git a/tests/test_ext_intersphinx.py b/tests/test_ext_intersphinx.py index 64cc03d75..0a6be0a5b 100644 --- a/tests/test_ext_intersphinx.py +++ b/tests/test_ext_intersphinx.py @@ -230,29 +230,23 @@ class TestStripBasicAuth(unittest.TestCase): """basic auth creds stripped from URL containing creds""" url = 'https://user:12345@domain.com/project/objects.inv' expected = 'https://domain.com/project/objects.inv' - actual_url, actual_username, actual_password = _strip_basic_auth(url) - self.assertEqual(expected, actual_url) - self.assertEqual('user', actual_username) - self.assertEqual('12345', actual_password) + actual = _strip_basic_auth(url) + self.assertEqual(expected, actual) def test_no_auth(self): """url unchanged if param doesn't contain basic auth creds""" url = 'https://domain.com/project/objects.inv' expected = 'https://domain.com/project/objects.inv' - actual_url, actual_username, actual_password = _strip_basic_auth(url) - self.assertEqual(expected, actual_url) - self.assertEqual(None, actual_username) - self.assertEqual(None, actual_password) + actual = _strip_basic_auth(url) + self.assertEqual(expected, actual) def test_having_port(self): """basic auth creds correctly stripped from URL containing creds even if URL contains port""" url = 'https://user:12345@domain.com:8080/project/objects.inv' expected = 'https://domain.com:8080/project/objects.inv' - actual_url, actual_username, actual_password = _strip_basic_auth(url) - self.assertEqual(expected, actual_url) - self.assertEqual('user', actual_username) - self.assertEqual('12345', actual_password) + actual = _strip_basic_auth(url) + self.assertEqual(expected, actual) def test_getsafeurl_authed(): From 4edd854a1e6358f495968f8dcc03f64706dd1b13 Mon Sep 17 00:00:00 2001 From: Takeshi KOMIYA Date: Mon, 22 Aug 2016 21:12:33 +0900 Subject: [PATCH 7/7] Send User-Agent on obtain remote inventories --- sphinx/ext/intersphinx.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sphinx/ext/intersphinx.py b/sphinx/ext/intersphinx.py index 4433f61ff..d29928848 100644 --- a/sphinx/ext/intersphinx.py +++ b/sphinx/ext/intersphinx.py @@ -41,7 +41,7 @@ from docutils.utils import relative_path import sphinx from sphinx.locale import _ from sphinx.builders.html import INVENTORY_FILENAME -from sphinx.util.requests import requests +from sphinx.util.requests import requests, useragent_header UTF8StreamReader = codecs.lookup('utf-8')[2] @@ -161,7 +161,7 @@ def _read_from_url(url): :return: data read from resource described by *url* :rtype: ``file``-like object """ - r = requests.get(url, stream=True) + r = requests.get(url, stream=True, headers=dict(useragent_header)) r.raise_for_status() r.raw.url = r.url return r.raw