From 32763520a369b2c137be5e9b7440b42c28e65fef Mon Sep 17 00:00:00 2001 From: Takeshi KOMIYA Date: Wed, 23 Oct 2019 00:11:44 +0900 Subject: [PATCH] Close #1331: Change default User-Agent header --- CHANGES | 4 ++++ doc/usage/configuration.rst | 8 ++++++++ sphinx/builders/linkcheck.py | 1 - sphinx/config.py | 1 + sphinx/ext/intersphinx.py | 1 + sphinx/util/requests.py | 23 +++++++++++++++++++++-- 6 files changed, 35 insertions(+), 3 deletions(-) diff --git a/CHANGES b/CHANGES index 33d5ff2b5..da542ef7a 100644 --- a/CHANGES +++ b/CHANGES @@ -7,6 +7,9 @@ Dependencies Incompatible changes -------------------- +* #1331: Change default User-Agent header to ``"Sphinx/X.Y.Z requests/X.Y.Z + python/X.Y.Z"``. It can be changed via :confval:`user_agent`. + Deprecated ---------- @@ -19,6 +22,7 @@ Features added * #6707: C++, support bit-fields. * #267: html: Eliminate prompt characters of doctest block from copyable text * #6729: html theme: agogo theme now supports ``rightsidebar`` option +* #1331: Add new config variable: :confval:`user_agent` Bugs fixed ---------- diff --git a/doc/usage/configuration.rst b/doc/usage/configuration.rst index 02b40256d..e61c09cb2 100644 --- a/doc/usage/configuration.rst +++ b/doc/usage/configuration.rst @@ -510,6 +510,14 @@ General configuration .. versionadded:: 1.6.6 +.. confval:: user_agent + + The User-Agent of Sphinx. It is sent as a header on HTTP access (e.g. + linkcheck, intersphinx and so on). Default is ``"Sphinx/X.Y.Z + requests/X.Y.Z python/X.Y.Z"``. + + .. versionadded:: 2.3 + .. confval:: tls_verify If true, Sphinx verifies server certifications. Default is ``True``. 
diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index 1be2041bd..635d9df98 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -101,7 +101,6 @@ class CheckExternalLinksBuilder(Builder): 'allow_redirects': True, 'headers': { 'Accept': 'text/html,application/xhtml+xml;q=0.9,*/*;q=0.8', - 'User-Agent': requests.useragent_header[0][1], }, } if self.app.config.linkcheck_timeout: diff --git a/sphinx/config.py b/sphinx/config.py index 5ba2c2a3d..d8cce1b3d 100644 --- a/sphinx/config.py +++ b/sphinx/config.py @@ -148,6 +148,7 @@ class Config: 'math_numfig': (True, 'env', []), 'tls_verify': (True, 'env', []), 'tls_cacerts': (None, 'env', []), + 'user_agent': (None, 'env', [str]), 'smartquotes': (True, 'env', []), 'smartquotes_action': ('qDe', 'env', []), 'smartquotes_excludes': ({'languages': ['ja'], diff --git a/sphinx/ext/intersphinx.py b/sphinx/ext/intersphinx.py index cb74ef6f1..4389c5a44 100644 --- a/sphinx/ext/intersphinx.py +++ b/sphinx/ext/intersphinx.py @@ -374,6 +374,7 @@ def inspect_main(argv: List[str]) -> None: class MockConfig: intersphinx_timeout = None # type: int tls_verify = False + user_agent = None class MockApp: srcdir = '' diff --git a/sphinx/util/requests.py b/sphinx/util/requests.py index a279b4eb4..4cc73a85f 100644 --- a/sphinx/util/requests.py +++ b/sphinx/util/requests.py @@ -8,6 +8,7 @@ :license: BSD, see LICENSE for details. 
""" +import sys import warnings from contextlib import contextmanager from typing import Generator, Union @@ -16,6 +17,7 @@ from urllib.parse import urlsplit import pkg_resources import requests +import sphinx from sphinx.config import Config try:
@@ -105,14 +107,33 @@ def _get_tls_cacert(url: str, config: Config) -> Union[str, bool]:
         return certs.get(hostname, True)
 
 
+def _get_user_agent(config: Config) -> str:
+    """Return the User-Agent header value for HTTP requests.
+
+    Uses ``config.user_agent`` when it is set (truthy); otherwise builds
+    ``"Sphinx/X.Y.Z requests/X.Y.Z python/X.Y.Z"`` from the running versions.
+    """
+    if config.user_agent:
+        return config.user_agent
+    else:
+        return ' '.join([
+            'Sphinx/%s' % sphinx.__version__,
+            'requests/%s' % requests.__version__,
+            'python/%s' % '.'.join(map(str, sys.version_info[:3])),
+        ])
+
+
 def get(url: str, **kwargs) -> requests.Response:
     """Sends a GET request like requests.get().
 
     This sets up User-Agent header and TLS verification automatically."""
-    kwargs.setdefault('headers', dict(useragent_header))
+    headers = kwargs.setdefault('headers', {})
     config = kwargs.pop('config', None)
     if config:
         kwargs.setdefault('verify', _get_tls_cacert(url, config))
+        headers.setdefault('User-Agent', _get_user_agent(config))
+    else:
+        headers.setdefault('User-Agent', useragent_header[0][1])
 
     with ignore_insecure_warning(**kwargs):
         return requests.get(url, **kwargs)
@@ -122,10 +143,13 @@ def head(url: str, **kwargs) -> requests.Response:
     """Sends a HEAD request like requests.head().
 
     This sets up User-Agent header and TLS verification automatically."""
-    kwargs.setdefault('headers', dict(useragent_header))
+    headers = kwargs.setdefault('headers', {})
     config = kwargs.pop('config', None)
     if config:
         kwargs.setdefault('verify', _get_tls_cacert(url, config))
+        headers.setdefault('User-Agent', _get_user_agent(config))
+    else:
+        headers.setdefault('User-Agent', useragent_header[0][1])
 
     with ignore_insecure_warning(**kwargs):
-        return requests.get(url, **kwargs)
+        return requests.head(url, **kwargs)