From 3c46c2f5accb292b85a494a2c2bf9d2a9f49180b Mon Sep 17 00:00:00 2001 From: Takeshi KOMIYA Date: Mon, 27 Mar 2017 23:46:11 +0900 Subject: [PATCH 01/13] Add ImageDownloader transform to support remote images on some builders --- sphinx/application.py | 1 + sphinx/builders/__init__.py | 9 +- sphinx/builders/latex.py | 1 + .../__init__.py} | 0 sphinx/transforms/post_transforms/images.py | 83 +++++++++++++++++++ tests/root/images.txt | 2 +- tests/roots/test-images/index.rst | 3 + tests/test_build_html.py | 10 +++ tests/test_build_latex.py | 9 ++ 9 files changed, 113 insertions(+), 5 deletions(-) rename sphinx/transforms/{post_transforms.py => post_transforms/__init__.py} (100%) create mode 100644 sphinx/transforms/post_transforms/images.py diff --git a/sphinx/application.py b/sphinx/application.py index 80c96d280..6119a4b02 100644 --- a/sphinx/application.py +++ b/sphinx/application.py @@ -92,6 +92,7 @@ builtin_extensions = ( 'sphinx.directives.patches', 'sphinx.roles', 'sphinx.transforms.post_transforms', + 'sphinx.transforms.post_transforms.images', # collectors should be loaded by specific order 'sphinx.environment.collectors.dependencies', 'sphinx.environment.collectors.asset', diff --git a/sphinx/builders/__init__.py b/sphinx/builders/__init__.py index ad096fe7a..15eaac926 100644 --- a/sphinx/builders/__init__.py +++ b/sphinx/builders/__init__.py @@ -61,6 +61,11 @@ class Builder(object): # support translation use_message_catalog = True + #: The list of MIME types of image formats supported by the builder. + #: Image files are searched in the order in which they appear here. + supported_image_types = [] # type: List[unicode] + supported_remote_images = True + def __init__(self, app): # type: (Sphinx) -> None self.srcdir = app.srcdir @@ -157,10 +162,6 @@ class Builder(object): """Return list of paths for assets (ex. templates, CSS, etc.).""" return [] - #: The list of MIME types of image formats supported by the builder. 
- #: Image files are searched in the order in which they appear here. - supported_image_types = [] # type: List[unicode] - def post_process_images(self, doctree): # type: (nodes.Node) -> None """Pick the best candidate for all image URIs.""" diff --git a/sphinx/builders/latex.py b/sphinx/builders/latex.py index a57105c08..910ac41d7 100644 --- a/sphinx/builders/latex.py +++ b/sphinx/builders/latex.py @@ -51,6 +51,7 @@ class LaTeXBuilder(Builder): name = 'latex' format = 'latex' supported_image_types = ['application/pdf', 'image/png', 'image/jpeg'] + supported_remote_images = False def init(self): # type: () -> None diff --git a/sphinx/transforms/post_transforms.py b/sphinx/transforms/post_transforms/__init__.py similarity index 100% rename from sphinx/transforms/post_transforms.py rename to sphinx/transforms/post_transforms/__init__.py diff --git a/sphinx/transforms/post_transforms/images.py b/sphinx/transforms/post_transforms/images.py new file mode 100644 index 000000000..11efb18d6 --- /dev/null +++ b/sphinx/transforms/post_transforms/images.py @@ -0,0 +1,83 @@ +# -*- coding: utf-8 -*- +""" + sphinx.transforms.post_transforms.images + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Docutils transforms used by Sphinx. + + :copyright: Copyright 2007-2017 by the Sphinx team, see AUTHORS. + :license: BSD, see LICENSE for details. 
+""" + +import os + +from docutils import nodes + +from sphinx.transforms import SphinxTransform +from sphinx.util import logging, requests +from sphinx.util.osutil import ensuredir + +if False: + # For type annotation + from typing import Any, Dict # NOQA + from sphinx.application import Sphinx # NOQA + + +logger = logging.getLogger(__name__) + + +class BaseImageConverter(SphinxTransform): + def apply(self): + # type: () -> None + for node in self.document.traverse(nodes.image): + if self.match(node): + self.handle(node) + + def match(self, node): + # type: (nodes.Node) -> bool + return True + + def handle(self, node): + # type: (nodes.Node) -> None + pass + + +class ImageDownloader(BaseImageConverter): + default_priority = 100 + + def match(self, node): + # type: (nodes.Node) -> bool + if self.app.builder.supported_remote_images: + return False + else: + return '://' in node['uri'] + + def handle(self, node): + # type: (nodes.Node) -> None + imgdir = os.path.join(self.app.doctreedir, 'images') + basename = os.path.basename(node['uri']) + if '?' in basename: + basename = basename.split('?')[0] + dirname = node['uri'].replace('://', '/').translate({ord("?"): u"/", + ord("&"): u"/"}) + ensuredir(os.path.join(imgdir, dirname)) + path = os.path.join(imgdir, dirname, basename) + with open(path, 'wb') as f: + r = requests.get(node['uri']) + f.write(r.content) + + node['candidates'].pop('?') + node['candidates']['*'] = path + node['uri'] = path + self.app.env.images.add_file(self.env.docname, path) + + +def setup(app): + # type: (Sphinx) -> Dict[unicode, Any] + app.add_post_transform(ImageDownloader) + + return { + 'version': 'builtin', + 'parallel_read_safe': True, + 'parallel_write_safe': True, + } diff --git a/tests/root/images.txt b/tests/root/images.txt index 3dd8e6957..55bc6f61c 100644 --- a/tests/root/images.txt +++ b/tests/root/images.txt @@ -16,7 +16,7 @@ Sphinx image handling .. image:: img.* .. a non-local image URI -.. 
image:: http://www.python.org/logo.png +.. image:: https://www.python.org/static/img/python-logo.png .. an image with subdir and unspecified extension .. image:: subdir/simg.* diff --git a/tests/roots/test-images/index.rst b/tests/roots/test-images/index.rst index 0e95b3c74..c08f82514 100644 --- a/tests/roots/test-images/index.rst +++ b/tests/roots/test-images/index.rst @@ -14,3 +14,6 @@ test-image The caption of img .. image:: testimäge.png + +.. a remote image +.. image:: https://www.python.org/static/img/python-logo.png diff --git a/tests/test_build_html.py b/tests/test_build_html.py index 4605629f7..7867c16bb 100644 --- a/tests/test_build_html.py +++ b/tests/test_build_html.py @@ -1224,3 +1224,13 @@ def test_html_raw_directive(app, status, warning): def test_alternate_stylesheets(app, cached_etree_parse, fname, expect): app.build() check_xpath(cached_etree_parse(app.outdir / fname), fname, *expect) + + +@pytest.mark.sphinx('html', testroot='images') +def test_html_remote_images(app, status, warning): + app.builder.build_all() + + result = (app.outdir / 'index.html').text(encoding='utf8') + assert ('https://www.python.org/static/img/python-logo.png' in result) + assert not (app.outdir / 'python-logo.png').exists() diff --git a/tests/test_build_latex.py b/tests/test_build_latex.py index 24ce0050a..681ca76b4 100644 --- a/tests/test_build_latex.py +++ b/tests/test_build_latex.py @@ -1042,3 +1042,12 @@ def test_latex_raw_directive(app, status, warning): # with substitution assert 'HTML: abc ghi' in result assert 'LaTeX: abc def ghi' in result + + +@pytest.mark.sphinx('latex', testroot='images') +def test_latex_remote_images(app, status, warning): + app.builder.build_all() + + result = (app.outdir / 'Python.tex').text(encoding='utf8') + assert '\\sphinxincludegraphics{{python-logo}.png}' in result + assert (app.outdir / 'python-logo.png').exists() From a5d77a8f0658f9e01eaf1a23a4a8a2c8cf7ace0c Mon Sep 17 00:00:00 2001 From: Takeshi KOMIYA Date: Mon, 27 Mar 2017 
23:46:12 +0900 Subject: [PATCH 02/13] Drop warning for nonlocal image URI --- doc/config.rst | 1 - sphinx/environment/collectors/asset.py | 3 --- tests/roots/test-warnings/index.rst | 3 --- tests/test_build_html.py | 1 - tests/test_environment.py | 2 -- 5 files changed, 10 deletions(-) diff --git a/doc/config.rst b/doc/config.rst index 3bc6bebb7..ca27f384f 100644 --- a/doc/config.rst +++ b/doc/config.rst @@ -228,7 +228,6 @@ General configuration * app.add_source_parser * download.not_readable * image.data_uri - * image.nonlocal_uri * image.not_readable * ref.term * ref.ref diff --git a/sphinx/environment/collectors/asset.py b/sphinx/environment/collectors/asset.py index d80698257..36c451765 100644 --- a/sphinx/environment/collectors/asset.py +++ b/sphinx/environment/collectors/asset.py @@ -64,9 +64,6 @@ class ImageCollector(EnvironmentCollector): candidates['?'] = imguri continue elif imguri.find('://') != -1: - logger.warning('nonlocal image URI found: %s' % imguri, - location=node, - type='image', subtype='nonlocal_uri') candidates['?'] = imguri continue rel_imgpath, full_imgpath = app.env.relfn2path(imguri, docname) diff --git a/tests/roots/test-warnings/index.rst b/tests/roots/test-warnings/index.rst index bef44cb4f..4110e93d0 100644 --- a/tests/roots/test-warnings/index.rst +++ b/tests/roots/test-warnings/index.rst @@ -15,9 +15,6 @@ test-warnings .. an SVG image (for HTML at least) .. image:: svgimg.* -.. a non-local image URI -.. image:: http://www.python.org/logo.png - .. should give a warning .. literalinclude:: wrongenc.inc :language: none diff --git a/tests/test_build_html.py b/tests/test_build_html.py index 7867c16bb..99b6f0fea 100644 --- a/tests/test_build_html.py +++ b/tests/test_build_html.py @@ -32,7 +32,6 @@ WARNING: Explicit markup ends without a blank line; unexpected unindent. 
%(root)s/index.rst:\\d+: WARNING: Encoding 'utf-8-sig' used for reading included \ file u'%(root)s/wrongenc.inc' seems to be wrong, try giving an :encoding: option %(root)s/index.rst:\\d+: WARNING: image file not readable: foo.png -%(root)s/index.rst:\\d+: WARNING: nonlocal image URI found: http://www.python.org/logo.png %(root)s/index.rst:\\d+: WARNING: download file not readable: %(root)s/nonexisting.png %(root)s/index.rst:\\d+: WARNING: invalid single index entry u'' %(root)s/undecodable.rst:\\d+: WARNING: undecodable source characters, replacing \ diff --git a/tests/test_environment.py b/tests/test_environment.py index 4133a28fd..22baab0dd 100644 --- a/tests/test_environment.py +++ b/tests/test_environment.py @@ -40,8 +40,6 @@ def test_first_update(): def test_images(): assert ('image file not readable: foo.png' in app._warning.getvalue()) - assert ('nonlocal image URI found: http://www.python.org/logo.png' - in app._warning.getvalue()) tree = env.get_doctree('images') htmlbuilder = StandaloneHTMLBuilder(app) From 25f4c004d8c91deab400174f6380742d854b35e1 Mon Sep 17 00:00:00 2001 From: Takeshi KOMIYA Date: Mon, 27 Mar 2017 23:46:12 +0900 Subject: [PATCH 03/13] Emit warning when fetching remote images failed --- sphinx/transforms/post_transforms/images.py | 21 +++++++++++++++------ tests/roots/test-images/index.rst | 3 +++ tests/test_build_latex.py | 3 +++ 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/sphinx/transforms/post_transforms/images.py b/sphinx/transforms/post_transforms/images.py index 11efb18d6..c9d6d2f95 100644 --- a/sphinx/transforms/post_transforms/images.py +++ b/sphinx/transforms/post_transforms/images.py @@ -11,6 +11,7 @@ import os +from six import text_type from docutils import nodes from sphinx.transforms import SphinxTransform @@ -62,14 +63,22 @@ class ImageDownloader(BaseImageConverter): ord("&"): u"/"}) ensuredir(os.path.join(imgdir, dirname)) path = os.path.join(imgdir, dirname, basename) - with open(path, 'wb') as f: + 
try: r = requests.get(node['uri']) - f.write(r.content) + if r.status_code != 200: + logger.warning('Could not fetch remote image: %s [%d]' % + (node['uri'], r.status_code)) + else: + with open(path, 'wb') as f: + f.write(r.content) - node['candidates'].pop('?') - node['candidates']['*'] = path - node['uri'] = path - self.app.env.images.add_file(self.env.docname, path) + node['candidates'].pop('?') + node['candidates']['*'] = path + node['uri'] = path + self.app.env.images.add_file(self.env.docname, path) + except Exception as exc: + logger.warning('Could not fetch remote image: %s [%s]' % + (node['uri'], text_type(exc))) def setup(app): diff --git a/tests/roots/test-images/index.rst b/tests/roots/test-images/index.rst index c08f82514..d1478fab1 100644 --- a/tests/roots/test-images/index.rst +++ b/tests/roots/test-images/index.rst @@ -17,3 +17,6 @@ test-image .. a remote image .. image:: https://www.python.org/static/img/python-logo.png + +.. non-exist remote image +.. image:: http://example.com/NOT_EXIST.PNG diff --git a/tests/test_build_latex.py b/tests/test_build_latex.py index 681ca76b4..c83b9c5f8 100644 --- a/tests/test_build_latex.py +++ b/tests/test_build_latex.py @@ -1051,3 +1051,6 @@ def test_latex_remote_images(app, status, warning): result = (app.outdir / 'Python.tex').text(encoding='utf8') assert '\\sphinxincludegraphics{{python-logo}.png}' in result assert (app.outdir / 'python-logo.png').exists() + assert '\\sphinxincludegraphics{{NOT_EXIST}.PNG}' not in result + assert ('WARNING: Could not fetch remote image: ' + 'http://example.com/NOT_EXIST.PNG [404]' in warning.getvalue()) From 03093acb2c80eb1a6b5f46ae7f3b43ea07f493d4 Mon Sep 17 00:00:00 2001 From: Takeshi KOMIYA Date: Sun, 16 Apr 2017 19:17:17 +0900 Subject: [PATCH 04/13] Update CHANGES --- CHANGES | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES b/CHANGES index 3459c195f..e1fe0ea2a 100644 --- a/CHANGES +++ b/CHANGES @@ -106,6 +106,7 @@ Features added ``suppress_warnings`` * #2803: 
Discovery of builders by entry point * #1764, #1676: Allow setting 'rel' and 'title' attributes for stylesheets +* #3589: Support remote images Bugs fixed ---------- From 5f071bb4748876f894cdecf28a2bc5c0efb80d60 Mon Sep 17 00:00:00 2001 From: Takeshi KOMIYA Date: Sun, 16 Apr 2017 20:28:31 +0900 Subject: [PATCH 05/13] Show original image URI on copying images --- sphinx/builders/html.py | 6 ++-- sphinx/builders/latex.py | 29 +++++++++++-------- sphinx/builders/texinfo.py | 31 +++++++++++++-------- sphinx/environment/__init__.py | 3 ++ sphinx/environment/adapters/asset.py | 28 +++++++++++++++++++ sphinx/transforms/post_transforms/images.py | 2 ++ 6 files changed, 75 insertions(+), 24 deletions(-) create mode 100644 sphinx/environment/adapters/asset.py diff --git a/sphinx/builders/html.py b/sphinx/builders/html.py index 7bfce0ad6..0fd5a43ef 100644 --- a/sphinx/builders/html.py +++ b/sphinx/builders/html.py @@ -47,6 +47,7 @@ from sphinx.highlighting import PygmentsBridge from sphinx.util.console import bold, darkgreen # type: ignore from sphinx.writers.html import HTMLWriter, HTMLTranslator, \ SmartyPantsHTMLTranslator +from sphinx.environment.adapters.asset import ImageAdapter from sphinx.environment.adapters.toctree import TocTree from sphinx.environment.adapters.indexentries import IndexEntries @@ -606,11 +607,12 @@ class StandaloneHTMLBuilder(Builder): def copy_image_files(self): # type: () -> None - # copy image files if self.images: + stringify_func = ImageAdapter(self.app.env).get_original_image_uri ensuredir(path.join(self.outdir, self.imagedir)) for src in status_iterator(self.images, 'copying images... 
', "brown", - len(self.images), self.app.verbosity): + len(self.images), self.app.verbosity, + stringify_func=stringify_func): dest = self.images[src] try: copyfile(path.join(self.srcdir, src), diff --git a/sphinx/builders/latex.py b/sphinx/builders/latex.py index 910ac41d7..ac061e08e 100644 --- a/sphinx/builders/latex.py +++ b/sphinx/builders/latex.py @@ -13,8 +13,6 @@ import os import warnings from os import path -from six import iteritems - from docutils import nodes from docutils.io import FileOutput from docutils.utils import new_document @@ -22,12 +20,13 @@ from docutils.frontend import OptionParser from sphinx import package_dir, addnodes, highlighting from sphinx.deprecation import RemovedInSphinx17Warning -from sphinx.util import texescape, logging from sphinx.config import string_classes, ENUM from sphinx.errors import SphinxError from sphinx.locale import _ from sphinx.builders import Builder from sphinx.environment import NoUri +from sphinx.environment.adapters.asset import ImageAdapter +from sphinx.util import texescape, logging, status_iterator from sphinx.util.nodes import inline_all_toctrees from sphinx.util.fileutil import copy_asset_file from sphinx.util.osutil import SEP, make_filename @@ -207,14 +206,7 @@ class LaTeXBuilder(Builder): def finish(self): # type: () -> None - # copy image files - if self.images: - logger.info(bold('copying images...'), nonl=1) - for src, dest in iteritems(self.images): - logger.info(' ' + src, nonl=1) - copy_asset_file(path.join(self.srcdir, src), - path.join(self.outdir, dest)) - logger.info('') + self.copy_image_files() # copy TeX support files from texinputs context = {'latex_engine': self.config.latex_engine} @@ -241,6 +233,21 @@ class LaTeXBuilder(Builder): copy_asset_file(path.join(self.confdir, self.config.latex_logo), self.outdir) logger.info('done') + def copy_image_files(self): + # type: () -> None + if self.images: + stringify_func = ImageAdapter(self.app.env).get_original_image_uri + for src in 
status_iterator(self.images, 'copying images... ', "brown", + len(self.images), self.app.verbosity, + stringify_func=stringify_func): + dest = self.images[src] + try: + copy_asset_file(path.join(self.srcdir, src), + path.join(self.outdir, dest)) + except Exception as err: + logger.warning('cannot copy image file %r: %s', + path.join(self.srcdir, src), err) + def validate_config_values(app): # type: (Sphinx) -> None diff --git a/sphinx/builders/texinfo.py b/sphinx/builders/texinfo.py index 823290255..4724aa9c3 100644 --- a/sphinx/builders/texinfo.py +++ b/sphinx/builders/texinfo.py @@ -11,8 +11,6 @@ from os import path -from six import iteritems - from docutils import nodes from docutils.io import FileOutput from docutils.utils import new_document @@ -22,9 +20,12 @@ from sphinx import addnodes from sphinx.locale import _ from sphinx.builders import Builder from sphinx.environment import NoUri +from sphinx.environment.adapters.asset import ImageAdapter from sphinx.util import logging +from sphinx.util import status_iterator +from sphinx.util.fileutil import copy_asset_file from sphinx.util.nodes import inline_all_toctrees -from sphinx.util.osutil import SEP, copyfile, make_filename +from sphinx.util.osutil import SEP, make_filename from sphinx.util.console import bold, darkgreen # type: ignore from sphinx.writers.texinfo import TexinfoWriter @@ -223,14 +224,7 @@ class TexinfoBuilder(Builder): def finish(self): # type: () -> None - # copy image files - if self.images: - logger.info(bold('copying images...'), nonl=1) - for src, dest in iteritems(self.images): - logger.info(' ' + src, nonl=1) - copyfile(path.join(self.srcdir, src), - path.join(self.outdir, dest)) - logger.info('') + self.copy_image_files() logger.info(bold('copying Texinfo support files... 
'), nonl=True) # copy Makefile @@ -243,6 +237,21 @@ class TexinfoBuilder(Builder): logger.warning("error writing file %s: %s", fn, err) logger.info(' done') + def copy_image_files(self): + # type: () -> None + if self.images: + stringify_func = ImageAdapter(self.app.env).get_original_image_uri + for src in status_iterator(self.images, 'copying images... ', "brown", + len(self.images), self.app.verbosity, + stringify_func=stringify_func): + dest = self.images[src] + try: + copy_asset_file(path.join(self.srcdir, src), + path.join(self.outdir, dest)) + except Exception as err: + logger.warning('cannot copy image file %r: %s', + path.join(self.srcdir, src), err) + def setup(app): # type: (Sphinx) -> Dict[unicode, Any] diff --git a/sphinx/environment/__init__.py b/sphinx/environment/__init__.py index 00d730592..303704773 100644 --- a/sphinx/environment/__init__.py +++ b/sphinx/environment/__init__.py @@ -257,6 +257,9 @@ class BuildEnvironment(object): self.images = FilenameUniqDict() self.dlfiles = FilenameUniqDict() + # the original URI for images + self.original_image_uri = {} # type: Dict[unicode, unicode] + # temporary data storage while reading a document self.temp_data = {} # type: Dict[unicode, Any] # context for cross-references (e.g. current module or class) diff --git a/sphinx/environment/adapters/asset.py b/sphinx/environment/adapters/asset.py new file mode 100644 index 000000000..a373205d2 --- /dev/null +++ b/sphinx/environment/adapters/asset.py @@ -0,0 +1,28 @@ +# -*- coding: utf-8 -*- +""" + sphinx.environment.adapters.assets + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Assets adapter for sphinx.environment. + + :copyright: Copyright 2007-2017 by the Sphinx team, see AUTHORS. + :license: BSD, see LICENSE for details. 
+""" + +if False: + # For type annotation + from sphinx.environment import BuildEnvironment # NOQA + + +class ImageAdapter(object): + def __init__(self, env): + # type: (BuildEnvironment) -> None + self.env = env + + def get_original_image_uri(self, name): + # type: (unicode) -> unicode + """Get the original image URI.""" + while name in self.env.original_image_uri: + name = self.env.original_image_uri[name] + + return name diff --git a/sphinx/transforms/post_transforms/images.py b/sphinx/transforms/post_transforms/images.py index c9d6d2f95..e4f1c29cb 100644 --- a/sphinx/transforms/post_transforms/images.py +++ b/sphinx/transforms/post_transforms/images.py @@ -69,6 +69,8 @@ class ImageDownloader(BaseImageConverter): logger.warning('Could not fetch remote image: %s [%d]' % (node['uri'], r.status_code)) else: + self.app.env.original_image_uri[path] = node['uri'] + with open(path, 'wb') as f: f.write(r.content) From 027872f569c261bfd23c31dab72019eec118df5a Mon Sep 17 00:00:00 2001 From: Takeshi KOMIYA Date: Mon, 17 Apr 2017 01:18:49 +0900 Subject: [PATCH 06/13] Guess mimetype on downloading --- sphinx/transforms/post_transforms/images.py | 4 +++- sphinx/util/images.py | 6 +++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/sphinx/transforms/post_transforms/images.py b/sphinx/transforms/post_transforms/images.py index e4f1c29cb..ab7b75837 100644 --- a/sphinx/transforms/post_transforms/images.py +++ b/sphinx/transforms/post_transforms/images.py @@ -16,6 +16,7 @@ from docutils import nodes from sphinx.transforms import SphinxTransform from sphinx.util import logging, requests +from sphinx.util.images import guess_mimetype from sphinx.util.osutil import ensuredir if False: @@ -74,8 +75,9 @@ class ImageDownloader(BaseImageConverter): with open(path, 'wb') as f: f.write(r.content) + mimetype = guess_mimetype(path, default='*') node['candidates'].pop('?') - node['candidates']['*'] = path + node['candidates'][mimetype] = path node['uri'] = path 
self.app.env.images.add_file(self.env.docname, path) except Exception as exc: diff --git a/sphinx/util/images.py b/sphinx/util/images.py index 8de8254db..7fc89f8aa 100644 --- a/sphinx/util/images.py +++ b/sphinx/util/images.py @@ -52,8 +52,8 @@ def get_image_size(filename): return None -def guess_mimetype(filename): - # type: (unicode) -> unicode +def guess_mimetype(filename, default=None): + # type: (unicode, unicode) -> unicode _, ext = path.splitext(filename) if ext in mime_suffixes: return mime_suffixes[ext] @@ -63,4 +63,4 @@ def guess_mimetype(filename): if imgtype: return 'image/' + imgtype - return None + return default From be261ed71ec797d32fe88dae1acf73adb2b33c8e Mon Sep 17 00:00:00 2001 From: Takeshi KOMIYA Date: Wed, 19 Apr 2017 01:38:30 +0900 Subject: [PATCH 07/13] Add utility methods and testcases to sphinx.util.images --- sphinx/util/images.py | 82 +++++++++++++++++++++++++++------ tests/test_util_images.py | 97 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 166 insertions(+), 13 deletions(-) create mode 100644 tests/test_util_images.py diff --git a/sphinx/util/images.py b/sphinx/util/images.py index 7fc89f8aa..7bb904d22 100644 --- a/sphinx/util/images.py +++ b/sphinx/util/images.py @@ -8,10 +8,16 @@ :copyright: Copyright 2007-2017 by the Sphinx team, see AUTHORS. :license: BSD, see LICENSE for details. 
""" +from __future__ import absolute_import +import base64 import imghdr import imagesize from os import path +from collections import OrderedDict + +from six import PY3, BytesIO, iteritems +from typing import NamedTuple try: from PIL import Image # check for the Python Imaging Library @@ -23,13 +29,23 @@ except ImportError: if False: # For type annotation - from typing import Dict, List, Tuple # NOQA + from typing import Dict, IO, List, Tuple # NOQA -mime_suffixes = { - '.pdf': 'application/pdf', - '.svg': 'image/svg+xml', - '.svgz': 'image/svg+xml', -} # type: Dict[unicode, unicode] +if PY3: + unicode = str # special alias for static typing... + +mime_suffixes = OrderedDict([ + ('.gif', 'image/gif'), + ('.jpg', 'image/jpeg'), + ('.png', 'image/png'), + ('.pdf', 'application/pdf'), + ('.svg', 'image/svg+xml'), + ('.svgz', 'image/svg+xml'), +]) # type: Dict[unicode, unicode] + +DataURI = NamedTuple('DataURI', [('mimetype', unicode), + ('charset', unicode), + ('data', bytes)]) def get_image_size(filename): @@ -52,15 +68,55 @@ def get_image_size(filename): return None -def guess_mimetype(filename, default=None): - # type: (unicode, unicode) -> unicode - _, ext = path.splitext(filename) +def guess_mimetype_for_stream(stream, default=None): + # type: (IO, unicode) -> unicode + imgtype = imghdr.what(stream) + if imgtype: + return 'image/' + imgtype + else: + return default + + +def guess_mimetype(filename='', content=None, default=None): + # type: (unicode, unicode, unicode) -> unicode + _, ext = path.splitext(filename.lower()) if ext in mime_suffixes: return mime_suffixes[ext] - else: + elif content: + return guess_mimetype_for_stream(BytesIO(content), default=default) + elif path.exists(filename): with open(filename, 'rb') as f: - imgtype = imghdr.what(f) - if imgtype: - return 'image/' + imgtype + return guess_mimetype_for_stream(f, default=default) return default + + +def get_image_extension(mimetype): + # type: (unicode) -> unicode + for ext, _mimetype in 
iteritems(mime_suffixes): + if mimetype == _mimetype: + return ext + + return None + + +def parse_data_uri(uri): + # type: (unicode) -> DataURI + if not uri.startswith('data:'): + return None + + # data:[<MIME-type>][;charset=<encoding>][;base64],<data> + mimetype = u'text/plain' + charset = u'US-ASCII' + + properties, data = uri[5:].split(',', 1) + for prop in properties.split(';'): + if prop == 'base64': + pass # skip + elif prop.startswith('charset='): + charset = prop[8:] + elif prop: + mimetype = prop + + image_data = base64.b64decode(data) # type: ignore + return DataURI(mimetype, charset, image_data) diff --git a/tests/test_util_images.py b/tests/test_util_images.py new file mode 100644 index 000000000..45ee66c55 --- /dev/null +++ b/tests/test_util_images.py @@ -0,0 +1,97 @@ +# -*- coding: utf-8 -*- +""" + test_util_images + ~~~~~~~~~~~~~~~~ + + Test images util. + + :copyright: Copyright 2007-2017 by the Sphinx team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" +from __future__ import print_function + +import pytest + +from sphinx.util.images import ( + get_image_size, guess_mimetype, get_image_extension, parse_data_uri +) + +from util import rootdir + + +GIF_FILENAME = rootdir / 'root' / 'img.gif' +PNG_FILENAME = rootdir / 'root' / 'img.png' +PDF_FILENAME = rootdir / 'root' / 'img.pdf' +TXT_FILENAME = rootdir / 'root' / 'contents.txt' + + +def test_get_image_size(): + assert get_image_size(GIF_FILENAME) == (200, 181) + assert get_image_size(PNG_FILENAME) == (200, 181) + assert get_image_size(PDF_FILENAME) is None + assert get_image_size(TXT_FILENAME) is None + + +def test_guess_mimetype(): + # guess by filename + assert guess_mimetype('img.png') == 'image/png' + assert guess_mimetype('img.jpg') == 'image/jpeg' + assert guess_mimetype('img.txt') is None + assert guess_mimetype('img.txt', default='text/plain') == 'text/plain' + assert guess_mimetype('no_extension') is None + assert guess_mimetype('IMG.PNG') == 'image/png' + + # guess by content + assert
guess_mimetype(content=GIF_FILENAME.bytes()) == 'image/gif' + assert guess_mimetype(content=PNG_FILENAME.bytes()) == 'image/png' + assert guess_mimetype(content=PDF_FILENAME.bytes()) is None + assert guess_mimetype(content=TXT_FILENAME.bytes()) is None + assert guess_mimetype(content=TXT_FILENAME.bytes(), default='text/plain') == 'text/plain' + + # the priority of params: filename > content > default + assert guess_mimetype('img.png', + content=GIF_FILENAME.bytes(), + default='text/plain') == 'image/png' + assert guess_mimetype('no_extension', + content=GIF_FILENAME.bytes(), + default='text/plain') == 'image/gif' + assert guess_mimetype('no_extension', + content=TXT_FILENAME.bytes(), + default='text/plain') == 'text/plain' + + +def test_get_image_extension(): + assert get_image_extension('image/png') == '.png' + assert get_image_extension('image/jpeg') == '.jpg' + assert get_image_extension('image/svg+xml') == '.svg' + assert get_image_extension('text/plain') is None + + +def test_parse_data_uri(): + # standard case + uri = ("data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUAAAAFCAYAAACNbyblAAAAHElEQVQI12P4" + "//8/w38GIAXDIBKE0DHxgljNBAAO9TXL0Y4OHwAAAABJRU5ErkJggg==") + image = parse_data_uri(uri) + assert image is not None + assert image.mimetype == 'image/png' + assert image.charset == 'US-ASCII' + + # no mimetype + uri = ("data:charset=utf-8,iVBORw0KGgoAAAANSUhEUgAAAAUAAAAFCAYAAACNbyblAAAAHElE" + "QVQI12P4//8/w38GIAXDIBKE0DHxgljNBAAO9TXL0Y4OHwAAAABJRU5ErkJggg==") + image = parse_data_uri(uri) + assert image is not None + assert image.mimetype == 'text/plain' + assert image.charset == 'utf-8' + + # non data URI + uri = ("image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUAAAAFCAYAAACNbyblAAAAHElEQVQI12P4" + "//8/w38GIAXDIBKE0DHxgljNBAAO9TXL0Y4OHwAAAABJRU5ErkJggg==") + image = parse_data_uri(uri) + assert image is None + + # invalid data URI (no properties) + with pytest.raises(ValueError): + uri = ("data:iVBORw0KGgoAAAANSUhEUgAAAAUAAAAFCAYAAACNbyblAAAAHElEQVQI12P4" + "//8/w38GIAXDIBKE0DHxgljNBAAO9TXL0Y4OHwAAAABJRU5ErkJggg==") + parse_data_uri(uri)
From ebdec70dfc2de3a8234ed08b4b6ed096bdcfa264 Mon Sep 17 00:00:00 2001 From: Takeshi KOMIYA Date: Wed, 19 Apr 2017 01:38:30 +0900 Subject: [PATCH 08/13] Support images in Data URI on non-HTML builders --- CHANGES | 3 +- doc/config.rst | 1 - sphinx/builders/__init__.py | 1 + sphinx/builders/html.py | 1 + sphinx/environment/collectors/asset.py | 2 - sphinx/transforms/post_transforms/images.py | 47 +++++++++++++++++++-- 6 files changed, 47 insertions(+), 8 deletions(-) diff --git a/CHANGES b/CHANGES index e1fe0ea2a..1543bec91 100644 --- a/CHANGES +++ b/CHANGES @@ -106,7 +106,8 @@ Features added ``suppress_warnings`` * #2803: Discovery of builders by entry point * #1764, #1676: Allow setting 'rel' and 'title' attributes for stylesheets -* #3589: Support remote images +* #3589: Support remote images on non-HTML builders +* #3589: Support images in Data URI on non-HTML builders Bugs fixed ---------- diff --git a/doc/config.rst b/doc/config.rst index ca27f384f..5e641cbfb 100644 --- a/doc/config.rst +++ b/doc/config.rst @@ -227,7 +227,6 @@ General configuration * app.add_generic_role * app.add_source_parser * download.not_readable - * image.data_uri * image.not_readable * ref.term * ref.ref diff --git a/sphinx/builders/__init__.py b/sphinx/builders/__init__.py index 15eaac926..8c9c03f34 100644 --- a/sphinx/builders/__init__.py +++ b/sphinx/builders/__init__.py @@ -65,6 +65,7 @@ class Builder(object): #: Image files are searched in the order in which they appear here. 
supported_image_types = [] # type: List[unicode] supported_remote_images = True + supported_data_uri_images = False def __init__(self, app): # type: (Sphinx) -> None diff --git a/sphinx/builders/html.py b/sphinx/builders/html.py index 0fd5a43ef..d09886309 100644 --- a/sphinx/builders/html.py +++ b/sphinx/builders/html.py @@ -103,6 +103,7 @@ class StandaloneHTMLBuilder(Builder): html_scaled_image_link = True supported_image_types = ['image/svg+xml', 'image/png', 'image/gif', 'image/jpeg'] + supported_data_uri_images = True searchindex_filename = 'searchindex.js' add_permalinks = True allow_sharp_as_current_path = True diff --git a/sphinx/environment/collectors/asset.py b/sphinx/environment/collectors/asset.py index 36c451765..3a0e1fefd 100644 --- a/sphinx/environment/collectors/asset.py +++ b/sphinx/environment/collectors/asset.py @@ -59,8 +59,6 @@ class ImageCollector(EnvironmentCollector): node['candidates'] = candidates imguri = node['uri'] if imguri.startswith('data:'): - logger.warning('image data URI found. 
some builders might not support', - location=node, type='image', subtype='data_uri') candidates['?'] = imguri continue elif imguri.find('://') != -1: diff --git a/sphinx/transforms/post_transforms/images.py b/sphinx/transforms/post_transforms/images.py index ab7b75837..d2c6cbe25 100644 --- a/sphinx/transforms/post_transforms/images.py +++ b/sphinx/transforms/post_transforms/images.py @@ -10,13 +10,14 @@ """ import os +from hashlib import sha1 from six import text_type from docutils import nodes from sphinx.transforms import SphinxTransform from sphinx.util import logging, requests -from sphinx.util.images import guess_mimetype +from sphinx.util.images import guess_mimetype, get_image_extension, parse_data_uri from sphinx.util.osutil import ensuredir if False: @@ -43,6 +44,11 @@ class BaseImageConverter(SphinxTransform): # type: (nodes.Node) -> None pass + @property + def imagedir(self): + # type: () -> unicode + return os.path.join(self.app.doctreedir, 'images') + class ImageDownloader(BaseImageConverter): default_priority = 100 @@ -56,14 +62,13 @@ class ImageDownloader(BaseImageConverter): def handle(self, node): # type: (nodes.Node) -> None - imgdir = os.path.join(self.app.doctreedir, 'images') basename = os.path.basename(node['uri']) if '?' 
in basename: basename = basename.split('?')[0] dirname = node['uri'].replace('://', '/').translate({ord("?"): u"/", ord("&"): u"/"}) - ensuredir(os.path.join(imgdir, dirname)) - path = os.path.join(imgdir, dirname, basename) + ensuredir(os.path.join(self.imagedir, dirname)) + path = os.path.join(self.imagedir, dirname, basename) try: r = requests.get(node['uri']) if r.status_code != 200: @@ -85,9 +90,43 @@ class ImageDownloader(BaseImageConverter): (node['uri'], text_type(exc))) +class DataURIExtractor(BaseImageConverter): + default_priority = 150 + + def match(self, node): + # type: (nodes.Node) -> bool + if self.app.builder.supported_data_uri_images: + return False + else: + return 'data:' in node['uri'] + + def handle(self, node): + # type: (nodes.Node) -> None + image = parse_data_uri(node['uri']) + ext = get_image_extension(image.mimetype) + if ext is None: + logger.warning('Unknown image format: %s...', node['uri'][:32], + location=node) + return + + ensuredir(os.path.join(self.imagedir, 'embeded')) + digest = sha1(image.data).hexdigest() + path = os.path.join(self.imagedir, 'embeded', digest + ext) + self.app.env.original_image_uri[path] = node['uri'] + + with open(path, 'wb') as f: + f.write(image.data) + + node['candidates'].pop('?') + node['candidates'][image.mimetype] = path + node['uri'] = path + self.app.env.images.add_file(self.env.docname, path) + + def setup(app): # type: (Sphinx) -> Dict[unicode, Any] app.add_post_transform(ImageDownloader) + app.add_post_transform(DataURIExtractor) return { 'version': 'builtin', From a43f7b47c2cd02ebad7af297822d2e1bd5aab126 Mon Sep 17 00:00:00 2001 From: Takeshi KOMIYA Date: Wed, 19 Apr 2017 01:38:31 +0900 Subject: [PATCH 09/13] Change the default setting of supported_remote_images --- sphinx/builders/__init__.py | 2 +- sphinx/builders/epub.py | 1 + sphinx/builders/epub3.py | 1 + sphinx/builders/html.py | 1 + 4 files changed, 4 insertions(+), 1 deletion(-) diff --git a/sphinx/builders/__init__.py 
b/sphinx/builders/__init__.py index 8c9c03f34..18f8b0c9c 100644 --- a/sphinx/builders/__init__.py +++ b/sphinx/builders/__init__.py @@ -64,7 +64,7 @@ class Builder(object): #: The list of MIME types of image formats supported by the builder. #: Image files are searched in the order in which they appear here. supported_image_types = [] # type: List[unicode] - supported_remote_images = True + supported_remote_images = False supported_data_uri_images = False def __init__(self, app): diff --git a/sphinx/builders/epub.py b/sphinx/builders/epub.py index b0dbd35f8..ca77856a5 100644 --- a/sphinx/builders/epub.py +++ b/sphinx/builders/epub.py @@ -119,6 +119,7 @@ class EpubBuilder(StandaloneHTMLBuilder): copysource = False supported_image_types = ['image/svg+xml', 'image/png', 'image/gif', 'image/jpeg'] + supported_remote_images = False # don't add links add_permalinks = False diff --git a/sphinx/builders/epub3.py b/sphinx/builders/epub3.py index 8d5118a6d..47b8c6bb7 100644 --- a/sphinx/builders/epub3.py +++ b/sphinx/builders/epub3.py @@ -58,6 +58,7 @@ class Epub3Builder(EpubBuilder): """ name = 'epub' + supported_remote_images = False template_dir = path.join(package_dir, 'templates', 'epub3') doctype = DOCTYPE diff --git a/sphinx/builders/html.py b/sphinx/builders/html.py index d09886309..0a1e1df2f 100644 --- a/sphinx/builders/html.py +++ b/sphinx/builders/html.py @@ -103,6 +103,7 @@ class StandaloneHTMLBuilder(Builder): html_scaled_image_link = True supported_image_types = ['image/svg+xml', 'image/png', 'image/gif', 'image/jpeg'] + supported_remote_images = True supported_data_uri_images = True searchindex_filename = 'searchindex.js' add_permalinks = True From af1defa95efe43604069f9705b9cda2bc0551116 Mon Sep 17 00:00:00 2001 From: Takeshi KOMIYA Date: Wed, 19 Apr 2017 20:36:44 +0900 Subject: [PATCH 10/13] Fix style error --- sphinx/environment/adapters/asset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sphinx/environment/adapters/asset.py 
b/sphinx/environment/adapters/asset.py index a373205d2..ffa88a10f 100644 --- a/sphinx/environment/adapters/asset.py +++ b/sphinx/environment/adapters/asset.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- """ sphinx.environment.adapters.assets - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Assets adapter for sphinx.environment. From 17481d85242e5230a3416f66ab9c1b8320958a8c Mon Sep 17 00:00:00 2001 From: Takeshi KOMIYA Date: Wed, 19 Apr 2017 21:44:04 +0900 Subject: [PATCH 11/13] Send If-modified-since header on downloading images --- sphinx/transforms/post_transforms/images.py | 16 ++++++++++++++-- sphinx/util/__init__.py | 14 ++++++++++++++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/sphinx/transforms/post_transforms/images.py b/sphinx/transforms/post_transforms/images.py index d2c6cbe25..b8b05715d 100644 --- a/sphinx/transforms/post_transforms/images.py +++ b/sphinx/transforms/post_transforms/images.py @@ -10,6 +10,7 @@ """ import os +from math import ceil from hashlib import sha1 from six import text_type @@ -17,6 +18,7 @@ from docutils import nodes from sphinx.transforms import SphinxTransform from sphinx.util import logging, requests +from sphinx.util import epoch_to_rfc1123, rfc1123_to_epoch from sphinx.util.images import guess_mimetype, get_image_extension, parse_data_uri from sphinx.util.osutil import ensuredir @@ -70,8 +72,13 @@ class ImageDownloader(BaseImageConverter): ensuredir(os.path.join(self.imagedir, dirname)) path = os.path.join(self.imagedir, dirname, basename) try: - r = requests.get(node['uri']) - if r.status_code != 200: + headers = {} + if os.path.exists(path): + timestamp = ceil(os.stat(path).st_mtime) + headers['If-Modified-Since'] = epoch_to_rfc1123(timestamp) + + r = requests.get(node['uri'], headers=headers) + if r.status_code >= 400: logger.warning('Could not fetch remote image: %s [%d]' % (node['uri'], r.status_code)) else: @@ -80,6 +87,11 @@ class ImageDownloader(BaseImageConverter): with 
open(path, 'wb') as f: f.write(r.content) + last_modified = r.headers.get('last-modified') + if last_modified: + timestamp = rfc1123_to_epoch(last_modified) + os.utime(path, (timestamp, timestamp)) + mimetype = guess_mimetype(path, default='*') node['candidates'].pop('?') node['candidates'][mimetype] = path diff --git a/sphinx/util/__init__.py b/sphinx/util/__init__.py index bb6c58919..8064bc68a 100644 --- a/sphinx/util/__init__.py +++ b/sphinx/util/__init__.py @@ -19,12 +19,15 @@ import posixpath import traceback import unicodedata from os import path +from time import mktime, strptime from codecs import BOM_UTF8 +from datetime import datetime from collections import deque from six import text_type, binary_type, itervalues from six.moves import range from six.moves.urllib.parse import urlsplit, urlunsplit, quote_plus, parse_qsl, urlencode +from babel.dates import format_datetime from docutils.utils import relative_path from sphinx.errors import PycodeError, SphinxParallelError, ExtensionError @@ -615,3 +618,14 @@ def status_iterator(iterable, summary, color="darkgreen", length=0, verbosity=0, yield item if l > 0: logger.info('') + + +def epoch_to_rfc1123(epoch): + """Convert datetime format epoch to RFC1123.""" + dt = datetime.fromtimestamp(epoch) + fmt = 'EEE, dd LLL yyyy hh:mm:ss' + return format_datetime(dt, fmt, locale='en') + ' GMT' + + +def rfc1123_to_epoch(rfc1123): + return mktime(strptime(rfc1123, '%a, %d %b %Y %H:%M:%S %Z')) From 3aef91764dd1dfc3ed8917ed8747fa2e93c0f4b4 Mon Sep 17 00:00:00 2001 From: Takeshi KOMIYA Date: Sat, 22 Apr 2017 09:25:41 +0900 Subject: [PATCH 12/13] Store the downloaded images only if status_code == 200 --- sphinx/transforms/post_transforms/images.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sphinx/transforms/post_transforms/images.py b/sphinx/transforms/post_transforms/images.py index b8b05715d..9a232c1d4 100644 --- a/sphinx/transforms/post_transforms/images.py +++ 
b/sphinx/transforms/post_transforms/images.py @@ -84,8 +84,9 @@ class ImageDownloader(BaseImageConverter): else: self.app.env.original_image_uri[path] = node['uri'] - with open(path, 'wb') as f: - f.write(r.content) + if r.status_code == 200: + with open(path, 'wb') as f: + f.write(r.content) last_modified = r.headers.get('last-modified') if last_modified: From 9ad4c9125fe47602eace18e555eb4eedf0649c2a Mon Sep 17 00:00:00 2001 From: Takeshi KOMIYA Date: Sat, 22 Apr 2017 09:50:19 +0900 Subject: [PATCH 13/13] Fix module docstring --- sphinx/environment/adapters/asset.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sphinx/environment/adapters/asset.py b/sphinx/environment/adapters/asset.py index ffa88a10f..02557a8c4 100644 --- a/sphinx/environment/adapters/asset.py +++ b/sphinx/environment/adapters/asset.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- """ - sphinx.environment.adapters.assets - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + sphinx.environment.adapters.asset + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Assets adapter for sphinx.environment.