diff --git a/sphinx/util/images.py b/sphinx/util/images.py
index 7fc89f8aa..7bb904d22 100644
--- a/sphinx/util/images.py
+++ b/sphinx/util/images.py
@@ -8,10 +8,16 @@
     :copyright: Copyright 2007-2017 by the Sphinx team, see AUTHORS.
     :license: BSD, see LICENSE for details.
 """
+from __future__ import absolute_import
 
+import base64
 import imghdr
 import imagesize
 from os import path
+from collections import OrderedDict
+
+from six import PY3, BytesIO, iteritems
+from typing import NamedTuple
 
 try:
     from PIL import Image        # check for the Python Imaging Library
@@ -23,13 +29,23 @@ except ImportError:
 
 if False:
     # For type annotation
-    from typing import Dict, List, Tuple  # NOQA
+    from typing import Dict, IO, List, Tuple  # NOQA
 
-mime_suffixes = {
-    '.pdf': 'application/pdf',
-    '.svg': 'image/svg+xml',
-    '.svgz': 'image/svg+xml',
-}  # type: Dict[unicode, unicode]
+if PY3:
+    unicode = str  # special alias for static typing...
+
+mime_suffixes = OrderedDict([
+    ('.gif', 'image/gif'),
+    ('.jpg', 'image/jpeg'),
+    ('.png', 'image/png'),
+    ('.pdf', 'application/pdf'),
+    ('.svg', 'image/svg+xml'),
+    ('.svgz', 'image/svg+xml'),
+])  # type: Dict[unicode, unicode]
+
+DataURI = NamedTuple('DataURI', [('mimetype', unicode),
+                                 ('charset', unicode),
+                                 ('data', bytes)])
 
 
 def get_image_size(filename):
@@ -52,15 +68,55 @@ def get_image_size(filename):
         return None
 
 
-def guess_mimetype(filename, default=None):
-    # type: (unicode, unicode) -> unicode
-    _, ext = path.splitext(filename)
+def guess_mimetype_for_stream(stream, default=None):
+    # type: (IO, unicode) -> unicode
+    imgtype = imghdr.what(stream)
+    if imgtype:
+        return 'image/' + imgtype
+    else:
+        return default
+
+
+def guess_mimetype(filename='', content=None, default=None):
+    # type: (unicode, bytes, unicode) -> unicode
+    _, ext = path.splitext(filename.lower())
     if ext in mime_suffixes:
         return mime_suffixes[ext]
-    else:
+    elif content:
+        return guess_mimetype_for_stream(BytesIO(content), default=default)
+    elif path.exists(filename):
         with open(filename, 'rb') as f:
-            imgtype = imghdr.what(f)
-            if imgtype:
-                return 'image/' + imgtype
+            return guess_mimetype_for_stream(f, default=default)
 
     return default
+
+
+def get_image_extension(mimetype):
+    # type: (unicode) -> unicode
+    for ext, _mimetype in iteritems(mime_suffixes):
+        if mimetype == _mimetype:
+            return ext
+
+    return None
+
+
+def parse_data_uri(uri):
+    # type: (unicode) -> DataURI
+    if not uri.startswith('data:'):
+        return None
+
+    # data:[<MIME-type>][;charset=<encoding>][;base64],<data>
+    mimetype = u'text/plain'
+    charset = u'US-ASCII'
+
+    properties, data = uri[5:].split(',', 1)
+    for prop in properties.split(';'):
+        if prop == 'base64':
+            pass  # skip
+        elif prop.startswith('charset='):
+            charset = prop[8:]
+        elif prop:
+            mimetype = prop
+
+    image_data = base64.b64decode(data)  # type: ignore
+    return DataURI(mimetype, charset, image_data)
diff --git a/tests/test_util_images.py b/tests/test_util_images.py
new file mode 100644
index 000000000..45ee66c55
--- /dev/null
+++ b/tests/test_util_images.py
@@ -0,0 +1,97 @@
+# -*- coding: utf-8 -*-
+"""
+    test_util_images
+    ~~~~~~~~~~~~~~~~
+
+    Test images util.
+
+    :copyright: Copyright 2007-2017 by the Sphinx team, see AUTHORS.
+    :license: BSD, see LICENSE for details.
+"""
+from __future__ import print_function
+
+import pytest
+
+from sphinx.util.images import (
+    get_image_size, guess_mimetype, get_image_extension, parse_data_uri
+)
+
+from util import rootdir
+
+
+GIF_FILENAME = rootdir / 'root' / 'img.gif'
+PNG_FILENAME = rootdir / 'root' / 'img.png'
+PDF_FILENAME = rootdir / 'root' / 'img.pdf'
+TXT_FILENAME = rootdir / 'root' / 'contents.txt'
+
+
+def test_get_image_size():
+    assert get_image_size(GIF_FILENAME) == (200, 181)
+    assert get_image_size(PNG_FILENAME) == (200, 181)
+    assert get_image_size(PDF_FILENAME) is None
+    assert get_image_size(TXT_FILENAME) is None
+
+
+def test_guess_mimetype():
+    # guess by filename
+    assert guess_mimetype('img.png') == 'image/png'
+    assert guess_mimetype('img.jpg') == 'image/jpeg'
+    assert guess_mimetype('img.txt') is None
+    assert guess_mimetype('img.txt', default='text/plain') == 'text/plain'
+    assert guess_mimetype('no_extension') is None
+    assert guess_mimetype('IMG.PNG') == 'image/png'
+
+    # guess by content
+    assert guess_mimetype(content=GIF_FILENAME.bytes()) == 'image/gif'
+    assert guess_mimetype(content=PNG_FILENAME.bytes()) == 'image/png'
+    assert guess_mimetype(content=PDF_FILENAME.bytes()) is None
+    assert guess_mimetype(content=TXT_FILENAME.bytes()) is None
+    assert guess_mimetype(content=TXT_FILENAME.bytes(), default='text/plain') == 'text/plain'
+
+    # the priority of params: filename > content > default
+    assert guess_mimetype('img.png',
+                          content=GIF_FILENAME.bytes(),
+                          default='text/plain') == 'image/png'
+    assert guess_mimetype('no_extension',
+                          content=GIF_FILENAME.bytes(),
+                          default='text/plain') == 'image/gif'
+    assert guess_mimetype('no_extension',
+                          content=TXT_FILENAME.bytes(),
+                          default='text/plain') == 'text/plain'
+
+
+def test_get_image_extension():
+    assert get_image_extension('image/png') == '.png'
+    assert get_image_extension('image/jpeg') == '.jpg'
+    assert get_image_extension('image/svg+xml') == '.svg'
+    assert get_image_extension('text/plain') is None
+
+
+def test_parse_data_uri():
+    # standard case
+    uri = ("data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUAAAAFCAYAAACNbyblAAAAHElEQVQI12P4"
+           "//8/w38GIAXDIBKE0DHxgljNBAAO9TXL0Y4OHwAAAABJRU5ErkJggg==")
+    image = parse_data_uri(uri)
+    assert image is not None
+    assert image.mimetype == 'image/png'
+    assert image.charset == 'US-ASCII'
+
+    # no mimetype
+    uri = ("data:charset=utf-8,iVBORw0KGgoAAAANSUhEUgAAAAUAAAAFCAYAAACNbyblAAAAHElE"
+           "QVQI12P4//8/w38GIAXDIBKE0DHxgljNBAAO9TXL0Y4OHwAAAABJRU5ErkJggg==")
+    image = parse_data_uri(uri)
+    assert image is not None
+    assert image.mimetype == 'text/plain'
+    assert image.charset == 'utf-8'
+
+    # non data URI
+    uri = ("image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUAAAAFCAYAAACNbyblAAAAHElEQVQI12P4"
+           "//8/w38GIAXDIBKE0DHxgljNBAAO9TXL0Y4OHwAAAABJRU5ErkJggg==")
+    image = parse_data_uri(uri)
+    assert image is None
+
+    # invalid data URI (no properties)
+    with pytest.raises(ValueError):
+        uri = ("data:iVBORw0KGgoAAAANSUhEUgAAAAUAAAAFCAYAAACNbyblAAAAHElEQVQI12P4"
+               "//8/w38GIAXDIBKE0DHxgljNBAAO9TXL0Y4OHwAAAABJRU5ErkJggg==")
+        parse_data_uri(uri)