diff --git a/CHANGES b/CHANGES index 9ad92038d..4228c27c2 100644 --- a/CHANGES +++ b/CHANGES @@ -38,6 +38,7 @@ Bugs fixed * #3873: Failure of deprecation warning mechanism of ``sphinx.util.compat.Directive`` * #3874: Bogus warnings for "citation not referenced" for cross-file citations +* #3840: make the check of ``epub_uid`` strict Testing -------- diff --git a/doc/conf.py b/doc/conf.py index 3cde5bf3b..62c5c13d5 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -34,6 +34,7 @@ epub_theme = 'epub' epub_basename = 'sphinx' epub_author = 'Georg Brandl' epub_publisher = 'http://sphinx-doc.org/' +epub_uid = 'web-site' epub_scheme = 'url' epub_identifier = epub_publisher epub_pre_files = [('index.xhtml', 'Welcome')] diff --git a/doc/config.rst b/doc/config.rst index 3aab2e7b2..a094195e9 100644 --- a/doc/config.rst +++ b/doc/config.rst @@ -1360,7 +1360,10 @@ the `Dublin Core metadata `_. .. confval:: epub_uid A unique identifier for the document. This is put in the Dublin Core - metadata. You may use a random string. The default value is ``'unknown'``. + metadata. You may use a string in + `XML's Name format <https://www.w3.org/TR/REC-xml/#NT-Name>`_. + You can't use a hyphen, a period, or a digit as the first character. + The default value is ``'unknown'``. .. 
def xmlname_checker():
    """Return a compiled regular expression that recognises an XML ``Name``.

    The pattern implements the ``Name`` production of the XML specification
    (https://www.w3.org/TR/REC-xml/#NT-Name): one ``NameStartChar`` followed
    by any number of ``NameChar``.

    Intended usage is ``xmlname_checker().match(value)``.  The pattern is
    anchored at the end of the string, so the match only succeeds when the
    *whole* value is a valid name; a valid prefix followed by other
    characters (``'id pub'``, ``'web/site'``) is rejected.

    :returns: a compiled regular expression object
    """
    # Plain strings are single characters (or already-escaped regex atoms);
    # two-item lists are inclusive ``[first, last]`` character ranges.
    name_start_chars = [
        u':', [u'A', u'Z'], u'_', [u'a', u'z'], [u'\u00C0', u'\u00D6'],
        [u'\u00D8', u'\u00F6'], [u'\u00F8', u'\u02FF'], [u'\u0370', u'\u037D'],
        [u'\u037F', u'\u1FFF'], [u'\u200C', u'\u200D'], [u'\u2070', u'\u218F'],
        [u'\u2C00', u'\u2FEF'], [u'\u3001', u'\uD7FF'], [u'\uF900', u'\uFDCF'],
        [u'\uFDF0', u'\uFFFD']]

    # Only Python 3.3 or newer supports character codes beyond the BMP in
    # regular expressions, so the supplementary range is added on Python 3
    # only.
    if sys.version_info.major == 3:
        name_start_chars.append([u'\U00010000', u'\U000EFFFF'])

    # Characters allowed after the first one, in addition to the start chars.
    name_chars = [
        u"\\-", u"\\.", [u'0', u'9'], u'\u00B7', [u'\u0300', u'\u036F'],
        [u'\u203F', u'\u2040']
    ]

    def convert(entries, splitter=u'|'):
        # Turn each range list into a character class such as "[a-z]" and
        # join everything with *splitter* ("|" between alternatives, "-"
        # between the two ends of a range).
        return splitter.join(
            u'[%s]' % convert(entry, u'-') if isinstance(entry, list) else entry
            for entry in entries)

    start_chars_regex = convert(name_start_chars)
    name_chars_regex = convert(name_chars)
    # ``\Z`` forces the match to reach the very end of the string; without it
    # ``.match()`` would accept any value that merely starts with a valid
    # name character.
    return re.compile(u'(%s)(%s|%s)*\\Z' % (
        start_chars_regex, start_chars_regex, name_chars_regex))