fix #3840: epub_uid should be XML Name

2025-02-25 18:55:22 -06:00 · 2017-06-30 01:21:16 +09:00 · 2017-06-30 01:21:16 +09:00 · f0766ce4cd
commit f0766ce4cd
parent 1d0f667ecd
7 changed files with 54 additions and 5 deletions
--- a/1
+++ b/1
@ -38,6 +38,7 @@ Bugs fixed
 * #3873: Failure of deprecation warning mechanism of
  ``sphinx.util.compat.Directive``
 * #3874: Bogus warnings for "citation not referenced" for cross-file citations
 * #3840: make checking ``epub_uid`` strict
 Testing
 --------
--- a/doc/conf.py
+++ b/doc/conf.py
@ -34,6 +34,7 @@ epub_theme = 'epub'
 epub_basename = 'sphinx'
 epub_author = 'Georg Brandl'
 epub_publisher = 'http://sphinx-doc.org/'
 epub_uid = 'web-site'
 epub_scheme = 'url'
 epub_identifier = epub_publisher
 epub_pre_files = [('index.xhtml', 'Welcome')]
--- a/doc/config.rst
+++ b/doc/config.rst
@ -1360,7 +1360,10 @@ the `Dublin Core metadata <http://dublincore.org/>`_.
 .. confval:: epub_uid
   A unique identifier for the document.  This is put in the Dublin Core
-   metadata.  You may use a random string.  The default value is ``'unknown'``.
+   metadata.  The value must be a string in
   `XML's Name format <https://www.w3.org/TR/REC-xml/#NT-NameStartChar>`_.
   It must not start with a hyphen, a period, or a digit.
   The default value is ``'unknown'``.
 .. confval:: epub_cover
--- a/sphinx/builders/epub3.py
+++ b/sphinx/builders/epub3.py
@ -17,7 +17,7 @@ from collections import namedtuple
 from sphinx import package_dir
 from sphinx.config import string_classes, ENUM
 from sphinx.builders import _epub_base
-from sphinx.util import logging
+from sphinx.util import logging, xmlname_checker
 from sphinx.util.fileutil import copy_asset_file
 if False:
@ -89,8 +89,8 @@ class Epub3Builder(_epub_base.EpubBuilder):
                'conf value "epub_language" (or "language") '
                'should not be empty for EPUB3')
        # <package> unique-identifier attribute
-        if not self.app.config.epub_uid:
+        if not xmlname_checker().match(self.app.config.epub_uid):
-            self.app.warn('conf value "epub_uid" should not be empty for EPUB3')
+            self.app.warn('conf value "epub_uid" should be XML NAME for EPUB3')
        # dc:title
        if not self.app.config.epub_title:
            self.app.warn(
--- a/sphinx/util/init.py
+++ b/sphinx/util/init.py
@ -631,3 +631,36 @@ def epoch_to_rfc1123(epoch):
 def rfc1123_to_epoch(rfc1123):
    return mktime(strptime(rfc1123, '%a, %d %b %Y %H:%M:%S %Z'))
 def xmlname_checker():
     """Build a compiled regular expression that recognizes an XML ``Name``.

     The pattern follows the ``NameStartChar`` / ``NameChar`` productions of
     the XML 1.0 recommendation (https://www.w3.org/TR/REC-xml/#NT-Name): one
     start character followed by any number of start or name characters.

     The pattern is anchored at the end of the string, so ``.match()`` only
     succeeds when the *whole* value is a valid Name.  A value that merely
     begins with a valid Name (``'id pub'``, ``'name\\n'``) is rejected, as
     is the empty string.

     :return: a compiled pattern object; call ``.match(value)`` on it.
     """
     # Entries are either a single (already regex-safe) character or a
     # two-element ``[low, high]`` list describing an inclusive range.
     name_start_chars = [
         u':', [u'A', u'Z'], u'_', [u'a', u'z'], [u'\u00C0', u'\u00D6'],
         [u'\u00D8', u'\u00F6'], [u'\u00F8', u'\u02FF'], [u'\u0370', u'\u037D'],
         [u'\u037F', u'\u1FFF'], [u'\u200C', u'\u200D'], [u'\u2070', u'\u218F'],
         [u'\u2C00', u'\u2FEF'], [u'\u3001', u'\uD7FF'], [u'\uF900', u'\uFDCF'],
         [u'\uFDF0', u'\uFFFD']]
     if sys.version_info.major >= 3:
         # The supplementary-plane range is only added on Python 3; the
         # original note states that only Python 3.3 or newer supports these
         # character codes in regular expressions.
         name_start_chars.append([u'\U00010000', u'\U000EFFFF'])

     # Characters allowed after the first one, in addition to the start
     # characters.  Hyphen and period are escaped because they are emitted
     # outside of a character class.
     name_chars = [
         u"\\-", u"\\.", [u'0', u'9'], u'\u00B7', [u'\u0300', u'\u036F'],
         [u'\u203F', u'\u2040']
     ]

     def convert(entries, splitter=u'|'):
         """Join *entries* into a regex fragment.

         A nested ``[low, high]`` list becomes the character class
         ``[low-high]``; plain strings are used as they are.
         """
         results = []
         for entry in entries:
             if isinstance(entry, list):
                 results.append(u'[%s]' % convert(entry, u'-'))
             else:
                 results.append(entry)
         return splitter.join(results)

     start_chars_regex = convert(name_start_chars)
     name_chars_regex = convert(name_chars)
     # ``\Z`` anchors at the very end of the input; without it ``.match()``
     # would accept any string that only *starts* with a valid Name.
     return re.compile(u'(%s)(%s|%s)*\\Z' % (
         start_chars_regex, start_chars_regex, name_chars_regex))
--- a/tests/test_build_epub.py
+++ b/tests/test_build_epub.py
@ -245,3 +245,5 @@ def test_epub_writing_mode(app):
    # vertical / writing-mode (CSS)
    css = (app.outdir / '_static' / 'epub.css').text()
    assert 'writing-mode: vertical-rl;' in css
--- a/tests/test_util.py
+++ b/tests/test_util.py
@ -14,7 +14,8 @@ from mock import patch
 from sphinx.util import logging
 from sphinx.util import (
-    display_chunk, encode_uri, parselinenos, split_docinfo, status_iterator
+    display_chunk, encode_uri, parselinenos, split_docinfo, status_iterator,
    xmlname_checker
 )
 from sphinx.testing.util import strip_escseq
@ -115,3 +116,11 @@ def test_parselinenos():
        parselinenos('-', 10)
    with pytest.raises(ValueError):
        parselinenos('3-1', 10)
 def test_xmlname_check():
     """Check that the XML Name pattern accepts and rejects sample values."""
     matches = xmlname_checker().match
     # Identifiers starting with a letter are accepted.
     for valid_name in ('id-pub', 'webpage'):
         assert matches(valid_name)
     # A leading digit is not a valid start character.
     assert not matches('1bfda21')