diff --git a/CHANGES b/CHANGES
index 9ad92038d..4228c27c2 100644
--- a/CHANGES
+++ b/CHANGES
@@ -38,6 +38,7 @@ Bugs fixed
* #3873: Failure of deprecation warning mechanism of
``sphinx.util.compat.Directive``
* #3874: Bogus warnings for "citation not referenced" for cross-file citations
+* #3840: epub3: warn when ``epub_uid`` is not a valid XML Name (previously only when empty)
Testing
--------
diff --git a/doc/conf.py b/doc/conf.py
index 3cde5bf3b..62c5c13d5 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -34,6 +34,7 @@ epub_theme = 'epub'
epub_basename = 'sphinx'
epub_author = 'Georg Brandl'
epub_publisher = 'http://sphinx-doc.org/'
+epub_uid = 'web-site'
epub_scheme = 'url'
epub_identifier = epub_publisher
epub_pre_files = [('index.xhtml', 'Welcome')]
diff --git a/doc/config.rst b/doc/config.rst
index 3aab2e7b2..a094195e9 100644
--- a/doc/config.rst
+++ b/doc/config.rst
@@ -1360,7 +1360,10 @@ the `Dublin Core metadata `_.
.. confval:: epub_uid
A unique identifier for the document. This is put in the Dublin Core
- metadata. You may use a random string. The default value is ``'unknown'``.
+ metadata. The value must be a valid
+ `XML Name <https://www.w3.org/TR/REC-xml/#NT-Name>`_ string.
+ In particular, it must not start with a hyphen, a period or a digit.
+ The default value is ``'unknown'``.
.. confval:: epub_cover
diff --git a/sphinx/builders/epub3.py b/sphinx/builders/epub3.py
index 19baad344..6256b0f6d 100644
--- a/sphinx/builders/epub3.py
+++ b/sphinx/builders/epub3.py
@@ -17,7 +17,7 @@ from collections import namedtuple
from sphinx import package_dir
from sphinx.config import string_classes, ENUM
from sphinx.builders import _epub_base
-from sphinx.util import logging
+from sphinx.util import logging, xmlname_checker
from sphinx.util.fileutil import copy_asset_file
if False:
@@ -89,8 +89,8 @@ class Epub3Builder(_epub_base.EpubBuilder):
'conf value "epub_language" (or "language") '
'should not be empty for EPUB3')
# unique-identifier attribute
- if not self.app.config.epub_uid:
- self.app.warn('conf value "epub_uid" should not be empty for EPUB3')
+ if not xmlname_checker().match(self.app.config.epub_uid):
+ self.app.warn('conf value "epub_uid" should be XML NAME for EPUB3')
# dc:title
if not self.app.config.epub_title:
self.app.warn(
diff --git a/sphinx/util/__init__.py b/sphinx/util/__init__.py
index 295848e40..03f8ce6a3 100644
--- a/sphinx/util/__init__.py
+++ b/sphinx/util/__init__.py
@@ -631,3 +631,36 @@ def epoch_to_rfc1123(epoch):
def rfc1123_to_epoch(rfc1123):
return mktime(strptime(rfc1123, '%a, %d %b %Y %H:%M:%S %Z'))
+
+
+def xmlname_checker():
+    """Return a compiled regex that matches a complete XML 1.0 ``Name``.
+
+    Grammar: https://www.w3.org/TR/REC-xml/#NT-Name.  The pattern is anchored
+    at the end, so ``match()`` rejects a valid prefix followed by junk.
+    """
+    name_start_chars = [
+        u':', [u'A', u'Z'], u'_', [u'a', u'z'], [u'\u00C0', u'\u00D6'],
+        [u'\u00D8', u'\u00F6'], [u'\u00F8', u'\u02FF'], [u'\u0370', u'\u037D'],
+        [u'\u037F', u'\u1FFF'], [u'\u200C', u'\u200D'], [u'\u2070', u'\u218F'],
+        [u'\u2C00', u'\u2FEF'], [u'\u3001', u'\uD7FF'], [u'\uF900', u'\uFDCF'],
+        [u'\uFDF0', u'\uFFFD']]
+
+    # Astral ranges are usable in a regex only on Python 3.3 or newer.
+    if sys.version_info >= (3, 3):
+        name_start_chars.append([u'\U00010000', u'\U000EFFFF'])
+
+    name_chars = [
+        u"\\-", u"\\.", [u'0', u'9'], u'\u00B7', [u'\u0300', u'\u036F'],
+        [u'\u203F', u'\u2040']]
+
+    def convert(entries, splitter=u'|'):
+        # A nested list is an inclusive range (``[a-z]``); a string is literal.
+        return splitter.join(
+            u'[%s]' % convert(e, u'-') if isinstance(e, list) else e
+            for e in entries)
+
+    start_chars_regex = convert(name_start_chars)
+    name_chars_regex = convert(name_chars)
+    return re.compile(u'(%s)(%s|%s)*\\Z' % (
+        start_chars_regex, start_chars_regex, name_chars_regex))
diff --git a/tests/test_build_epub.py b/tests/test_build_epub.py
index e5d86b0ed..397547734 100644
--- a/tests/test_build_epub.py
+++ b/tests/test_build_epub.py
@@ -245,3 +245,5 @@ def test_epub_writing_mode(app):
# vertical / writing-mode (CSS)
css = (app.outdir / '_static' / 'epub.css').text()
assert 'writing-mode: vertical-rl;' in css
+
+
diff --git a/tests/test_util.py b/tests/test_util.py
index b0543a246..84ce44007 100644
--- a/tests/test_util.py
+++ b/tests/test_util.py
@@ -14,7 +14,8 @@ from mock import patch
from sphinx.util import logging
from sphinx.util import (
- display_chunk, encode_uri, parselinenos, split_docinfo, status_iterator
+ display_chunk, encode_uri, parselinenos, split_docinfo, status_iterator,
+ xmlname_checker
)
from sphinx.testing.util import strip_escseq
@@ -115,3 +116,11 @@ def test_parselinenos():
parselinenos('-', 10)
with pytest.raises(ValueError):
parselinenos('3-1', 10)
+
+
+
+def test_xmlname_check():
+    """xmlname_checker() accepts valid names and rejects a leading digit."""
+    matcher = xmlname_checker()
+    assert all(matcher.match(name) for name in ('id-pub', 'webpage'))
+    assert matcher.match('1bfda21') is None