fix #3840: epub_uid should be XML Name

This commit is contained in:
Yoshiki Shibukawa 2017-06-30 01:21:16 +09:00
parent 1d0f667ecd
commit f0766ce4cd
7 changed files with 54 additions and 5 deletions

View File

@ -38,6 +38,7 @@ Bugs fixed
* #3873: Failure of deprecation warning mechanism of * #3873: Failure of deprecation warning mechanism of
``sphinx.util.compat.Directive`` ``sphinx.util.compat.Directive``
* #3874: Bogus warnings for "citation not referenced" for cross-file citations * #3874: Bogus warnings for "citation not referenced" for cross-file citations
* #3840: make checking ``epub_uid`` strict
Testing Testing
-------- --------

View File

@ -34,6 +34,7 @@ epub_theme = 'epub'
epub_basename = 'sphinx' epub_basename = 'sphinx'
epub_author = 'Georg Brandl' epub_author = 'Georg Brandl'
epub_publisher = 'http://sphinx-doc.org/' epub_publisher = 'http://sphinx-doc.org/'
epub_uid = 'web-site'
epub_scheme = 'url' epub_scheme = 'url'
epub_identifier = epub_publisher epub_identifier = epub_publisher
epub_pre_files = [('index.xhtml', 'Welcome')] epub_pre_files = [('index.xhtml', 'Welcome')]

View File

@ -1360,7 +1360,10 @@ the `Dublin Core metadata <http://dublincore.org/>`_.
.. confval:: epub_uid .. confval:: epub_uid
A unique identifier for the document. This is put in the Dublin Core A unique identifier for the document. This is put in the Dublin Core
metadata. You may use a random string. The default value is ``'unknown'``. metadata. You may use a
`XML's Name format <https://www.w3.org/TR/REC-xml/#NT-NameStartChar>`_ string.
The first character cannot be a hyphen, a period, or a digit.
The default value is ``'unknown'``.
.. confval:: epub_cover .. confval:: epub_cover

View File

@ -17,7 +17,7 @@ from collections import namedtuple
from sphinx import package_dir from sphinx import package_dir
from sphinx.config import string_classes, ENUM from sphinx.config import string_classes, ENUM
from sphinx.builders import _epub_base from sphinx.builders import _epub_base
from sphinx.util import logging from sphinx.util import logging, xmlname_checker
from sphinx.util.fileutil import copy_asset_file from sphinx.util.fileutil import copy_asset_file
if False: if False:
@ -89,8 +89,8 @@ class Epub3Builder(_epub_base.EpubBuilder):
'conf value "epub_language" (or "language") ' 'conf value "epub_language" (or "language") '
'should not be empty for EPUB3') 'should not be empty for EPUB3')
# <package> unique-identifier attribute # <package> unique-identifier attribute
if not self.app.config.epub_uid: if not xmlname_checker().match(self.app.config.epub_uid):
self.app.warn('conf value "epub_uid" should not be empty for EPUB3') self.app.warn('conf value "epub_uid" should be XML NAME for EPUB3')
# dc:title # dc:title
if not self.app.config.epub_title: if not self.app.config.epub_title:
self.app.warn( self.app.warn(

View File

@ -631,3 +631,36 @@ def epoch_to_rfc1123(epoch):
def rfc1123_to_epoch(rfc1123): def rfc1123_to_epoch(rfc1123):
return mktime(strptime(rfc1123, '%a, %d %b %Y %H:%M:%S %Z')) return mktime(strptime(rfc1123, '%a, %d %b %Y %H:%M:%S %Z'))
def xmlname_checker():
    """Return a compiled regex that validates an XML ``Name``.

    The pattern follows the ``Name`` production of the XML 1.0
    specification (https://www.w3.org/TR/REC-xml/#NT-Name): one
    ``NameStartChar`` followed by any number of ``NameChar``.

    The pattern is anchored at the end of the string, so
    ``xmlname_checker().match(value)`` succeeds only when the *whole*
    value is a valid Name (a valid prefix followed by garbage, e.g.
    ``'id pub'``, is rejected).

    :returns: a compiled regular expression object; use its ``match()``
              method, which returns ``None`` for invalid names.
    """
    # Each entry is either a single (already regex-safe) character or a
    # two-element list ``[first, last]`` describing an inclusive range.
    name_start_chars = [
        u':', [u'A', u'Z'], u'_', [u'a', u'z'], [u'\u00C0', u'\u00D6'],
        [u'\u00D8', u'\u00F6'], [u'\u00F8', u'\u02FF'], [u'\u0370', u'\u037D'],
        [u'\u037F', u'\u1FFF'], [u'\u200C', u'\u200D'], [u'\u2070', u'\u218F'],
        [u'\u2C00', u'\u2FEF'], [u'\u3001', u'\uD7FF'], [u'\uF900', u'\uFDCF'],
        [u'\uFDF0', u'\uFFFD']]
    # The supplementary-plane range is only added on Python 3; presumably
    # Python 2 (narrow builds) cannot express it as a character range.
    if sys.version_info.major >= 3:
        name_start_chars.append([u'\U00010000', u'\U000EFFFF'])

    # Characters allowed after the first one, in addition to the start chars.
    name_chars = [
        u"\\-", u"\\.", [u'0', u'9'], u'\u00B7', [u'\u0300', u'\u036F'],
        [u'\u203F', u'\u2040']
    ]

    def convert(entries, splitter=u'|'):
        """Join *entries* into a regex fragment.

        Nested ``[first, last]`` lists become ``[first-last]`` character
        classes; plain strings are used verbatim.
        """
        results = []
        for entry in entries:
            if isinstance(entry, list):
                results.append(u'[%s]' % convert(entry, u'-'))
            else:
                results.append(entry)
        return splitter.join(results)

    start_chars_regex = convert(name_start_chars)
    name_chars_regex = convert(name_chars)
    # ``\Z`` (not ``$``) so that a trailing newline is rejected as well.
    return re.compile(u'(%s)(%s|%s)*\\Z' % (
        start_chars_regex, start_chars_regex, name_chars_regex))

View File

@ -245,3 +245,5 @@ def test_epub_writing_mode(app):
# vertical / writing-mode (CSS) # vertical / writing-mode (CSS)
css = (app.outdir / '_static' / 'epub.css').text() css = (app.outdir / '_static' / 'epub.css').text()
assert 'writing-mode: vertical-rl;' in css assert 'writing-mode: vertical-rl;' in css

View File

@ -14,7 +14,8 @@ from mock import patch
from sphinx.util import logging from sphinx.util import logging
from sphinx.util import ( from sphinx.util import (
display_chunk, encode_uri, parselinenos, split_docinfo, status_iterator display_chunk, encode_uri, parselinenos, split_docinfo, status_iterator,
xmlname_checker
) )
from sphinx.testing.util import strip_escseq from sphinx.testing.util import strip_escseq
@ -115,3 +116,11 @@ def test_parselinenos():
parselinenos('-', 10) parselinenos('-', 10)
with pytest.raises(ValueError): with pytest.raises(ValueError):
parselinenos('3-1', 10) parselinenos('3-1', 10)
def test_xmlname_check():
    """Check that the XML Name pattern accepts valid ids and rejects
    a value starting with a digit."""
    pattern = xmlname_checker()

    # Names starting with a letter are accepted.
    for valid_name in ('id-pub', 'webpage'):
        assert pattern.match(valid_name)

    # A leading digit is not a valid NameStartChar.
    assert pattern.match('1bfda21') is None