Move XML Name pattern to `epub3`

This commit is contained in:
Adam Turner 2022-09-10 09:04:35 +01:00
parent 5eb79c126a
commit f4ab9adf77
4 changed files with 31 additions and 35 deletions

View File

@ -6,6 +6,7 @@ Originally derived from epub.py.
from __future__ import annotations
import html
import re
from os import path
from typing import Any, NamedTuple
@ -14,7 +15,7 @@ from sphinx.application import Sphinx
from sphinx.builders import _epub_base
from sphinx.config import ENUM, Config
from sphinx.locale import __
from sphinx.util import logging, xmlname_checker
from sphinx.util import logging
from sphinx.util.fileutil import copy_asset_file
from sphinx.util.i18n import format_date
from sphinx.util.osutil import make_filename
@ -50,6 +51,19 @@ HTML_TAG = (
'xmlns:epub="http://www.idpf.org/2007/ops">'
)
# https://www.w3.org/TR/REC-xml/#NT-Name
_xml_name_start_char = (
':|[A-Z]|_|[a-z]|[\u00C0-\u00D6]'
'|[\u00D8-\u00F6]|[\u00F8-\u02FF]|[\u0370-\u037D]'
'|[\u037F-\u1FFF]|[\u200C-\u200D]|[\u2070-\u218F]'
'|[\u2C00-\u2FEF]|[\u3001-\uD7FF]|[\uF900-\uFDCF]'
'|[\uFDF0-\uFFFD]|[\U00010000-\U000EFFFF]'
)
_xml_name_char = (
_xml_name_start_char + r'\-|\.' '|[0-9]|\u00B7|[\u0300-\u036F]|[\u203F-\u2040]'
)
_XML_NAME_PATTERN = re.compile(f'({_xml_name_start_char})({_xml_name_char})*')
class Epub3Builder(_epub_base.EpubBuilder):
"""
@ -187,7 +201,7 @@ def validate_config_values(app: Sphinx) -> None:
logger.warning(__('conf value "epub_language" (or "language") '
'should not be empty for EPUB3'))
# <package> unique-identifier attribute
if not xmlname_checker().match(app.config.epub_uid):
if not _XML_NAME_PATTERN.match(app.config.epub_uid):
logger.warning(__('conf value "epub_uid" should be XML NAME for EPUB3'))
# dc:title
if not app.config.epub_title:

View File

@ -371,32 +371,11 @@ def isurl(url: str) -> bool:
return bool(url) and '://' in url
def xmlname_checker() -> re.Pattern:
# https://www.w3.org/TR/REC-xml/#NT-Name
name_start_chars = [
':', ['A', 'Z'], '_', ['a', 'z'], ['\u00C0', '\u00D6'],
['\u00D8', '\u00F6'], ['\u00F8', '\u02FF'], ['\u0370', '\u037D'],
['\u037F', '\u1FFF'], ['\u200C', '\u200D'], ['\u2070', '\u218F'],
['\u2C00', '\u2FEF'], ['\u3001', '\uD7FF'], ['\uF900', '\uFDCF'],
['\uFDF0', '\uFFFD'], ['\U00010000', '\U000EFFFF']]
def _xml_name_checker():
# to prevent import cycles
from sphinx.builders.epub3 import _XML_NAME_PATTERN
name_chars = [
"\\-", "\\.", ['0', '9'], '\u00B7', ['\u0300', '\u036F'],
['\u203F', '\u2040']
]
def convert(entries: Any, splitter: str = '|') -> str:
results = []
for entry in entries:
if isinstance(entry, list):
results.append('[%s]' % convert(entry, '-'))
else:
results.append(entry)
return splitter.join(results)
start_chars_regex = convert(name_start_chars)
name_chars_regex = convert(name_chars)
return re.compile(f'({start_chars_regex})({start_chars_regex}|{name_chars_regex})*')
return _XML_NAME_PATTERN
deprecated_alias('sphinx.util',
@ -410,6 +389,7 @@ deprecated_alias('sphinx.util',
'rfc1123_to_epoch': _http_date.rfc1123_to_epoch,
'save_traceback': _exceptions.save_traceback,
'format_exception_cut_frames': _exceptions.format_exception_cut_frames,
'xmlname_checker': _xml_name_checker,
},
RemovedInSphinx70Warning,
{
@ -422,4 +402,5 @@ deprecated_alias('sphinx.util',
'rfc1123_to_epoch': 'sphinx.http_date.rfc1123_to_epoch',
'save_traceback': 'sphinx.exceptions.save_traceback',
'format_exception_cut_frames': 'sphinx.exceptions.format_exception_cut_frames', # NoQA: E501
'xmlname_checker': 'sphinx.builders.epub3._XML_NAME_PATTERN',
})

View File

@ -7,6 +7,8 @@ from xml.etree import ElementTree
import pytest
from sphinx.builders.epub3 import _XML_NAME_PATTERN
# check given command is runnable
def runnable(command):
@ -382,3 +384,9 @@ def test_run_epubcheck(app):
print(exc.stdout.decode('utf-8'))
print(exc.stderr.decode('utf-8'))
raise AssertionError('epubcheck exited with return code %s' % exc.returncode)
def test_xml_name_pattern_check():
    """Check which strings ``_XML_NAME_PATTERN`` matches at their start."""
    # These begin with a valid name start character.
    for candidate in ('id-pub', 'webpage'):
        assert _XML_NAME_PATTERN.match(candidate)
    # A leading digit is not a valid name start character.
    assert _XML_NAME_PATTERN.match('1bfda21') is None

View File

@ -6,7 +6,7 @@ import tempfile
import pytest
from sphinx.errors import ExtensionError
from sphinx.util import encode_uri, ensuredir, import_object, parselinenos, xmlname_checker
from sphinx.util import encode_uri, ensuredir, import_object, parselinenos
def test_encode_uri():
@ -75,10 +75,3 @@ def test_parselinenos():
parselinenos('-', 10)
with pytest.raises(ValueError):
parselinenos('3-1', 10)
def test_xmlname_check():
    """Check which strings the pattern from ``xmlname_checker()`` matches."""
    pattern = xmlname_checker()
    # These begin with a valid name start character.
    for candidate in ('id-pub', 'webpage'):
        assert pattern.match(candidate)
    # A leading digit is not a valid name start character.
    assert pattern.match('1bfda21') is None