From f4ab9adf77557906972ff4ccfe61862be7f23751 Mon Sep 17 00:00:00 2001
From: Adam Turner <9087854+aa-turner@users.noreply.github.com>
Date: Sat, 10 Sep 2022 09:04:35 +0100
Subject: [PATCH] Move XML Name pattern to ``epub3``

---
Review notes (text between the ``---`` line and the first ``diff --git``
line is ignored by ``git am``):

* ``_xml_name_char`` now joins the NameStartChar alternation and the
  extra NameChar alternatives with ``|``.  Without the separator the
  two strings fused into ``[\U00010000-\U000EFFFF]\-``, so ``-`` was
  only accepted directly after a code point in U+10000-U+EFFFF, and
  such a code point on its own was not a NameChar.
* ``_xml_name_checker`` keeps the ``re.Pattern`` return annotation of
  the ``xmlname_checker`` function it replaces.
* ``test_xml_name_pattern_check`` gains a ``fullmatch`` assertion:
  ``match`` only needs a valid prefix, so it could not detect the
  problem above.
* Line breaks and indentation were reconstructed from a
  whitespace-collapsed copy of this patch; verify context lines against
  the tree before applying.  The ``index`` blob hashes still refer to
  the original commit.

 sphinx/builders/epub3.py | 18 ++++++++++++++++--
 sphinx/util/__init__.py  | 31 ++++++-------------------------
 tests/test_build_epub.py |  9 +++++++++
 tests/test_util.py       |  9 +--------
 4 files changed, 32 insertions(+), 35 deletions(-)

diff --git a/sphinx/builders/epub3.py b/sphinx/builders/epub3.py
index adb1aaac1..9b01eec8a 100644
--- a/sphinx/builders/epub3.py
+++ b/sphinx/builders/epub3.py
@@ -6,6 +6,7 @@ Originally derived from epub.py.
 from __future__ import annotations
 
 import html
+import re
 from os import path
 from typing import Any, NamedTuple
 
@@ -14,7 +15,7 @@ from sphinx.application import Sphinx
 from sphinx.builders import _epub_base
 from sphinx.config import ENUM, Config
 from sphinx.locale import __
-from sphinx.util import logging, xmlname_checker
+from sphinx.util import logging
 from sphinx.util.fileutil import copy_asset_file
 from sphinx.util.i18n import format_date
 from sphinx.util.osutil import make_filename
@@ -50,6 +51,19 @@ HTML_TAG = (
     'xmlns:epub="http://www.idpf.org/2007/ops">'
 )
 
+# https://www.w3.org/TR/REC-xml/#NT-Name
+_xml_name_start_char = (
+    ':|[A-Z]|_|[a-z]|[\u00C0-\u00D6]'
+    '|[\u00D8-\u00F6]|[\u00F8-\u02FF]|[\u0370-\u037D]'
+    '|[\u037F-\u1FFF]|[\u200C-\u200D]|[\u2070-\u218F]'
+    '|[\u2C00-\u2FEF]|[\u3001-\uD7FF]|[\uF900-\uFDCF]'
+    '|[\uFDF0-\uFFFD]|[\U00010000-\U000EFFFF]'
+)
+_xml_name_char = (
+    _xml_name_start_char + r'|\-|\.' '|[0-9]|\u00B7|[\u0300-\u036F]|[\u203F-\u2040]'
+)
+_XML_NAME_PATTERN = re.compile(f'({_xml_name_start_char})({_xml_name_char})*')
+
 
 class Epub3Builder(_epub_base.EpubBuilder):
     """
@@ -187,7 +201,7 @@ def validate_config_values(app: Sphinx) -> None:
         logger.warning(__('conf value "epub_language" (or "language") '
                           'should not be empty for EPUB3'))
     # unique-identifier attribute
-    if not xmlname_checker().match(app.config.epub_uid):
+    if not _XML_NAME_PATTERN.match(app.config.epub_uid):
         logger.warning(__('conf value "epub_uid" should be XML NAME for EPUB3'))
     # dc:title
     if not app.config.epub_title:
diff --git a/sphinx/util/__init__.py b/sphinx/util/__init__.py
index 837e41af8..eaa007b1a 100644
--- a/sphinx/util/__init__.py
+++ b/sphinx/util/__init__.py
@@ -371,32 +371,11 @@ def isurl(url: str) -> bool:
     return bool(url) and '://' in url
 
 
-def xmlname_checker() -> re.Pattern:
-    # https://www.w3.org/TR/REC-xml/#NT-Name
-    name_start_chars = [
-        ':', ['A', 'Z'], '_', ['a', 'z'], ['\u00C0', '\u00D6'],
-        ['\u00D8', '\u00F6'], ['\u00F8', '\u02FF'], ['\u0370', '\u037D'],
-        ['\u037F', '\u1FFF'], ['\u200C', '\u200D'], ['\u2070', '\u218F'],
-        ['\u2C00', '\u2FEF'], ['\u3001', '\uD7FF'], ['\uF900', '\uFDCF'],
-        ['\uFDF0', '\uFFFD'], ['\U00010000', '\U000EFFFF']]
+def _xml_name_checker() -> re.Pattern:
+    # to prevent import cycles
+    from sphinx.builders.epub3 import _XML_NAME_PATTERN
 
-    name_chars = [
-        "\\-", "\\.", ['0', '9'], '\u00B7', ['\u0300', '\u036F'],
-        ['\u203F', '\u2040']
-    ]
-
-    def convert(entries: Any, splitter: str = '|') -> str:
-        results = []
-        for entry in entries:
-            if isinstance(entry, list):
-                results.append('[%s]' % convert(entry, '-'))
-            else:
-                results.append(entry)
-        return splitter.join(results)
-
-    start_chars_regex = convert(name_start_chars)
-    name_chars_regex = convert(name_chars)
-    return re.compile(f'({start_chars_regex})({start_chars_regex}|{name_chars_regex})*')
+    return _XML_NAME_PATTERN
 
 
 deprecated_alias('sphinx.util',
@@ -410,6 +389,7 @@ deprecated_alias('sphinx.util',
                      'rfc1123_to_epoch': _http_date.rfc1123_to_epoch,
                      'save_traceback': _exceptions.save_traceback,
                      'format_exception_cut_frames': _exceptions.format_exception_cut_frames,
+                     'xmlname_checker': _xml_name_checker,
                  },
                  RemovedInSphinx70Warning,
                  {
@@ -422,4 +402,5 @@ deprecated_alias('sphinx.util',
                      'rfc1123_to_epoch': 'sphinx.http_date.rfc1123_to_epoch',
                      'save_traceback': 'sphinx.exceptions.save_traceback',
                      'format_exception_cut_frames': 'sphinx.exceptions.format_exception_cut_frames',  # NoQA: E501
+                     'xmlname_checker': 'sphinx.builders.epub3._XML_NAME_PATTERN',
                  })
diff --git a/tests/test_build_epub.py b/tests/test_build_epub.py
index becde92cd..a50c51e25 100644
--- a/tests/test_build_epub.py
+++ b/tests/test_build_epub.py
@@ -7,6 +7,8 @@ from xml.etree import ElementTree
 
 import pytest
 
+from sphinx.builders.epub3 import _XML_NAME_PATTERN
+
 
 # check given command is runnable
 def runnable(command):
@@ -382,3 +384,10 @@ def test_run_epubcheck(app):
             print(exc.stdout.decode('utf-8'))
             print(exc.stderr.decode('utf-8'))
             raise AssertionError('epubcheck exited with return code %s' % exc.returncode)
+
+
+def test_xml_name_pattern_check():
+    assert _XML_NAME_PATTERN.match('id-pub')
+    assert _XML_NAME_PATTERN.fullmatch('id-pub')
+    assert _XML_NAME_PATTERN.match('webpage')
+    assert not _XML_NAME_PATTERN.match('1bfda21')
diff --git a/tests/test_util.py b/tests/test_util.py
index 226b5b4ed..bb4f10a8c 100644
--- a/tests/test_util.py
+++ b/tests/test_util.py
@@ -6,7 +6,7 @@ import tempfile
 import pytest
 
 from sphinx.errors import ExtensionError
-from sphinx.util import encode_uri, ensuredir, import_object, parselinenos, xmlname_checker
+from sphinx.util import encode_uri, ensuredir, import_object, parselinenos
 
 
 def test_encode_uri():
@@ -75,10 +75,3 @@ def test_parselinenos():
         parselinenos('-', 10)
     with pytest.raises(ValueError):
         parselinenos('3-1', 10)
-
-
-def test_xmlname_check():
-    checker = xmlname_checker()
-    assert checker.match('id-pub')
-    assert checker.match('webpage')
-    assert not checker.match('1bfda21')