Merge pull request #5862 from tk0miya/refactor_chm_htmlescape

refactor chm_htmlescape()
This commit is contained in:
Takeshi KOMIYA 2018-12-24 14:17:41 +09:00 committed by GitHub
commit f8c6c14526
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 46 additions and 19 deletions

View File

@ -38,6 +38,7 @@ Bugs fixed
* #5834: apidoc: wrong help for ``--tocfile`` * #5834: apidoc: wrong help for ``--tocfile``
* #5800: todo: crashed if todo is defined in TextElement * #5800: todo: crashed if todo is defined in TextElement
* #5846: htmlhelp: convert hex escaping to decimal escaping in .hhc/.hhk files * #5846: htmlhelp: convert hex escaping to decimal escaping in .hhc/.hhk files
* htmlhelp: broken .hhk file generated when title contains a double quote
Testing Testing
-------- --------

View File

@ -13,10 +13,10 @@ from __future__ import print_function
import codecs import codecs
import os import os
import re
from os import path from os import path
from docutils import nodes from docutils import nodes
from six import PY3
from sphinx import addnodes from sphinx import addnodes
from sphinx.builders.html import StandaloneHTMLBuilder from sphinx.builders.html import StandaloneHTMLBuilder
@ -170,22 +170,21 @@ chm_locales = {
} }
def chm_htmlescape(*args, **kwargs): def chm_htmlescape(s, quote=None):
# type: (*Any, **Any) -> unicode # type: (unicode, bool) -> unicode
""" """
chm_htmlescape() is a wrapper of htmlescape(). chm_htmlescape() is a wrapper of html.escape().
.hhc/.hhk files don't recognize hex escaping, we need convert .hhc/.hhk files don't recognize hex escaping, we need convert
hex escaping to decimal escaping. for example: `&#x27;` -> `&#39;` hex escaping to decimal escaping. for example: ``&#x27;`` -> ``&#39;``
htmlescape() may generates a hex escaping `&#x27;` for single html.escape() may generates a hex escaping ``&#x27;`` for single
quote `'`, this wrapper fixes this. quote ``'``, this wrapper fixes this.
""" """
def convert(matchobj): if quote is None:
# type: (Match[unicode]) -> unicode quote = PY3 # True for py3, False for py2 (for compatibility)
codepoint = int(matchobj.group(1), 16)
return '&#%d;' % codepoint s = htmlescape(s, quote)
return re.sub(r'&#[xX]([0-9a-fA-F]+);', s = s.replace('&#x27;', '&#39;') # re-escape as decimal
convert, return s
htmlescape(*args, **kwargs))
class HTMLHelpBuilder(StandaloneHTMLBuilder): class HTMLHelpBuilder(StandaloneHTMLBuilder):
@ -297,7 +296,7 @@ class HTMLHelpBuilder(StandaloneHTMLBuilder):
write_toc(subnode, ullevel) write_toc(subnode, ullevel)
elif isinstance(node, nodes.reference): elif isinstance(node, nodes.reference):
link = node['refuri'] link = node['refuri']
title = chm_htmlescape(node.astext()).replace('"', '&quot;') title = chm_htmlescape(node.astext(), True)
f.write(object_sitemap % (title, link)) f.write(object_sitemap % (title, link))
elif isinstance(node, nodes.bullet_list): elif isinstance(node, nodes.bullet_list):
if ullevel != 0: if ullevel != 0:
@ -327,10 +326,9 @@ class HTMLHelpBuilder(StandaloneHTMLBuilder):
# type: (unicode, List[Tuple[unicode, unicode]], List[Tuple[unicode, List[Tuple[unicode, unicode]]]]) -> None # NOQA # type: (unicode, List[Tuple[unicode, unicode]], List[Tuple[unicode, List[Tuple[unicode, unicode]]]]) -> None # NOQA
def write_param(name, value): def write_param(name, value):
# type: (unicode, unicode) -> None # type: (unicode, unicode) -> None
item = ' <param name="%s" value="%s">\n' % \ item = ' <param name="%s" value="%s">\n' % (name, value)
(name, value)
f.write(item) f.write(item)
title = chm_htmlescape(title) title = chm_htmlescape(title, True)
f.write('<LI> <OBJECT type="text/sitemap">\n') f.write('<LI> <OBJECT type="text/sitemap">\n')
write_param('Keyword', title) write_param('Keyword', title)
if len(refs) == 0: if len(refs) == 0:

View File

@ -1,3 +1,4 @@
# -*- coding: utf-8 -*-
""" """
test_build_htmlhelp test_build_htmlhelp
~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~
@ -9,6 +10,9 @@
import re import re
import pytest import pytest
from six import PY2
from sphinx.builders.htmlhelp import chm_htmlescape
@pytest.mark.sphinx('htmlhelp', testroot='build-htmlhelp') @pytest.mark.sphinx('htmlhelp', testroot='build-htmlhelp')
@ -22,5 +26,29 @@ def test_chm(app):
with open(hhk_path, 'rb') as f: with open(hhk_path, 'rb') as f:
data = f.read() data = f.read()
m = re.search(br'&#[xX][0-9a-fA-F]+;', data) m = re.search(br'&#[xX][0-9a-fA-F]+;', data)
assert m == None, 'Hex escaping exists in .hhk file: ' + str(m.group(0)) assert m is None, 'Hex escaping exists in .hhk file: ' + str(m.group(0))
def test_chm_htmlescape():
    """Exercise chm_htmlescape() with plain text, markup and both quote styles.

    Expectations for quote characters depend on the interpreter: the
    default ``quote`` argument and the treatment of single quotes are
    pinned to different results on py2 and py3.
    """
    # Inputs without quote characters: same expectation on every interpreter.
    assert chm_htmlescape('Hello world') == 'Hello world'
    assert chm_htmlescape(u'Unicode 文字') == u'Unicode 文字'
    assert chm_htmlescape('&#x45') == '&amp;#x45'

    # Double quotes: only the default (quote omitted) differs between py2/py3.
    dquoted = '<Hello> "world"'
    dquote_escaped = '&lt;Hello&gt; &quot;world&quot;'
    dquote_kept = '&lt;Hello&gt; "world"'
    assert chm_htmlescape(dquoted) == (dquote_kept if PY2 else dquote_escaped)
    assert chm_htmlescape(dquoted, True) == dquote_escaped
    assert chm_htmlescape(dquoted, False) == dquote_kept

    # Single quotes are not escaped on py2 (following the behavior of
    # cgi.escape()); on py3 they come back as the decimal escape.
    squoted = "Hello 'world'"
    squote_escaped = "Hello &#39;world&#39;"
    assert chm_htmlescape(squoted) == (squoted if PY2 else squote_escaped)
    assert chm_htmlescape(squoted, True) == (squoted if PY2 else squote_escaped)
    assert chm_htmlescape(squoted, False) == squoted