Merge pull request #5862 from tk0miya/refactor_chm_htmlescape

refactor chm_htmlescape()
2025-02-25 18:55:22 -06:00 · 2018-12-24 14:17:41 +09:00 · 2018-12-24 14:17:41 +09:00 · f8c6c14526
commit f8c6c14526
parent 3f92aa7e6a 6ffe549f49
3 changed files with 46 additions and 19 deletions
--- a/1
+++ b/1
@ -38,6 +38,7 @@ Bugs fixed
 * #5834: apidoc: wrong help for ``--tocfile``
 * #5800: todo: crashed if todo is defined in TextElement
 * #5846: htmlhelp: convert hex escaping to decimal escaping in .hhc/.hhk files
 * htmlhelp: broken .hhk file generated when title contains a double quote
 Testing
 --------
--- a/sphinx/builders/htmlhelp.py
+++ b/sphinx/builders/htmlhelp.py
@ -13,10 +13,10 @@ from __future__ import print_function
 import codecs
 import os
 import re
 from os import path
 from docutils import nodes
 from six import PY3
 from sphinx import addnodes
 from sphinx.builders.html import StandaloneHTMLBuilder
@ -170,22 +170,21 @@ chm_locales = {
 }
-def chm_htmlescape(*args, **kwargs):
+def chm_htmlescape(s, quote=None):
-    # type: (*Any, **Any) -> unicode
+    # type: (unicode, bool) -> unicode
    """
-    chm_htmlescape() is a wrapper of htmlescape().
+    chm_htmlescape() is a wrapper of html.escape().
    .hhc/.hhk files don't recognize hex escaping, so we need to convert
-    hex escaping to decimal escaping. for example: `&#x27;` -> `&#39;`
+    hex escaping to decimal escaping. for example: ``&#x27;`` -> ``&#39;``
-    htmlescape() may generates a hex escaping `&#x27;` for single
+    html.escape() may generate a hex escaping ``&#x27;`` for single
-    quote `'`, this wrapper fixes this.
+    quote ``'``, this wrapper fixes this.
    """
-    def convert(matchobj):
+    if quote is None:
-        # type: (Match[unicode]) -> unicode
+        quote = PY3  # True for py3, False for py2  (for compatibility)
-        codepoint = int(matchobj.group(1), 16)
+
-        return '&#%d;' % codepoint
+    s = htmlescape(s, quote)
-    return re.sub(r'&#[xX]([0-9a-fA-F]+);',
+    s = s.replace('&#x27;', '&#39;')    # re-escape as decimal
-                  convert,
+    return s
                  htmlescape(*args, **kwargs))
 class HTMLHelpBuilder(StandaloneHTMLBuilder):
@ -297,7 +296,7 @@ class HTMLHelpBuilder(StandaloneHTMLBuilder):
                        write_toc(subnode, ullevel)
                elif isinstance(node, nodes.reference):
                    link = node['refuri']
-                    title = chm_htmlescape(node.astext()).replace('"', '&quot;')
+                    title = chm_htmlescape(node.astext(), True)
                    f.write(object_sitemap % (title, link))
                elif isinstance(node, nodes.bullet_list):
                    if ullevel != 0:
@ -327,10 +326,9 @@ class HTMLHelpBuilder(StandaloneHTMLBuilder):
                # type: (unicode, List[Tuple[unicode, unicode]], List[Tuple[unicode, List[Tuple[unicode, unicode]]]]) -> None  # NOQA
                def write_param(name, value):
                    # type: (unicode, unicode) -> None
-                    item = '    <param name="%s" value="%s">\n' % \
+                    item = '    <param name="%s" value="%s">\n' % (name, value)
                        (name, value)
                    f.write(item)
-                title = chm_htmlescape(title)
+                title = chm_htmlescape(title, True)
                f.write('<LI> <OBJECT type="text/sitemap">\n')
                write_param('Keyword', title)
                if len(refs) == 0:
--- a/tests/test_build_htmlhelp.py
+++ b/tests/test_build_htmlhelp.py
@ -1,3 +1,4 @@
 # -*- coding: utf-8 -*-
 """
    test_build_htmlhelp
    ~~~~~~~~~~~~~~~~~~~
@ -9,6 +10,9 @@
 import re
 import pytest
 from six import PY2
 from sphinx.builders.htmlhelp import chm_htmlescape
@pytest.mark.sphinx('htmlhelp', testroot='build-htmlhelp')
@ -22,5 +26,29 @@ def test_chm(app):
    with open(hhk_path, 'rb') as f:
        data = f.read()
    m = re.search(br'&#[xX][0-9a-fA-F]+;', data)
-    assert m == None, 'Hex escaping exists in .hhk file: ' + str(m.group(0))
+    assert m is None, 'Hex escaping exists in .hhk file: ' + str(m.group(0))
 def test_chm_htmlescape():
    assert chm_htmlescape('Hello world') == 'Hello world'
    assert chm_htmlescape(u'Unicode 文字') == u'Unicode 文字'
    assert chm_htmlescape('&#x45') == '&amp;#x45'
    if PY2:
        assert chm_htmlescape('<Hello> "world"') == '&lt;Hello&gt; "world"'
        assert chm_htmlescape('<Hello> "world"', True) == '&lt;Hello&gt; &quot;world&quot;'
        assert chm_htmlescape('<Hello> "world"', False) == '&lt;Hello&gt; "world"'
    else:
        assert chm_htmlescape('<Hello> "world"') == '&lt;Hello&gt; &quot;world&quot;'
        assert chm_htmlescape('<Hello> "world"', True) == '&lt;Hello&gt; &quot;world&quot;'
        assert chm_htmlescape('<Hello> "world"', False) == '&lt;Hello&gt; "world"'
    if PY2:
        # single quotes are not escaped on py2 (following the behavior of cgi.escape())
        assert chm_htmlescape("Hello 'world'") == "Hello 'world'"
        assert chm_htmlescape("Hello 'world'", True) == "Hello 'world'"
        assert chm_htmlescape("Hello 'world'", False) == "Hello 'world'"
    else:
        assert chm_htmlescape("Hello 'world'") == "Hello &#39;world&#39;"
        assert chm_htmlescape("Hello 'world'", True) == "Hello &#39;world&#39;"
        assert chm_htmlescape("Hello 'world'", False) == "Hello 'world'"