From 6ffe549f492dd8ed9cfcc286449a200a77bff235 Mon Sep 17 00:00:00 2001
From: Takeshi KOMIYA
Date: Mon, 24 Dec 2018 12:59:38 +0900
Subject: [PATCH] refactor chm_htmlescape()

---
 CHANGES                      |  1 +
 sphinx/builders/htmlhelp.py  | 34 ++++++++++++++++------------------
 tests/test_build_htmlhelp.py | 30 +++++++++++++++++++++++++++++-
 3 files changed, 46 insertions(+), 19 deletions(-)

diff --git a/CHANGES b/CHANGES
index 78bc97b23..190e00e12 100644
--- a/CHANGES
+++ b/CHANGES
@@ -38,6 +38,7 @@ Bugs fixed
 * #5834: apidoc: wrong help for ``--tocfile``
 * #5800: todo: crashed if todo is defined in TextElement
 * #5846: htmlhelp: convert hex escaping to decimal escaping in .hhc/.hhk files
+* htmlhelp: broken .hhk file generated when title contains a double quote
 
 Testing
 --------
diff --git a/sphinx/builders/htmlhelp.py b/sphinx/builders/htmlhelp.py
index 42ee05c4a..dfb58ede6 100644
--- a/sphinx/builders/htmlhelp.py
+++ b/sphinx/builders/htmlhelp.py
@@ -13,10 +13,10 @@ from __future__ import print_function
 
 import codecs
 import os
-import re
 from os import path
 
 from docutils import nodes
+from six import PY3
 
 from sphinx import addnodes
 from sphinx.builders.html import StandaloneHTMLBuilder
@@ -170,22 +170,21 @@ chm_locales = {
 }
 
 
-def chm_htmlescape(*args, **kwargs):
-    # type: (*Any, **Any) -> unicode
+def chm_htmlescape(s, quote=None):
+    # type: (unicode, bool) -> unicode
     """
-    chm_htmlescape() is a wrapper of htmlescape().
+    chm_htmlescape() is a wrapper of html.escape().
     .hhc/.hhk files don't recognize hex escaping, we need convert
-    hex escaping to decimal escaping. for example: `&#x27;` -> `&#39;`
-    htmlescape() may generates a hex escaping `&#x27;` for single
-    quote `'`, this wrapper fixes this.
+    hex escaping to decimal escaping. for example: ``&#x27;`` -> ``&#39;``
+    html.escape() may generates a hex escaping ``&#x27;`` for single
+    quote ``'``, this wrapper fixes this.
     """
-    def convert(matchobj):
-        # type: (Match[unicode]) -> unicode
-        codepoint = int(matchobj.group(1), 16)
-        return '&#%d;' % codepoint
-    return re.sub(r'&#[xX]([0-9a-fA-F]+);',
-                  convert,
-                  htmlescape(*args, **kwargs))
+    if quote is None:
+        quote = PY3  # True for py3, False for py2 (for compatibility)
+
+    s = htmlescape(s, quote)
+    s = s.replace('&#x27;', '&#39;')  # re-escape as decimal
+    return s
 
 
 class HTMLHelpBuilder(StandaloneHTMLBuilder):
@@ -297,7 +296,7 @@ class HTMLHelpBuilder(StandaloneHTMLBuilder):
                         write_toc(subnode, ullevel)
                 elif isinstance(node, nodes.reference):
                     link = node['refuri']
-                    title = chm_htmlescape(node.astext()).replace('"', '&quot;')
+                    title = chm_htmlescape(node.astext(), True)
                     f.write(object_sitemap % (title, link))
                 elif isinstance(node, nodes.bullet_list):
                     if ullevel != 0:
@@ -327,10 +326,9 @@ class HTMLHelpBuilder(StandaloneHTMLBuilder):
                 # type: (unicode, List[Tuple[unicode, unicode]], List[Tuple[unicode, List[Tuple[unicode, unicode]]]]) -> None  # NOQA
                 def write_param(name, value):
                     # type: (unicode, unicode) -> None
-                    item = '    <param name="%s" value="%s">\n' % \
-                        (name, value)
+                    item = '    <param name="%s" value="%s">\n' % (name, value)
                     f.write(item)
-                title = chm_htmlescape(title)
+                title = chm_htmlescape(title, True)
                 f.write('<LI> <OBJECT type="text/sitemap">\n')
                 write_param('Keyword', title)
                 if len(refs) == 0:
diff --git a/tests/test_build_htmlhelp.py b/tests/test_build_htmlhelp.py
index 78c698ec4..99de2a65f 100644
--- a/tests/test_build_htmlhelp.py
+++ b/tests/test_build_htmlhelp.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 """
     test_build_htmlhelp
     ~~~~~~~~~~~~~~~~~~~
@@ -9,6 +10,9 @@
 import re
 
 import pytest
+from six import PY2
+
+from sphinx.builders.htmlhelp import chm_htmlescape
 
 
 @pytest.mark.sphinx('htmlhelp', testroot='build-htmlhelp')
@@ -22,5 +26,29 @@ def test_chm(app):
     with open(hhk_path, 'rb') as f:
         data = f.read()
     m = re.search(br'&#[xX][0-9a-fA-F]+;', data)
-    assert m == None, 'Hex escaping exists in .hhk file: ' + str(m.group(0))
+    assert m is None, 'Hex escaping exists in .hhk file: ' + str(m.group(0))
 
+
+def test_chm_htmlescape():
+    assert chm_htmlescape('Hello world') == 'Hello world'
+    assert chm_htmlescape(u'Unicode 文字') == u'Unicode 文字'
+    assert chm_htmlescape('&#x45') == '&amp;#x45'
+
+    if PY2:
+        assert chm_htmlescape('<Hello> "world"') == '&lt;Hello&gt; "world"'
+        assert chm_htmlescape('<Hello> "world"', True) == '&lt;Hello&gt; &quot;world&quot;'
+        assert chm_htmlescape('<Hello> "world"', False) == '&lt;Hello&gt; "world"'
+    else:
+        assert chm_htmlescape('<Hello> "world"') == '&lt;Hello&gt; &quot;world&quot;'
+        assert chm_htmlescape('<Hello> "world"', True) == '&lt;Hello&gt; &quot;world&quot;'
+        assert chm_htmlescape('<Hello> "world"', False) == '&lt;Hello&gt; "world"'
+
+    if PY2:
+        # single quotes are not escaped on py2 (following the behavior of cgi.escape())
+        assert chm_htmlescape("Hello 'world'") == "Hello 'world'"
+        assert chm_htmlescape("Hello 'world'", True) == "Hello 'world'"
+        assert chm_htmlescape("Hello 'world'", False) == "Hello 'world'"
+    else:
+        assert chm_htmlescape("Hello 'world'") == "Hello &#39;world&#39;"
+        assert chm_htmlescape("Hello 'world'", True) == "Hello &#39;world&#39;"
+        assert chm_htmlescape("Hello 'world'", False) == "Hello 'world'"