From 56bbb08b2cc4925e4796752ac798fe0001267a49 Mon Sep 17 00:00:00 2001 From: Takeshi KOMIYA Date: Sun, 20 Oct 2019 22:29:46 +0900 Subject: [PATCH] Do not replace unicode characters by LaTeX macros on unicode supported LaTeX engines --- CHANGES | 2 ++ sphinx/util/template.py | 6 ++--- sphinx/util/texescape.py | 32 +++++++++++++++++++++--- sphinx/writers/latex.py | 9 ++++--- tests/roots/test-latex-unicode/conf.py | 0 tests/roots/test-latex-unicode/index.rst | 7 ++++++ tests/test_build_latex.py | 24 ++++++++++++++++++ 7 files changed, 70 insertions(+), 10 deletions(-) create mode 100644 tests/roots/test-latex-unicode/conf.py create mode 100644 tests/roots/test-latex-unicode/index.rst diff --git a/CHANGES b/CHANGES index 26ea1afb6..15b8368ac 100644 --- a/CHANGES +++ b/CHANGES @@ -28,6 +28,8 @@ Bugs fixed .. _latex3/latex2e#173: https://github.com/latex3/latex2e/issues/173 * #6618: LaTeX: Avoid section names at the end of a page +* #6738: LaTeX: Do not replace unicode characters by LaTeX macros on unicode + supported LaTeX engines * #6704: linkcheck: Be defensive and handle newly defined HTTP error code * #6655: image URLs containing ``data:`` causes gettext builder crashed * #6584: i18n: Error when compiling message catalogs on Hindi diff --git a/sphinx/util/template.py b/sphinx/util/template.py index fd8886944..3a43db9a5 100644 --- a/sphinx/util/template.py +++ b/sphinx/util/template.py @@ -63,14 +63,14 @@ class SphinxRenderer(FileRenderer): class LaTeXRenderer(SphinxRenderer): - def __init__(self, template_path: str = None) -> None: + def __init__(self, template_path: str = None, latex_engine: str = None) -> None: if template_path is None: template_path = os.path.join(package_dir, 'templates', 'latex') super().__init__(template_path) # use texescape as escape filter - self.env.filters['e'] = texescape.escape - self.env.filters['escape'] = texescape.escape + self.env.filters['e'] = texescape.get_escape_func(latex_engine) + self.env.filters['escape'] = texescape.get_escape_func(latex_engine) self.env.filters['eabbr'] = texescape.escape_abbr # use JSP/eRuby like tagging instead because curly bracket; the default diff --git a/sphinx/util/texescape.py b/sphinx/util/texescape.py index 408ec1253..4e7055119 100644 --- a/sphinx/util/texescape.py +++ b/sphinx/util/texescape.py @@ -9,7 +9,7 @@ """ import re -from typing import Dict +from typing import Callable, Dict tex_replacements = [ # map TeX special chars @@ -46,6 +46,14 @@ tex_replacements = [ ('|', r'\textbar{}'), ('ℯ', r'e'), ('ⅈ', r'i'), + # Greek alphabet not escaped: pdflatex handles it via textalpha and inputenc + # OHM SIGN U+2126 is handled by LaTeX textcomp package +] + +# A map Unicode characters to LaTeX representation +# (for LaTeX engines which don't support unicode) +unicode_tex_replacements = [ + # superscript ('⁰', r'\(\sp{\text{0}}\)'), ('¹', r'\(\sp{\text{1}}\)'), ('²', r'\(\sp{\text{2}}\)'), @@ -56,6 +64,7 @@ tex_replacements = [ ('⁷', r'\(\sp{\text{7}}\)'), ('⁸', r'\(\sp{\text{8}}\)'), ('⁹', r'\(\sp{\text{9}}\)'), + # subscript ('₀', r'\(\sb{\text{0}}\)'), ('₁', r'\(\sb{\text{1}}\)'), ('₂', r'\(\sb{\text{2}}\)'), @@ -66,20 +75,32 @@ tex_replacements = [ ('₇', r'\(\sb{\text{7}}\)'), ('₈', r'\(\sb{\text{8}}\)'), ('₉', r'\(\sb{\text{9}}\)'), - # Greek alphabet not escaped: pdflatex handles it via textalpha and inputenc - # OHM SIGN U+2126 is handled by LaTeX textcomp package ] tex_escape_map = {} # type: Dict[int, str] +tex_escape_map_without_unicode = {} # type: Dict[int, str] tex_replace_map = {} tex_hl_escape_map_new = {} +def get_escape_func(latex_engine: str) -> Callable[[str], str]: + """Get escape() function for given latex_engine.""" + if latex_engine in ('lualatex', 'xelatex'): + return escape_for_unicode_latex_engine + else: + return escape + + def escape(s: str) -> str: """Escape text for LaTeX output.""" return s.translate(tex_escape_map) +def escape_for_unicode_latex_engine(s: str) -> str: + """Escape text for unicode supporting LaTeX engine.""" + return s.translate(tex_escape_map_without_unicode) + + def escape_abbr(text: str) -> str: """Adjust spacing after abbreviations. Works with @ letter or other.""" return re.sub(r'\.(?=\s|$)', r'.\@{}', text) @@ -87,6 +108,11 @@ def escape_abbr(text: str) -> str: def init() -> None: for a, b in tex_replacements: + tex_escape_map[ord(a)] = b + tex_escape_map_without_unicode[ord(a)] = b + tex_replace_map[ord(a)] = '_' + + for a, b in unicode_tex_replacements: tex_escape_map[ord(a)] = b tex_replace_map[ord(a)] = '_' diff --git a/sphinx/writers/latex.py b/sphinx/writers/latex.py index 0d2b2bc97..bb84bba7a 100644 --- a/sphinx/writers/latex.py +++ b/sphinx/writers/latex.py @@ -32,7 +32,7 @@ from sphinx.util import split_into, logging from sphinx.util.docutils import SphinxTranslator from sphinx.util.nodes import clean_astext, get_prev_node from sphinx.util.template import LaTeXRenderer -from sphinx.util.texescape import escape, tex_replace_map +from sphinx.util.texescape import get_escape_func, tex_replace_map try: from docutils.utils.roman import toRoman @@ -500,7 +500,7 @@ class LaTeXTranslator(SphinxTranslator): self.first_param = 0 # escape helper - self.escape = escape + self.escape = get_escape_func(self.config.latex_engine) # sort out some elements self.elements = self.builder.context.copy() @@ -795,13 +795,14 @@ class LaTeXTranslator(SphinxTranslator): def render(self, template_name, variables): # type: (str, Dict) -> str + renderer = LaTeXRenderer(latex_engine=self.config.latex_engine) for template_dir in self.builder.config.templates_path: template = path.join(self.builder.confdir, template_dir, template_name) if path.exists(template): - return LaTeXRenderer().render(template, variables) + return renderer.render(template, variables) - return LaTeXRenderer().render(template_name, variables) + return renderer.render(template_name, variables) def visit_document(self, node): # type: (nodes.Element) -> None diff --git a/tests/roots/test-latex-unicode/conf.py b/tests/roots/test-latex-unicode/conf.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/roots/test-latex-unicode/index.rst b/tests/roots/test-latex-unicode/index.rst new file mode 100644 index 000000000..2abeca98f --- /dev/null +++ b/tests/roots/test-latex-unicode/index.rst @@ -0,0 +1,7 @@ +test-latex-unicode +================== + +* script small e: ℯ +* double struck italic small i: ⅈ +* superscript: ⁰, ¹ +* subscript: ₀, ₁ diff --git a/tests/test_build_latex.py b/tests/test_build_latex.py index 8410bbd03..80869dea3 100644 --- a/tests/test_build_latex.py +++ b/tests/test_build_latex.py @@ -1437,3 +1437,27 @@ def test_index_on_title(app, status, warning): '\\label{\\detokenize{contents:test-for-index-in-top-level-title}}' '\\index{index@\\spxentry{index}}\n' in result) + + +@pytest.mark.sphinx('latex', testroot='latex-unicode', + confoverrides={'latex_engine': 'pdflatex'}) +def test_texescape_for_non_unicode_supported_engine(app, status, warning): + app.builder.build_all() + result = (app.outdir / 'python.tex').text() + print(result) + assert 'script small e: e' in result + assert 'double struck italic small i: i' in result + assert r'superscript: \(\sp{\text{0}}\), \(\sp{\text{1}}\)' in result + assert r'subscript: \(\sb{\text{0}}\), \(\sb{\text{1}}\)' in result + + +@pytest.mark.sphinx('latex', testroot='latex-unicode', + confoverrides={'latex_engine': 'xelatex'}) +def test_texescape_for_unicode_supported_engine(app, status, warning): + app.builder.build_all() + result = (app.outdir / 'python.tex').text() + print(result) + assert 'script small e: e' in result + assert 'double struck italic small i: i' in result + assert 'superscript: ⁰, ¹' in result + assert 'subscript: ₀, ₁' in result