Do not replace unicode characters by LaTeX macros on unicode supported LaTeX engines

This commit is contained in:
Takeshi KOMIYA 2019-10-20 22:29:46 +09:00
parent a499b7d83e
commit 56bbb08b2c
7 changed files with 70 additions and 10 deletions

View File

@ -28,6 +28,8 @@ Bugs fixed
.. _latex3/latex2e#173: https://github.com/latex3/latex2e/issues/173
* #6618: LaTeX: Avoid section names at the end of a page
* #6738: LaTeX: Do not replace unicode characters by LaTeX macros on unicode
supported LaTeX engines
* #6704: linkcheck: Be defensive and handle newly defined HTTP error code
* #6655: image URLs containing ``data:`` cause the gettext builder to crash
* #6584: i18n: Error when compiling message catalogs on Hindi

View File

@ -63,14 +63,14 @@ class SphinxRenderer(FileRenderer):
class LaTeXRenderer(SphinxRenderer):
def __init__(self, template_path: str = None) -> None:
def __init__(self, template_path: str = None, latex_engine: str = None) -> None:
if template_path is None:
template_path = os.path.join(package_dir, 'templates', 'latex')
super().__init__(template_path)
# use texescape as escape filter
self.env.filters['e'] = texescape.escape
self.env.filters['escape'] = texescape.escape
self.env.filters['e'] = texescape.get_escape_func(latex_engine)
self.env.filters['escape'] = texescape.get_escape_func(latex_engine)
self.env.filters['eabbr'] = texescape.escape_abbr
# use JSP/eRuby like tagging instead because curly bracket; the default

View File

@ -9,7 +9,7 @@
"""
import re
from typing import Dict
from typing import Callable, Dict
tex_replacements = [
# map TeX special chars
@ -46,6 +46,14 @@ tex_replacements = [
('|', r'\textbar{}'),
('ℯ', r'e'),
('ⅈ', r'i'),
# Greek alphabet not escaped: pdflatex handles it via textalpha and inputenc
# OHM SIGN U+2126 is handled by LaTeX textcomp package
]
# A map of Unicode characters to their LaTeX representation
# (for LaTeX engines which don't support Unicode)
unicode_tex_replacements = [
# superscript
('⁰', r'\(\sp{\text{0}}\)'),
('¹', r'\(\sp{\text{1}}\)'),
('²', r'\(\sp{\text{2}}\)'),
@ -56,6 +64,7 @@ tex_replacements = [
('⁷', r'\(\sp{\text{7}}\)'),
('⁸', r'\(\sp{\text{8}}\)'),
('⁹', r'\(\sp{\text{9}}\)'),
# subscript
('₀', r'\(\sb{\text{0}}\)'),
('₁', r'\(\sb{\text{1}}\)'),
('₂', r'\(\sb{\text{2}}\)'),
@ -66,20 +75,32 @@ tex_replacements = [
('₇', r'\(\sb{\text{7}}\)'),
('₈', r'\(\sb{\text{8}}\)'),
('₉', r'\(\sb{\text{9}}\)'),
# Greek alphabet not escaped: pdflatex handles it via textalpha and inputenc
# OHM SIGN U+2126 is handled by LaTeX textcomp package
]
# Translation table (code point -> LaTeX text) used by escape().
# init() fills it from both tex_replacements and unicode_tex_replacements.
tex_escape_map = {} # type: Dict[int, str]
# Translation table used by escape_for_unicode_latex_engine().
# init() fills it from tex_replacements only, so the characters listed in
# unicode_tex_replacements pass through unchanged.
tex_escape_map_without_unicode = {} # type: Dict[int, str]
# Maps the code point of every replaced character to '_' (filled by init()).
tex_replace_map = {}
# NOTE(review): presumably an escape table for highlighted code; it is not
# populated anywhere in the code shown here -- confirm against the full module.
tex_hl_escape_map_new = {}
def get_escape_func(latex_engine: str) -> Callable[[str], str]:
    """Return the escaping function suited to *latex_engine*.

    ``lualatex`` and ``xelatex`` get :func:`escape_for_unicode_latex_engine`;
    any other value (including ``None``) gets :func:`escape`.
    """
    unicode_engines = ('lualatex', 'xelatex')
    return (escape_for_unicode_latex_engine
            if latex_engine in unicode_engines
            else escape)
def escape(s: str) -> str:
    """Escape text for LaTeX output.

    Each character with an entry in ``tex_escape_map`` is replaced by the
    mapped text; all other characters are kept as they are.
    """
    escaped = s.translate(tex_escape_map)
    return escaped
def escape_for_unicode_latex_engine(s: str) -> str:
    """Escape text for unicode supporting LaTeX engine.

    Translates *s* through ``tex_escape_map_without_unicode``; characters
    without an entry in that table are kept as they are.
    """
    escaped = s.translate(tex_escape_map_without_unicode)
    return escaped
def escape_abbr(text: str) -> str:
    r"""Adjust spacing after abbreviations. Works with @ letter or other.

    Every period that is followed by whitespace, or that ends the string,
    gets ``\@{}`` appended; periods followed by any other character are left
    untouched.
    """
    trailing_period = re.compile(r'\.(?=\s|$)')
    return trailing_period.sub(r'.\@{}', text)
@ -87,6 +108,11 @@ def escape_abbr(text: str) -> str:
def init() -> None:
    """Fill the module-level translation tables from the replacement lists.

    Pairs from ``tex_replacements`` go into both escape maps; pairs from
    ``unicode_tex_replacements`` go only into ``tex_escape_map``, so the
    table for unicode-capable engines leaves those characters alone.  Every
    replaced character is also mapped to ``'_'`` in ``tex_replace_map``.
    """
    for char, macro in tex_replacements:
        codepoint = ord(char)
        tex_escape_map[codepoint] = macro
        tex_escape_map_without_unicode[codepoint] = macro
        tex_replace_map[codepoint] = '_'

    for char, macro in unicode_tex_replacements:
        codepoint = ord(char)
        tex_escape_map[codepoint] = macro
        tex_replace_map[codepoint] = '_'

View File

@ -32,7 +32,7 @@ from sphinx.util import split_into, logging
from sphinx.util.docutils import SphinxTranslator
from sphinx.util.nodes import clean_astext, get_prev_node
from sphinx.util.template import LaTeXRenderer
from sphinx.util.texescape import escape, tex_replace_map
from sphinx.util.texescape import get_escape_func, tex_replace_map
try:
from docutils.utils.roman import toRoman
@ -500,7 +500,7 @@ class LaTeXTranslator(SphinxTranslator):
self.first_param = 0
# escape helper
self.escape = escape
self.escape = get_escape_func(self.config.latex_engine)
# sort out some elements
self.elements = self.builder.context.copy()
@ -795,13 +795,14 @@ class LaTeXTranslator(SphinxTranslator):
def render(self, template_name, variables):
    # type: (str, Dict) -> str
    """Render *template_name* with a renderer bound to the configured engine.

    Each directory in ``templates_path`` (resolved against the project's
    ``confdir``) is checked for a file named *template_name*; the first one
    that exists is rendered.  If none exists, *template_name* itself is
    handed to the renderer.

    :param template_name: file name of the template to render
    :param variables: context passed to the template
    :return: the rendered text
    """
    # A single renderer serves both paths.  It is built with the configured
    # ``latex_engine`` so that the template escape filters match the engine,
    # which a bare ``LaTeXRenderer()`` would not do.
    renderer = LaTeXRenderer(latex_engine=self.config.latex_engine)
    for template_dir in self.builder.config.templates_path:
        template = path.join(self.builder.confdir, template_dir,
                             template_name)
        if path.exists(template):
            return renderer.render(template, variables)

    return renderer.render(template_name, variables)
def visit_document(self, node):
# type: (nodes.Element) -> None

View File

View File

@ -0,0 +1,7 @@
test-latex-unicode
==================
* script small e: ℯ
* double struck italic small i: ⅈ
* superscript: ⁰, ¹
* subscript: ₀, ₁

View File

@ -1437,3 +1437,27 @@ def test_index_on_title(app, status, warning):
'\\label{\\detokenize{contents:test-for-index-in-top-level-title}}'
'\\index{index@\\spxentry{index}}\n'
in result)
@pytest.mark.sphinx('latex', testroot='latex-unicode',
                    confoverrides={'latex_engine': 'pdflatex'})
def test_texescape_for_non_unicode_supported_engine(app, status, warning):
    """With pdflatex, super/subscript digits are written as LaTeX macros."""
    app.builder.build_all()
    tex_file = app.outdir / 'python.tex'
    result = tex_file.text()
    print(result)

    expected_fragments = (
        'script small e: e',
        'double struck italic small i: i',
        r'superscript: \(\sp{\text{0}}\), \(\sp{\text{1}}\)',
        r'subscript: \(\sb{\text{0}}\), \(\sb{\text{1}}\)',
    )
    for fragment in expected_fragments:
        assert fragment in result
@pytest.mark.sphinx('latex', testroot='latex-unicode',
                    confoverrides={'latex_engine': 'xelatex'})
def test_texescape_for_unicode_supported_engine(app, status, warning):
    """With xelatex, super/subscript digits are kept as unicode characters."""
    app.builder.build_all()
    tex_file = app.outdir / 'python.tex'
    result = tex_file.text()
    print(result)

    expected_fragments = (
        'script small e: e',
        'double struck italic small i: i',
        'superscript: ⁰, ¹',
        'subscript: ₀, ₁',
    )
    for fragment in expected_fragments:
        assert fragment in result