diff --git a/CHANGES b/CHANGES index d04da2e54..eacf8b9f4 100644 --- a/CHANGES +++ b/CHANGES @@ -4,6 +4,11 @@ Release 1.8.0 (in development) Dependencies ------------ +* LaTeX: :confval:`latex_use_xindy`, if ``True`` (default for + ``xelatex/lualatex``), instructs ``make latexpdf`` to use :program:`xindy` + for the general index. Make sure your LaTeX distribution includes it. + (refs: #5134) + Incompatible changes -------------------- @@ -39,6 +44,10 @@ Incompatible changes Please use ``app.add_js_file()`` instead. * #5072: Save environment object also with only new documents * #5035: qthelp builder allows dashes in :confval:`qthelp_namespace` +* LaTeX: with lualatex or xelatex, :program:`xindy` is used by default as a + UTF-8 capable replacement for :program:`makeindex` (refs: #5134). After + upgrading Sphinx, please clean the LaTeX build directory of existing projects + before a new build. Deprecated ---------- @@ -143,7 +152,8 @@ Features added for mathjax * #4362: latex: Don't overwrite .tex file if document not changed * #1431: latex: Add alphanumeric enumerated list support -* Add :confval:`latex_use_xindy` for UTF-8 savvy indexing +* Add :confval:`latex_use_xindy` for UTF-8 savvy indexing, defaults to ``True`` + if :confval:`latex_engine` is ``'xelatex'`` or ``'lualatex'``. Bugs fixed ---------- @@ -153,7 +163,7 @@ Bugs fixed * #4945: i18n: fix lang_COUNTRY not fallback correctly for IndexBuilder. Thanks to Shengjing Zhu. * #4983: productionlist directive generates invalid IDs for the tokens -* #5132: (lualatex) PDF build fails if indexed word starts with Unicode characters +* #5132: (lualatex) PDF build fails if indexed word starts with Unicode character Testing -------- diff --git a/doc/usage/configuration.rst b/doc/usage/configuration.rst index 99b55f67e..8db606515 100644 --- a/doc/usage/configuration.rst +++ b/doc/usage/configuration.rst @@ -1874,28 +1874,35 @@ information. .. 
confval:: latex_use_xindy - If ``True`` (default ``False``), the PDF build from the LaTeX files - created by Sphinx will use :program:`xindy` (doc__) rather than - :program:`makeindex`. This means that utf-8 initials in indexed - words will be handled correctly, and entries will be ordered - according to the rules appropriate to the :confval:`language`. - Currently, this uses :program:`texindy` and only (most) European - languages with Latin scripts are supported. For usages requiring - direct use of ``xindy``, user will have to customize the files - :file:`Makefile` and :file:`latexmkrc` which are written to LaTeX - build repertory. This can be done via - :confval:`latex_additional_files` and customized such files located - in source repertory. + If ``True``, the PDF build from the LaTeX files created by Sphinx + will use :program:`xindy` (doc__) rather than :program:`makeindex` + for preparing the index of general terms (from :rst:dir:`index` + usage). This means that words with UTF-8 characters will get + ordered correctly for the :confval:`language`. __ http://xindy.sourceforge.net/ - This option is recommended in case of :confval:`latex_engine` set - to ``xelatex`` or ``lualatex`` (it is even mandatory for the latter - as the PDF build is broken if some indexed terms start with a - non-ascii character). It is without effect in case of - :confval:`platex` (Japanese documents). Even if - :confval:`latex_engine` is left to its default, the option is - recommended as soon as indexed terms use non-ascii characters. + - This option is ignored if :confval:`latex_engine` is ``'platex'`` + (Japanese documents) as :program:`mendex` is used in that case. + + - The default is ``True`` for ``'xelatex'`` or ``'lualatex'`` as + :program:`makeindex`, if any indexed term starts with a non-ascii + character, creates ``.ind`` file containing invalid bytes for + UTF-8 encoding. With ``'lualatex'`` this then breaks the PDF + build. 
Notice that :program:`xindy` supports most but not + all European languages. + + - The default is ``False`` for ``'pdflatex'`` but ``True`` is + recommended for non-English documents as soon as some indexed + terms use non-ascii characters from the language script. + Cyrillic scripts are (transparently) supported with + ``'pdflatex'`` thanks to a specific Sphinx-contributed ``xindy`` + style file :file:`cyrLICRutf8.xdy`. + + As :program:`xindy` does not support the same range of languages + as ``LaTeX/babel`` does, the default :program:`makeindex` for + ``'pdflatex'`` may be preferred in some circumstances, although + the index will probably be ill-formed. .. versionadded:: 1.8 diff --git a/sphinx/builders/latex/__init__.py b/sphinx/builders/latex/__init__.py index d90feb32b..c17ead5c2 100644 --- a/sphinx/builders/latex/__init__.py +++ b/sphinx/builders/latex/__init__.py @@ -44,30 +44,66 @@ if False: from sphinx.config import Config # NOQA -XINDY_LANGUAGES = { - # currently only Latin scripts here, for use with -L option of texindy - # code, name - 'hr': 'croatian', - 'cs': 'czech', - 'da': 'danish', - 'en': 'english', - 'et': 'estonian', - 'fi': 'finnish', - 'fr': 'french', - 'de': 'german-din', # there is also german-duden - 'hu': 'hungarian', - 'it': 'italian', - 'lv': 'latvian', - 'lt': 'lithuanian', - 'nb': 'norwegian', - 'pl': 'polish', - 'pt': 'portuguese', - 'sk': 'slovak-small', # xindy recognizes slovak-small and slovak-large - 'sl': 'slovenian', - 'es': 'spanish-modern', - 'sv': 'swedish', - 'tr': 'turkish' -} +XINDY_LANG_OPTIONS = { + # language codes from docutils.writers.latex2e.Babel + # ! xindy language names may differ from those in use by LaTeX/babel + # ! xindy does not support all Latin scripts as recognized by LaTeX/babel + # ! not all xindy-supported languages appear in Babel.language_codes + # cd /usr/local/texlive/2018/texmf-dist/xindy/modules/lang + # find . 
-name '*utf8.xdy' + # LATIN + 'sq': '-L albanian -C utf8 ', + 'hr': '-L croatian -C utf8 ', + 'cs': '-L czech -C utf8 ', + 'da': '-L danish -C utf8 ', + 'nl': '-L dutch -C ij-as-ij-utf8 ', + 'en': '-L english -C utf8 ', + 'eo': '-L esperanto -C utf8 ', + 'et': '-L estonian -C utf8 ', + 'fi': '-L finnish -C utf8 ', + 'fr': '-L french -C utf8 ', + 'de': '-L german -C din5007-utf8 ', + 'is': '-L icelandic -C utf8 ', + 'it': '-L italian -C utf8 ', + 'la': '-L latin -C utf8 ', + 'lv': '-L latvian -C utf8 ', + 'lt': '-L lithuanian -C utf8 ', + 'dsb': '-L lower-sorbian -C utf8 ', + 'ds': '-L lower-sorbian -C utf8 ', # trick, no conflict + 'nb': '-L norwegian -C utf8 ', + 'no': '-L norwegian -C utf8 ', # and what about nynorsk? + 'pl': '-L polish -C utf8 ', + 'pt': '-L portuguese -C utf8 ', + 'ro': '-L romanian -C utf8 ', + 'sk': '-L slovak -C small-utf8 ', # there is also slovak-large + 'sl': '-L slovenian -C utf8 ', + 'es': '-L spanish -C modern-utf8 ', # there is also spanish-traditional + 'sv': '-L swedish -C utf8 ', + 'tr': '-L turkish -C utf8 ', + 'hsb': '-L upper-sorbian -C utf8 ', + 'hs': '-L upper-sorbian -C utf8 ', # trick, no conflict + 'vi': '-L vietnamese -C utf8 ', + # CYRILLIC + # for usage with pdflatex, needs also cyrLICRutf8.xdy module + 'be': '-L belarusian -C utf8 ', + 'bg': '-L bulgarian -C utf8 ', + 'mk': '-L macedonian -C utf8 ', + 'mn': '-L mongolian -C cyrillic-utf8 ', + 'ru': '-L russian -C utf8 ', + 'sr': '-L serbian -C utf8 ', + 'sh-cyrl': '-L serbian -C utf8 ', + 'sh': '-L serbian -C utf8 ', # trick, no conflict + 'uk': '-L ukrainian -C utf8 ', + # GREEK + # can work only with xelatex/lualatex, not supported by texindy+pdflatex + 'el': '-L greek -C utf8 ', + # FIXME, not compatible with [:2] slice but does Sphinx support Greek ? 
+ 'el-polyton': '-L greek -C polytonic-utf8 ', +} # type: Dict[unicode, unicode] + +XINDY_CYRILLIC_SCRIPTS = [ + 'be', 'bg', 'mk', 'mn', 'ru', 'sr', 'sh', 'uk', +] # type: List[unicode] logger = logging.getLogger(__name__) @@ -257,14 +293,23 @@ class LaTeXBuilder(Builder): self.copy_image_files() # copy TeX support files from texinputs + # configure usage of xindy (impacts Makefile and latexmkrc) + # FIXME: convert this rather to a confval with suitable default + # according to language ? but would require extra documentation if self.config.language: - xindy_lang = \ - XINDY_LANGUAGES.get(self.config.language[:2], 'general') + xindy_lang_option = \ + XINDY_LANG_OPTIONS.get(self.config.language[:2], + '-L general -C utf8 ') + xindy_cyrillic = self.config.language[:2] in XINDY_CYRILLIC_SCRIPTS else: - xindy_lang = 'english' - context = {'latex_engine': self.config.latex_engine, - 'latex_use_xindy': self.config.latex_use_xindy, - 'xindy_lang': xindy_lang} + xindy_lang_option = '-L english -C utf8 ' + xindy_cyrillic = False + context = { + 'latex_engine': self.config.latex_engine, + 'xindy_use': self.config.latex_use_xindy, + 'xindy_lang_option': xindy_lang_option, + 'xindy_cyrillic': xindy_cyrillic, + } logger.info(bold(__('copying TeX support files...'))) staticdirname = path.join(package_dir, 'texinputs') for filename in os.listdir(staticdirname): @@ -349,6 +394,12 @@ def default_latex_docclass(config): return {} +def default_latex_use_xindy(config): + # type: (Config) -> bool + """ Better default latex_use_xindy settings for specific engines. 
""" + return config.latex_engine in {'xelatex', 'lualatex'} + + def setup(app): # type: (Sphinx) -> Dict[unicode, Any] app.add_builder(LaTeXBuilder) @@ -366,7 +417,7 @@ def setup(app): app.add_config_value('latex_logo', None, None, string_classes) app.add_config_value('latex_appendices', [], None) app.add_config_value('latex_use_latex_multicolumn', False, None) - app.add_config_value('latex_use_xindy', False, None) + app.add_config_value('latex_use_xindy', default_latex_use_xindy, None) app.add_config_value('latex_toplevel_sectioning', None, None, ENUM(None, 'part', 'chapter', 'section')) app.add_config_value('latex_domain_indices', True, None, [list]) diff --git a/sphinx/templates/latex/latex.tex_t b/sphinx/templates/latex/latex.tex_t index 5a158444c..d219f8525 100644 --- a/sphinx/templates/latex/latex.tex_t +++ b/sphinx/templates/latex/latex.tex_t @@ -13,6 +13,11 @@ \ifdefined\pdfpxdimen \let\sphinxpxdimen\pdfpxdimen\else\newdimen\sphinxpxdimen \fi \sphinxpxdimen=<%= pxunit %>\relax +<% if use_xindy -%> +%% turn off hyperref patch of \index as sphinx.xdy xindy module takes care of +%% suitable \hyperpage mark-up, working around hyperref-xindy incompatibility +\PassOptionsToPackage{hyperindex=false}{hyperref} +<% endif -%> <%= passoptionstopackages %> \PassOptionsToPackage{warn}{textcomp} <%= inputenc %> diff --git a/sphinx/texinputs/Makefile_t b/sphinx/texinputs/Makefile_t index 82ea29de1..1ce4b1324 100644 --- a/sphinx/texinputs/Makefile_t +++ b/sphinx/texinputs/Makefile_t @@ -22,10 +22,13 @@ export LATEXOPTS = # or on command line for faster builds. 
{% endif -%} LATEXMKOPTS = -{% if latex_use_xindy -%} -export XINDYOPTS = -L {{ xindy_lang }} -C utf8 -M python.xdy +{% if xindy_use -%} +export XINDYOPTS = {{ xindy_lang_option }} -M sphinx.xdy +{% if latex_engine == 'pdflatex' and xindy_cyrillic -%} +XINDYOPTS += -M cyrLICRutf8.xdy +{% endif -%} {% if latex_engine == 'xelatex' or latex_engine == 'lualatex' -%} -export XINDYOPTS += -I xelatex +XINDYOPTS += -I xelatex {% endif -%} {% endif -%} # format: pdf or dvi (used only by archive targets) diff --git a/sphinx/texinputs/cyrLICRutf8.xdy b/sphinx/texinputs/cyrLICRutf8.xdy new file mode 100644 index 000000000..86d8cd87b --- /dev/null +++ b/sphinx/texinputs/cyrLICRutf8.xdy @@ -0,0 +1,103 @@ +;; -*- coding: utf-8; mode: Lisp; -*- +;; style file for xindy +;; filename: cyrLICRutf8.xdy +;; description: style file for xindy which maps back LaTeX Internal +;; Character Representation of Cyrillic to utf-8 +;; usage: for use with pdflatex produced .idx files. +;; This module must be loaded after the rule for suppressing space +;; characters has been executed, hence after module sphinx.xdy. +;; Contributed by the Sphinx team, July 2018. 
+(merge-rule "\IeC{\'\CYRG}" "Ѓ" :string) +(merge-rule "\IeC{\'\CYRK}" "Ќ" :string) +(merge-rule "\IeC{\'\cyrg}" "ѓ" :string) +(merge-rule "\IeC{\'\cyrk}" "ќ" :string) +(merge-rule "\IeC{\CYRA}" "А" :string) +(merge-rule "\IeC{\CYRB}" "Б" :string) +(merge-rule "\IeC{\CYRC}" "Ц" :string) +(merge-rule "\IeC{\CYRCH}" "Ч" :string) +(merge-rule "\IeC{\CYRD}" "Д" :string) +(merge-rule "\IeC{\CYRDJE}" "Ђ" :string) +(merge-rule "\IeC{\CYRDZE}" "Ѕ" :string) +(merge-rule "\IeC{\CYRDZHE}" "Џ" :string) +(merge-rule "\IeC{\CYRE}" "Е" :string) +(merge-rule "\IeC{\CYREREV}" "Э" :string) +(merge-rule "\IeC{\CYRERY}" "Ы" :string) +(merge-rule "\IeC{\CYRF}" "Ф" :string) +(merge-rule "\IeC{\CYRG}" "Г" :string) +(merge-rule "\IeC{\CYRGUP}" "Ґ" :string) +(merge-rule "\IeC{\CYRH}" "Х" :string) +(merge-rule "\IeC{\CYRHRDSN}" "Ъ" :string) +(merge-rule "\IeC{\CYRI}" "И" :string) +(merge-rule "\IeC{\CYRIE}" "Є" :string) +(merge-rule "\IeC{\CYRII}" "І" :string) +(merge-rule "\IeC{\CYRISHRT}" "Й" :string) +(merge-rule "\IeC{\CYRJE}" "Ј" :string) +(merge-rule "\IeC{\CYRK}" "К" :string) +(merge-rule "\IeC{\CYRL}" "Л" :string) +(merge-rule "\IeC{\CYRLJE}" "Љ" :string) +(merge-rule "\IeC{\CYRM}" "М" :string) +(merge-rule "\IeC{\CYRN}" "Н" :string) +(merge-rule "\IeC{\CYRNJE}" "Њ" :string) +(merge-rule "\IeC{\CYRO}" "О" :string) +(merge-rule "\IeC{\CYRP}" "П" :string) +(merge-rule "\IeC{\CYRR}" "Р" :string) +(merge-rule "\IeC{\CYRS}" "С" :string) +(merge-rule "\IeC{\CYRSFTSN}" "Ь" :string) +(merge-rule "\IeC{\CYRSH}" "Ш" :string) +(merge-rule "\IeC{\CYRSHCH}" "Щ" :string) +(merge-rule "\IeC{\CYRT}" "Т" :string) +(merge-rule "\IeC{\CYRTSHE}" "Ћ" :string) +(merge-rule "\IeC{\CYRU}" "У" :string) +(merge-rule "\IeC{\CYRUSHRT}" "Ў" :string) +(merge-rule "\IeC{\CYRV}" "В" :string) +(merge-rule "\IeC{\CYRYA}" "Я" :string) +(merge-rule "\IeC{\CYRYI}" "Ї" :string) +(merge-rule "\IeC{\CYRYO}" "Ё" :string) +(merge-rule "\IeC{\CYRYU}" "Ю" :string) +(merge-rule "\IeC{\CYRZ}" "З" :string) +(merge-rule 
"\IeC{\CYRZH}" "Ж" :string) +(merge-rule "\IeC{\cyra}" "а" :string) +(merge-rule "\IeC{\cyrb}" "б" :string) +(merge-rule "\IeC{\cyrc}" "ц" :string) +(merge-rule "\IeC{\cyrch}" "ч" :string) +(merge-rule "\IeC{\cyrd}" "д" :string) +(merge-rule "\IeC{\cyrdje}" "ђ" :string) +(merge-rule "\IeC{\cyrdze}" "ѕ" :string) +(merge-rule "\IeC{\cyrdzhe}" "џ" :string) +(merge-rule "\IeC{\cyre}" "е" :string) +(merge-rule "\IeC{\cyrerev}" "э" :string) +(merge-rule "\IeC{\cyrery}" "ы" :string) +(merge-rule "\IeC{\cyrf}" "ф" :string) +(merge-rule "\IeC{\cyrg}" "г" :string) +(merge-rule "\IeC{\cyrgup}" "ґ" :string) +(merge-rule "\IeC{\cyrh}" "х" :string) +(merge-rule "\IeC{\cyrhrdsn}" "ъ" :string) +(merge-rule "\IeC{\cyri}" "и" :string) +(merge-rule "\IeC{\cyrie}" "є" :string) +(merge-rule "\IeC{\cyrii}" "і" :string) +(merge-rule "\IeC{\cyrishrt}" "й" :string) +(merge-rule "\IeC{\cyrje}" "ј" :string) +(merge-rule "\IeC{\cyrk}" "к" :string) +(merge-rule "\IeC{\cyrl}" "л" :string) +(merge-rule "\IeC{\cyrlje}" "љ" :string) +(merge-rule "\IeC{\cyrm}" "м" :string) +(merge-rule "\IeC{\cyrn}" "н" :string) +(merge-rule "\IeC{\cyrnje}" "њ" :string) +(merge-rule "\IeC{\cyro}" "о" :string) +(merge-rule "\IeC{\cyrp}" "п" :string) +(merge-rule "\IeC{\cyrr}" "р" :string) +(merge-rule "\IeC{\cyrs}" "с" :string) +(merge-rule "\IeC{\cyrsftsn}" "ь" :string) +(merge-rule "\IeC{\cyrsh}" "ш" :string) +(merge-rule "\IeC{\cyrshch}" "щ" :string) +(merge-rule "\IeC{\cyrt}" "т" :string) +(merge-rule "\IeC{\cyrtshe}" "ћ" :string) +(merge-rule "\IeC{\cyru}" "у" :string) +(merge-rule "\IeC{\cyrushrt}" "ў" :string) +(merge-rule "\IeC{\cyrv}" "в" :string) +(merge-rule "\IeC{\cyrya}" "я" :string) +(merge-rule "\IeC{\cyryi}" "ї" :string) +(merge-rule "\IeC{\cyryo}" "ё" :string) +(merge-rule "\IeC{\cyryu}" "ю" :string) +(merge-rule "\IeC{\cyrz}" "з" :string) +(merge-rule "\IeC{\cyrzh}" "ж" :string) diff --git a/sphinx/texinputs/latexmkrc_t b/sphinx/texinputs/latexmkrc_t index c0965ffe3..d52681fbd 100644 --- 
a/sphinx/texinputs/latexmkrc_t +++ b/sphinx/texinputs/latexmkrc_t @@ -10,8 +10,8 @@ $pdflatex = 'xelatex ' . $ENV{'LATEXOPTS'} . ' %O %S'; {% endif -%} $lualatex = 'lualatex ' . $ENV{'LATEXOPTS'} . ' %O %S'; $xelatex = 'xelatex --no-pdf ' . $ENV{'LATEXOPTS'} . ' %O %S'; -{% if latex_use_xindy -%} -$makeindex = 'texindy ' . $ENV{'XINDYOPTS'} . ' -t %B.ilg %O -o %D %S'; +{% if xindy_use -%} +$makeindex = 'xindy ' . $ENV{'XINDYOPTS'} . ' %O -o %D %S'; {% else -%} $makeindex = 'makeindex -s python.ist %O -o %D %S'; {% endif -%} diff --git a/sphinx/texinputs/python.ist b/sphinx/texinputs/python.ist index f560754c0..203fde319 100644 --- a/sphinx/texinputs/python.ist +++ b/sphinx/texinputs/python.ist @@ -3,7 +3,7 @@ headings_flag 1 heading_prefix " \\bigletter " preamble "\\begin{sphinxtheindex} -\\let\\bigletter\\sphinxstyleindexletterhead +\\let\\bigletter\\sphinxstyleindexlettergroup " diff --git a/sphinx/texinputs/python.xdy b/sphinx/texinputs/python.xdy deleted file mode 100644 index d84b9668b..000000000 --- a/sphinx/texinputs/python.xdy +++ /dev/null @@ -1,16 +0,0 @@ -(markup-index :open "\begin{sphinxtheindex} -\let\lettergroup\sphinxstyleindexletterhead -\let\lettergroupDefault\sphinxstyleindexxindygroupheadsymbols - -" - :close " - -\end{sphinxtheindex} -" - :tree) - -;; End - -;; Local Variables: -;; mode: lisp -;; End: diff --git a/sphinx/texinputs/sphinx.sty b/sphinx/texinputs/sphinx.sty index d8ee2ed65..6ee1acb90 100644 --- a/sphinx/texinputs/sphinx.sty +++ b/sphinx/texinputs/sphinx.sty @@ -470,8 +470,8 @@ {\newenvironment{sphinxtheindex}{\begin{theindex}}{\end{theindex}}}% {}% else clause of \ltx@ifundefined -% for usage with xindy string is internationalized in document preamble -\newcommand*{\sphinxsymbolsandnumbersname}{Symbols and Numbers} +% for usage with xindy: this string gets internationalized in preamble +\newcommand*{\sphinxnonalphabeticalgroupname}{} %% COLOR (general) % @@ -1596,9 +1596,10 @@ \def\sphinxstyleindexentry #1{\texttt{#1}} 
\def\sphinxstyleindexextra #1{ \emph{(#1)}} \def\sphinxstyleindexpageref #1{, \pageref{#1}} -\def\sphinxstyleindexletterhead #1{{\Large\sffamily#1}\nopagebreak\vspace{1mm}} -\def\sphinxstyleindexxindygroupheadsymbols - {{\Large\sffamily\sphinxsymbolsandnumbersname}\nopagebreak\vspace{1mm}} +\def\sphinxstyleindexlettergroup #1% + {{\Large\sffamily#1}\nopagebreak\vspace{1mm}} +\def\sphinxstyleindexlettergroupDefault #1% + {{\Large\sffamily\sphinxnonalphabeticalgroupname}\nopagebreak\vspace{1mm}} \protected\def\sphinxstyletopictitle #1{\textbf{#1}\par\medskip} \let\sphinxstylesidebartitle\sphinxstyletopictitle \protected\def\sphinxstyleothertitle #1{\textbf{#1}} diff --git a/sphinx/texinputs/sphinx.xdy b/sphinx/texinputs/sphinx.xdy new file mode 100644 index 000000000..d9f99dc2a --- /dev/null +++ b/sphinx/texinputs/sphinx.xdy @@ -0,0 +1,133 @@ +;;; -*- mode: lisp; coding: utf-8; -*- + +;; Unfortunately xindy is out-of-the-box hyperref-incompatible. This +;; configuration is a workaround, which requires passing option +;; hyperindex=false to hyperref. +;; textit and emph are not currently used by Sphinx LaTeX writer. +(define-attributes (("textbf" "textit" "emph" "default"))) +(markup-locref :open "\textbf{\hyperpage{" :close "}}" :attr "textbf") +(markup-locref :open "\textit{\hyperpage{" :close "}}" :attr "textit") +(markup-locref :open "\emph{\hyperpage{" :close "}}" :attr "emph") +(markup-locref :open "\hyperpage{" :close "}" :attr "default") + +(require "numeric-sort.xdy") + +;; xindy base module latex.xdy loads tex.xdy and the latter instructs +;; xindy to ignore **all** TeX macros in .idx entries, except those +;; explicitly described in merge rules. 
But when, after applying all +;; merge rules, an empty string results, xindy raises an error: + +;; ERROR: CHAR: index 0 should be less than the length of the string + +;; For example when using pdflatex with utf-8 characters the index +;; file will contain \IeC macros and they will get ignored except if +;; suitable merge rules are loaded early. The texindy script coming +;; with xindy provides this, but only for Latin scripts. The texindy +;; man page says to rather use xelatex or lualatex in case of Cyrillic +;; scripts. + +;; Sphinx contributes cyrLICRutf8.xdy to provide support for Cyrillic +;; scripts for the pdflatex engine. + +;; Another issue caused by xindy ignoring all TeX macros except those +;; explicitly declared reveals itself when attempting to index ">>>", +;; as the ">" is converted to "\textgreater{}" by Sphinx's LaTeX +;; escaping. + +;; To fix this, Sphinx does **not** use texindy, and does not even +;; load the xindy latex.xdy base module. + +;(require "latex.xdy") + +;; Rather it incorporates some suitable extracts from latex.xdy and +;; tex.xdy with additional Sphinx contributed rules. + +;;;;;;;; extracts from tex.xdy (discarding most original comments): + +;;; +;;; TeX conventions +;;; + +;; Discard leading and trailing white space. Collapse multiple white +;; space characters to blank. + +(merge-rule "^ +" "" :eregexp) +(merge-rule " +$" "" :eregexp) +(merge-rule " +" " " :eregexp) + +;; Handle TeX markup + +(merge-rule "\\([{}$%&#])" "\1" :eregexp) + +;;;;;;;; end of extracts from xindy's tex.xdy + +;;;;;;;; extracts from latex.xdy: + +;; Standard location classes: arabic and roman numbers, and alphabets. 
+ +(define-location-class "arabic-page-numbers" ("arabic-numbers")) +(define-location-class "roman-page-numbers" ("roman-numbers-lowercase")) +(define-location-class "Roman-page-numbers" ("roman-numbers-uppercase")) +(define-location-class "alpha-page-numbers" ("alpha")) +(define-location-class "Alpha-page-numbers" ("ALPHA")) + +;; Output Markup + +(markup-letter-group-list :sep "~n~n \indexspace~n") + +(markup-indexentry :open "~n \item " :depth 0) +(markup-indexentry :open "~n \subitem " :depth 1) +(markup-indexentry :open "~n \subsubitem " :depth 2) + +(markup-locclass-list :open ", " :sep ", ") +(markup-locref-list :sep ", ") + +;;;;;;;; end of extracts from latex.xdy + +;; Sphinx additions, cf sphinx.util.texescape for rationale +;; +;; blanks are already ignored from above merge-rules, so no space +;; character after TeX control words, despite the fact that they will +;; be present in .idx file. + +(merge-rule "\\sphinxleftcurlybrace\{\}" "{") +(merge-rule "\\sphinxrightcurlybrace\{\}" "}") +(merge-rule "\\_" "_") +(merge-rule "\{\[\}" "[") +(merge-rule "\{\]\}" "]") +(merge-rule "\{\}`" "`") +(merge-rule "\\textbackslash\{\}" "\\") +(merge-rule "\\textasciitilde\{\}" "~~") +(merge-rule "\\textless\{\}" "<") +(merge-rule "\\textgreater\{\}" ">") +(merge-rule "\\textasciicircum\{\}" "^") +(merge-rule "\\P\{\}" "¶") +(merge-rule "\\S\{\}" "§") +(merge-rule "\\texteuro\{\}" "€") +(merge-rule "\\\(\\infty\\\)" "∞") +(merge-rule "\\\(\\pm\\\)" "±") +(merge-rule "\\\(\\rightarrow\\\)" "→") +(merge-rule "\\\(\\checkmark\\\)" "✓") +(merge-rule "\\textendash\{\}" "–") +(merge-rule "\\textbar\{\}" "|") + +;; This xindy module provides some basic support for "see" +(require "makeindex.xdy") + +;; This creates one-letter headings and works fine with utf-8 letters. +;; For Cyrillic with pdflatex, cyrLICRutf8.xdy must be loaded too. 
+(require "latin-lettergroups.xdy") + +;; currently we don't (know how to easily) separate "Numbers" from +;; "Symbols" with xindy as is the case with makeindex. +(markup-index :open "\begin{sphinxtheindex} +\let\lettergroup\sphinxstyleindexlettergroup +\let\lettergroupDefault\sphinxstyleindexlettergroupDefault + +" + :close " + +\end{sphinxtheindex} +" + :tree) + diff --git a/sphinx/writers/latex.py b/sphinx/writers/latex.py index 5054d7943..3e170baa0 100644 --- a/sphinx/writers/latex.py +++ b/sphinx/writers/latex.py @@ -497,6 +497,7 @@ class LaTeXTranslator(nodes.NodeVisitor): 'release': self.encode(builder.config.release), 'author': document.settings.author, # treat as a raw LaTeX code 'indexname': _('Index'), + 'use_xindy': builder.config.latex_use_xindy, }) if not self.elements['releasename'] and self.elements['release']: self.elements.update({ @@ -675,7 +676,7 @@ class LaTeXTranslator(nodes.NodeVisitor): '\\literalblockcontinuesname', self.encode(_('continues on next page')) ) + self.babel_renewcommand( - '\\sphinxsymbolsandnumbersname', self.encode(_('Symbols and Numbers')) + '\\sphinxnonalphabeticalgroupname', self.encode(_('Non-alphabetical')) ) ) self.elements['pageautorefname'] = \ @@ -859,7 +860,7 @@ class LaTeXTranslator(nodes.NodeVisitor): def generate(content, collapsed): # type: (List[Tuple[unicode, List[Tuple[unicode, unicode, unicode, unicode, unicode]]]], bool) -> None # NOQA ret.append('\\begin{sphinxtheindex}\n') - ret.append('\\let\\bigletter\\sphinxstyleindexletterhead\n') + ret.append('\\let\\bigletter\\sphinxstyleindexlettergroup\n') for i, (letter, entries) in enumerate(content): if i > 0: ret.append('\\indexspace\n') @@ -1922,8 +1923,8 @@ class LaTeXTranslator(nodes.NodeVisitor): # type: (nodes.Node, Pattern) -> None def escape(value): value = self.encode(value) - value = value.replace(r'\{', r'{\sphinxleftcurlybrace}') - value = value.replace(r'\}', r'{\sphinxrightcurlybrace}') + value = value.replace(r'\{', r'\sphinxleftcurlybrace{}') + 
value = value.replace(r'\}', r'\sphinxrightcurlybrace{}') value = value.replace('"', '""') value = value.replace('@', '"@') value = value.replace('!', '"!')