In translated docs, sort glossaries by translated terms

This is done by moving the sorting from the glossary directive to a transform operating after the i18n transform. Closes #9827
2025-02-25 18:55:22 -06:00 · 2022-01-11 22:40:01 +01:00 · 2022-01-11 22:40:01 +01:00 · 0269bec1ed
commit 0269bec1ed
parent 3ed92651d5
6 changed files with 125 additions and 55 deletions
--- a/doc/usage/restructuredtext/directives.rst
+++ b/doc/usage/restructuredtext/directives.rst
@ -831,6 +831,9 @@ Glossary
   .. versionchanged:: 1.4
      Index key for glossary term should be considered *experimental*.

+   .. versionchanged:: 4.4
+      In internationalized documentation, the ``:sorted:`` flag sorts
+      according to translated terms.

 Meta-information markup
 -----------------------
--- a/sphinx/domains/std.py
+++ b/sphinx/domains/std.py
@ -9,7 +9,6 @@
 """

 import re
-import unicodedata
 import warnings
 from copy import copy
 from typing import (TYPE_CHECKING, Any, Callable, Dict, Iterable, Iterator, List, Optional,
@ -336,6 +335,7 @@ class Glossary(SphinxDirective):
    def run(self) -> List[Node]:
        node = addnodes.glossary()
        node.document = self.state.document
+        node['sorted'] = ('sorted' in self.options)

        # This directive implements a custom format of the reST definition list
        # that allows multiple lines of terms before the definition.  This is
@ -400,9 +400,8 @@ class Glossary(SphinxDirective):
            was_empty = False

        # now, parse all the entries into a big definition list
-        items = []
+        items: List[nodes.definition_list_item] = []
        for terms, definition in entries:
-            termtexts: List[str] = []
            termnodes: List[Node] = []
            system_messages: List[Node] = []
            for line, source, lineno in terms:
@ -416,7 +415,6 @@ class Glossary(SphinxDirective):
                                          node_id=None, document=self.state.document)
                term.rawsource = line
                system_messages.extend(sysmsg)
-                termtexts.append(term.astext())
                termnodes.append(term)

            termnodes.extend(system_messages)
@ -426,16 +424,10 @@ class Glossary(SphinxDirective):
                self.state.nested_parse(definition, definition.items[0][1],
                                        defnode)
            termnodes.append(defnode)
-            items.append((termtexts,
-                          nodes.definition_list_item('', *termnodes)))
+            items.append(nodes.definition_list_item('', *termnodes))

-        if 'sorted' in self.options:
-            items.sort(key=lambda x:
-                       unicodedata.normalize('NFD', x[0][0].lower()))
-
-        dlist = nodes.definition_list()
+        dlist = nodes.definition_list('', *items)
        dlist['classes'].append('glossary')
-        dlist.extend(item[1] for item in items)
        node += dlist
        return messages + [node]

--- a/sphinx/transforms/__init__.py
+++ b/sphinx/transforms/__init__.py
@ -9,8 +9,9 @@
 """

 import re
+import unicodedata
 import warnings
-from typing import TYPE_CHECKING, Any, Dict, Generator, List, Optional, Tuple
+from typing import TYPE_CHECKING, Any, Dict, Generator, List, Optional, Tuple, cast

 from docutils import nodes
 from docutils.nodes import Element, Node, Text
@ -405,6 +406,24 @@ class ManpageLink(SphinxTransform):
            node.attributes.update(info)


+class GlossarySorter(SphinxTransform):
+    """Sort glossaries that have the ``sorted`` flag.
+
+    For every ``addnodes.glossary`` node whose ``sorted`` attribute is true,
+    the children of its first child (the definition list) are reordered in
+    place. The sort key is the text of each item's first child, lowercased
+    and NFD-normalized.
+    """
+    # This must be done after i18n, therefore not right
+    # away in the glossary directive.
+    default_priority = 500
+
+    def apply(self, **kwargs: Any) -> None:
+        for glossary in self.document.findall(addnodes.glossary):
+            # Use .get() rather than indexing: a glossary node that was not
+            # given a ``sorted`` attribute is treated as unsorted instead of
+            # raising KeyError.
+            if not glossary.get("sorted"):
+                continue
+            definition_list = cast(nodes.definition_list, glossary[0])
+            # Slice assignment replaces the children in place, keeping the
+            # same definition_list node in the tree.
+            definition_list[:] = sorted(
+                definition_list,
+                key=lambda item: unicodedata.normalize(
+                    'NFD',
+                    # The cast applies to the item's first child (the term),
+                    # not to the definition_list_item itself.
+                    cast(nodes.term, item[0]).astext().lower())
+            )
+
+
 def setup(app: "Sphinx") -> Dict[str, Any]:
    app.add_transform(ApplySourceWorkaround)
    app.add_transform(ExtraTranslatableNodes)
@ -420,6 +439,7 @@ def setup(app: "Sphinx") -> Dict[str, Any]:
    app.add_transform(SphinxSmartQuotes)
    app.add_transform(DoctreeReadEvent)
    app.add_transform(ManpageLink)
+    app.add_transform(GlossarySorter)

    return {
        'version': 'builtin',
--- a/tests/roots/test-intl/glossary_terms.txt
+++ b/tests/roots/test-intl/glossary_terms.txt
@ -12,3 +12,18 @@ i18n with glossary terms
      The corresponding glossary #2

 link to :term:`Some term`.
+
+Translated glossary should be sorted by translated terms:
+
+.. glossary::
+   :sorted:
+
+   AAA
+      Define AAA
+
+   CCC
+   EEE
+      Define CCC
+
+   BBB
+      Define BBB
--- a/tests/roots/test-intl/xx/LC_MESSAGES/glossary_terms.po
+++ b/tests/roots/test-intl/xx/LC_MESSAGES/glossary_terms.po
@ -1,35 +1,59 @@
-# SOME DESCRIPTIVE TITLE.
-# Copyright (C) 2012, foof
-# This file is distributed under the same license as the foo package.
-# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
-#
-#, fuzzy
-msgid ""
-msgstr ""
-"Project-Id-Version: sphinx 1.0\n"
-"Report-Msgid-Bugs-To: \n"
-"POT-Creation-Date: 2013-01-29 14:10+0000\n"
-"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
-"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
-"Language-Team: LANGUAGE <LL@li.org>\n"
-"MIME-Version: 1.0\n"
-"Content-Type: text/plain; charset=UTF-8\n"
-"Content-Transfer-Encoding: 8bit\n"
-
-msgid "i18n with glossary terms"
-msgstr "I18N WITH GLOSSARY TERMS"
-
-msgid "Some term"
-msgstr "SOME NEW TERM"
-
-msgid "The corresponding glossary"
-msgstr "THE CORRESPONDING GLOSSARY"
-
-msgid "Some other term"
-msgstr "SOME OTHER NEW TERM"
-
-msgid "The corresponding glossary #2"
-msgstr "THE CORRESPONDING GLOSSARY #2"
-
-msgid "link to :term:`Some term`."
-msgstr "LINK TO :term:`SOME NEW TERM`."
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) 2012, foof
+# This file is distributed under the same license as the foo package.
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+#, fuzzy
+msgid ""
+msgstr ""
+"Project-Id-Version: sphinx 1.0\n"
+"Report-Msgid-Bugs-To: \n"
+"POT-Creation-Date: 2013-01-29 14:10+0000\n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+msgid "i18n with glossary terms"
+msgstr "I18N WITH GLOSSARY TERMS"
+
+msgid "Some term"
+msgstr "SOME NEW TERM"
+
+msgid "The corresponding glossary"
+msgstr "THE CORRESPONDING GLOSSARY"
+
+msgid "Some other term"
+msgstr "SOME OTHER NEW TERM"
+
+msgid "The corresponding glossary #2"
+msgstr "THE CORRESPONDING GLOSSARY #2"
+
+msgid "link to :term:`Some term`."
+msgstr "LINK TO :term:`SOME NEW TERM`."
+
+msgid "Translated glossary should be sorted by translated terms:"
+msgstr "TRANSLATED GLOSSARY SHOULD BE SORTED BY TRANSLATED TERMS:"
+
+msgid "BBB"
+msgstr "TRANSLATED TERM XXX"
+
+msgid "Define BBB"
+msgstr "DEFINE XXX"
+
+msgid "AAA"
+msgstr "TRANSLATED TERM YYY"
+
+msgid "Define AAA"
+msgstr "DEFINE YYY"
+
+msgid "CCC"
+msgstr "TRANSLATED TERM ZZZ"
+
+msgid "EEE"
+msgstr "VVV"
+
+msgid "Define CCC"
+msgstr "DEFINE ZZZ"
--- a/tests/test_intl.py
+++ b/tests/test_intl.py
@ -241,13 +241,29 @@ def test_text_glossary_term(app, warning):
    app.build()
    # --- glossary terms: regression test for #1090
    result = (app.outdir / 'glossary_terms.txt').read_text()
-    expect = ("18. I18N WITH GLOSSARY TERMS"
-              "\n****************************\n"
-              "\nSOME NEW TERM"
-              "\n   THE CORRESPONDING GLOSSARY\n"
-              "\nSOME OTHER NEW TERM"
-              "\n   THE CORRESPONDING GLOSSARY #2\n"
-              "\nLINK TO *SOME NEW TERM*.\n")
+    expect = (r"""18. I18N WITH GLOSSARY TERMS
+****************************
+
+SOME NEW TERM
+   THE CORRESPONDING GLOSSARY
+
+SOME OTHER NEW TERM
+   THE CORRESPONDING GLOSSARY #2
+
+LINK TO *SOME NEW TERM*.
+
+TRANSLATED GLOSSARY SHOULD BE SORTED BY TRANSLATED TERMS:
+
+TRANSLATED TERM XXX
+   DEFINE XXX
+
+TRANSLATED TERM YYY
+   DEFINE YYY
+
+TRANSLATED TERM ZZZ
+VVV
+   DEFINE ZZZ
+""")
    assert result == expect
    warnings = getwarning(warning)
    assert 'term not in glossary' not in warnings