Merge pull request #3470 from mitya57/stable

Make genindex support all kinds of letters, not only Latin ones
2025-02-25 18:55:22 -06:00 · 2017-03-01 02:31:32 +09:00
parent 7d0ac36e2d 73cd4b038a
commit 1e9c791285
2 changed files with 22 additions and 9 deletions
--- a/sphinx/environment/managers/indexentries.py
+++ b/sphinx/environment/managers/indexentries.py
@@ -11,7 +11,6 @@
 import re
 import bisect
 import unicodedata
-import string
 from itertools import groupby

 from six import text_type
@@ -114,13 +113,15 @@ class IndexEntries(EnvironmentManager):

        # sort the index entries; put all symbols at the front, even those
        # following the letters in ASCII, this is where the chr(127) comes from
-        def keyfunc(entry, lcletters=string.ascii_lowercase + '_'):
+        def keyfunc(entry):
            key, (void, void, category_key) = entry
            if category_key:
                # using specified category key to sort
                key = category_key
            lckey = unicodedata.normalize('NFD', key.lower())
-            if lckey[0:1] in lcletters:
+            if lckey.startswith(u'\N{RIGHT-TO-LEFT MARK}'):
+                lckey = lckey[1:]
+            if lckey[0:1].isalpha() or lckey.startswith('_'):
                lckey = chr(127) + lckey
            # ensure a deterministic order *within* letters by also sorting on
            # the entry itself
@@ -158,14 +159,16 @@ class IndexEntries(EnvironmentManager):
                i += 1

        # group the entries by letter
-        def keyfunc2(item, letters=string.ascii_uppercase + '_'):
+        def keyfunc2(item):
            # hack: mutating the subitems dicts to a list in the keyfunc
            k, v = item
            v[1] = sorted((si, se) for (si, (se, void, void)) in iteritems(v[1]))
            if v[2] is None:
                # now calculate the key
+                if k.startswith(u'\N{RIGHT-TO-LEFT MARK}'):
+                    k = k[1:]
                letter = unicodedata.normalize('NFD', k[0])[0].upper()
-                if letter in letters:
+                if letter.isalpha() or letter == '_':
                    return letter
                else:
                    # get all other symbols under one heading
--- a/tests/test_environment_indexentries.py
+++ b/tests/test_environment_indexentries.py
@@ -30,15 +30,25 @@ def test_create_single_index():
            ('single', 'pip; install', 'id3', '', None),
            ('single', 'pip; upgrade', 'id4', '', None),
            ('single', 'Sphinx', 'id5', '', None),
+            ('single', u'Ель', 'id6', '', None),
+            ('single', u'ёлка', 'id7', '', None),
+            ('single', u'‏תירבע‎', 'id8', '', None),
+            ('single', u'9-symbol', 'id9', '', None),
+            ('single', u'&-symbol', 'id10', '', None),
        ],
    })
    index = IndexEntries(env).create_index(dummy_builder)
-    assert len(index) == 3
-    assert index[0] == (u'D', [(u'docutils', [[('', '#id1')], [], None])])
-    assert index[1] == (u'P', [(u'pip', [[], [(u'install', [('', '#id3')]),
+    assert len(index) == 6
+    assert index[0] == (u'Symbols', [(u'&-symbol', [[('', '#id10')], [], None]),
+                                     (u'9-symbol', [[('', '#id9')], [], None])])
+    assert index[1] == (u'D', [(u'docutils', [[('', '#id1')], [], None])])
+    assert index[2] == (u'P', [(u'pip', [[], [(u'install', [('', '#id3')]),
                                              (u'upgrade', [('', '#id4')])], None]),
                               (u'Python', [[('', '#id2')], [], None])])
-    assert index[2] == (u'S', [(u'Sphinx', [[('', '#id5')], [], None])])
+    assert index[3] == (u'S', [(u'Sphinx', [[('', '#id5')], [], None])])
+    assert index[4] == (u'Е', [(u'ёлка', [[('', '#id7')], [], None]),
+                               (u'Ель', [[('', '#id6')], [], None])])
+    assert index[5] == (u'ת', [(u'‏תירבע‎', [[('', '#id8')], [], None])])


 def test_create_pair_index():