From 046abd05770ad32c2df0ee43dae619148561b9d8 Mon Sep 17 00:00:00 2001 From: Dmitry Shachnev Date: Sun, 26 Feb 2017 22:08:59 +0300 Subject: [PATCH 1/3] Make genindex support all kinds of letters, not only Latin ones Without this, for example, all names starting with a Cyrillic letter are moved into a Symbols section rather than being split into sections by letter. --- sphinx/environment/managers/indexentries.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/sphinx/environment/managers/indexentries.py b/sphinx/environment/managers/indexentries.py index d4e5f05bd..d71bc9dc7 100644 --- a/sphinx/environment/managers/indexentries.py +++ b/sphinx/environment/managers/indexentries.py @@ -11,7 +11,6 @@ import re import bisect import unicodedata -import string from itertools import groupby from six import text_type @@ -114,13 +113,13 @@ class IndexEntries(EnvironmentManager): # sort the index entries; put all symbols at the front, even those # following the letters in ASCII, this is where the chr(127) comes from - def keyfunc(entry, lcletters=string.ascii_lowercase + '_'): + def keyfunc(entry): key, (void, void, category_key) = entry if category_key: # using specified category key to sort key = category_key lckey = unicodedata.normalize('NFD', key.lower()) - if lckey[0:1] in lcletters: + if lckey[0:1].isalpha() or lckey.startswith('_'): lckey = chr(127) + lckey # ensure a determinstic order *within* letters by also sorting on # the entry itself @@ -158,14 +157,14 @@ class IndexEntries(EnvironmentManager): i += 1 # group the entries by letter - def keyfunc2(item, letters=string.ascii_uppercase + '_'): + def keyfunc2(item): # hack: mutating the subitems dicts to a list in the keyfunc k, v = item v[1] = sorted((si, se) for (si, (se, void, void)) in iteritems(v[1])) if v[2] is None: # now calculate the key letter = unicodedata.normalize('NFD', k[0])[0].upper() - if letter in letters: + if letter.isalpha() or letter == '_': return letter else: # get all other 
symbols under one heading From d534692a0e30be043eb3f61cd7d55bf69bf4f8fd Mon Sep 17 00:00:00 2001 From: Dmitry Shachnev Date: Mon, 27 Feb 2017 15:45:34 +0300 Subject: [PATCH 2/3] Make genindex support RTL languages If a string starts with an RTL mark, simply skip it and use the following character as the first letter. --- sphinx/environment/managers/indexentries.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sphinx/environment/managers/indexentries.py b/sphinx/environment/managers/indexentries.py index d71bc9dc7..f10edb872 100644 --- a/sphinx/environment/managers/indexentries.py +++ b/sphinx/environment/managers/indexentries.py @@ -119,6 +119,8 @@ class IndexEntries(EnvironmentManager): # using specified category key to sort key = category_key lckey = unicodedata.normalize('NFD', key.lower()) + if lckey.startswith(u'\N{RIGHT-TO-LEFT MARK}'): + lckey = lckey[1:] if lckey[0:1].isalpha() or lckey.startswith('_'): lckey = chr(127) + lckey # ensure a determinstic order *within* letters by also sorting on @@ -163,6 +165,8 @@ class IndexEntries(EnvironmentManager): v[1] = sorted((si, se) for (si, (se, void, void)) in iteritems(v[1])) if v[2] is None: # now calculate the key + if k.startswith(u'\N{RIGHT-TO-LEFT MARK}'): + k = k[1:] letter = unicodedata.normalize('NFD', k[0])[0].upper() if letter.isalpha() or letter == '_': return letter From 73cd4b038a95271d25b55dc0b063e262bb34da10 Mon Sep 17 00:00:00 2001 From: Dmitry Shachnev Date: Mon, 27 Feb 2017 15:48:26 +0300 Subject: [PATCH 3/3] Add tests for the previous two changes --- tests/test_environment_indexentries.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/tests/test_environment_indexentries.py b/tests/test_environment_indexentries.py index 57a3cf52f..6e69dab72 100644 --- a/tests/test_environment_indexentries.py +++ b/tests/test_environment_indexentries.py @@ -30,15 +30,25 @@ def test_create_single_index(): ('single', 'pip; install', 'id3', '', None), ('single', 'pip; 
upgrade', 'id4', '', None), ('single', 'Sphinx', 'id5', '', None), + ('single', u'Ель', 'id6', '', None), + ('single', u'ёлка', 'id7', '', None), + ('single', u'‏תירבע‎', 'id8', '', None), + ('single', u'9-symbol', 'id9', '', None), + ('single', u'&-symbol', 'id10', '', None), ], }) index = IndexEntries(env).create_index(dummy_builder) - assert len(index) == 3 - assert index[0] == (u'D', [(u'docutils', [[('', '#id1')], [], None])]) - assert index[1] == (u'P', [(u'pip', [[], [(u'install', [('', '#id3')]), + assert len(index) == 6 + assert index[0] == (u'Symbols', [(u'&-symbol', [[('', '#id10')], [], None]), + (u'9-symbol', [[('', '#id9')], [], None])]) + assert index[1] == (u'D', [(u'docutils', [[('', '#id1')], [], None])]) + assert index[2] == (u'P', [(u'pip', [[], [(u'install', [('', '#id3')]), (u'upgrade', [('', '#id4')])], None]), (u'Python', [[('', '#id2')], [], None])]) - assert index[2] == (u'S', [(u'Sphinx', [[('', '#id5')], [], None])]) + assert index[3] == (u'S', [(u'Sphinx', [[('', '#id5')], [], None])]) + assert index[4] == (u'Е', [(u'ёлка', [[('', '#id7')], [], None]), + (u'Ель', [[('', '#id6')], [], None])]) + assert index[5] == (u'ת', [(u'‏תירבע‎', [[('', '#id8')], [], None])]) def test_create_pair_index():