diff --git a/sphinx/builders/gettext.py b/sphinx/builders/gettext.py
index 8380b4415..58fdfeddd 100644
--- a/sphinx/builders/gettext.py
+++ b/sphinx/builders/gettext.py
@@ -156,8 +156,8 @@ class I18nBuilder(Builder):
         if 'index' in self.env.config.gettext_additional_targets:
             # Extract translatable messages from index entries.
             for node, entries in traverse_translatable_index(doctree):
-                for typ, msg, _tid, _main, _key in entries:
-                    for m in split_index_msg(typ, msg):
+                for entry_type, value, _target_id, _main, _category_key in entries:
+                    for m in split_index_msg(entry_type, value):
                         catalog.add(m, node)
 
 
diff --git a/sphinx/domains/index.py b/sphinx/domains/index.py
index 470c39f48..681d89c61 100644
--- a/sphinx/domains/index.py
+++ b/sphinx/domains/index.py
@@ -29,7 +29,7 @@ class IndexDomain(Domain):
     label = 'index'
 
     @property
-    def entries(self) -> dict[str, list[tuple[str, str, str, str, str]]]:
+    def entries(self) -> dict[str, list[tuple[str, str, str, str, str | None]]]:
         return self.data.setdefault('entries', {})
 
     def clear_doc(self, docname: str) -> None:
@@ -44,8 +44,8 @@ class IndexDomain(Domain):
         entries = self.entries.setdefault(env.docname, [])
         for node in list(document.findall(addnodes.index)):
             try:
-                for entry in node['entries']:
-                    split_index_msg(entry[0], entry[1])
+                for (entry_type, value, _target_id, _main, _category_key) in node['entries']:
+                    split_index_msg(entry_type, value)
             except ValueError as exc:
                 logger.warning(str(exc), location=node)
                 node.parent.remove(node)
diff --git a/sphinx/environment/adapters/indexentries.py b/sphinx/environment/adapters/indexentries.py
index 6045ae3b2..e9b2fe7b8 100644
--- a/sphinx/environment/adapters/indexentries.py
+++ b/sphinx/environment/adapters/indexentries.py
@@ -5,10 +5,9 @@ from __future__ import annotations
 import re
 import unicodedata
 from itertools import groupby
-from typing import Any, cast
+from typing import Any, Literal
 
 from sphinx.builders import Builder
-from sphinx.domains.index import IndexDomain
 from sphinx.environment import BuildEnvironment
 from sphinx.errors import NoUri
 from sphinx.locale import _, __
@@ -20,6 +19,7 @@ logger = logging.getLogger(__name__)
 class IndexEntries:
     def __init__(self, env: BuildEnvironment) -> None:
         self.env = env
+        self.builder: Builder
 
     def create_index(self, builder: Builder, group_entries: bool = True,
                      _fixre: re.Pattern = re.compile(r'(.*) ([(][^()]*[)])'),
@@ -27,89 +27,60 @@ class IndexEntries:
         """Create the real index from the collected index entries."""
         new: dict[str, list] = {}
 
-        def add_entry(word: str, subword: str, main: str | None, link: bool = True,
-                      dic: dict[str, list] = new, key: str | None = None) -> None:
-            # Force the word to be unicode if it's a ASCII bytestring.
-            # This will solve problems with unicode normalization later.
-            # For instance the RFC role will add bytestrings at the moment
-            word = str(word)
-            entry = dic.get(word)
-            if not entry:
-                dic[word] = entry = [[], {}, key]
-            if subword:
-                add_entry(subword, '', main, link=link, dic=entry[1], key=key)
-            elif link:
-                try:
-                    uri = builder.get_relative_uri('genindex', fn) + '#' + tid
-                except NoUri:
-                    pass
-                else:
-                    entry[0].append((main, uri))
+        rel_uri: str | Literal[False]
+        index_domain = self.env.domains['index']
+        for docname, entries in index_domain.entries.items():
+            try:
+                rel_uri = builder.get_relative_uri('genindex', docname)
+            except NoUri:
+                rel_uri = False
 
-        domain = cast(IndexDomain, self.env.get_domain('index'))
-        for fn, entries in domain.entries.items():
             # new entry types must be listed in directives/other.py!
-            for type, value, tid, main, index_key in entries:  # noqa: B007
+            for entry_type, value, target_id, main, category_key in entries:
+                uri = rel_uri is not False and f'{rel_uri}#{target_id}'
                 try:
-                    if type == 'single':
+                    if entry_type == 'single':
                         try:
-                            entry, subentry = split_into(2, 'single', value)
+                            entry, sub_entry = split_into(2, 'single', value)
                         except ValueError:
                             entry, = split_into(1, 'single', value)
-                            subentry = ''
-                        add_entry(entry, subentry, main, key=index_key)
-                    elif type == 'pair':
+                            sub_entry = ''
+                        _add_entry(entry, sub_entry, main,
+                                   dic=new, link=uri, key=category_key)
+                    elif entry_type == 'pair':
                         first, second = split_into(2, 'pair', value)
-                        add_entry(first, second, main, key=index_key)
-                        add_entry(second, first, main, key=index_key)
-                    elif type == 'triple':
+                        _add_entry(first, second, main,
+                                   dic=new, link=uri, key=category_key)
+                        _add_entry(second, first, main,
+                                   dic=new, link=uri, key=category_key)
+                    elif entry_type == 'triple':
                         first, second, third = split_into(3, 'triple', value)
-                        add_entry(first, second + ' ' + third, main, key=index_key)
-                        add_entry(second, third + ', ' + first, main, key=index_key)
-                        add_entry(third, first + ' ' + second, main, key=index_key)
-                    elif type == 'see':
+                        _add_entry(first, second + ' ' + third, main,
+                                   dic=new, link=uri, key=category_key)
+                        _add_entry(second, third + ', ' + first, main,
+                                   dic=new, link=uri, key=category_key)
+                        _add_entry(third, first + ' ' + second, main,
+                                   dic=new, link=uri, key=category_key)
+                    elif entry_type == 'see':
                         first, second = split_into(2, 'see', value)
-                        add_entry(first, _('see %s') % second, None,
-                                  link=False, key=index_key)
-                    elif type == 'seealso':
+                        _add_entry(first, _('see %s') % second, None,
+                                   dic=new, link=False, key=category_key)
+                    elif entry_type == 'seealso':
                         first, second = split_into(2, 'see', value)
-                        add_entry(first, _('see also %s') % second, None,
-                                  link=False, key=index_key)
+                        _add_entry(first, _('see also %s') % second, None,
+                                   dic=new, link=False, key=category_key)
                     else:
-                        logger.warning(__('unknown index entry type %r'), type, location=fn)
+                        logger.warning(__('unknown index entry type %r'), entry_type,
+                                       location=docname)
                 except ValueError as err:
-                    logger.warning(str(err), location=fn)
+                    logger.warning(str(err), location=docname)
 
-        # sort the index entries for same keyword.
-        def keyfunc0(entry: tuple[str, str]) -> tuple[bool, str]:
-            main, uri = entry
-            return (not main, uri)  # show main entries at first
+        for (targets, sub_items, _category_key) in new.values():
+            targets.sort(key=_key_func_0)
+            for (sub_targets, _0, _sub_category_key) in sub_items.values():
+                sub_targets.sort(key=_key_func_0)
 
-        for indexentry in new.values():
-            indexentry[0].sort(key=keyfunc0)
-            for subentry in indexentry[1].values():
-                subentry[0].sort(key=keyfunc0)  # type: ignore
-
-        # sort the index entries
-        def keyfunc(entry: tuple[str, list]) -> tuple[tuple[int, str], str]:
-            key, (void, void, category_key) = entry
-            if category_key:
-                # using specified category key to sort
-                key = category_key
-            lckey = unicodedata.normalize('NFD', key.lower())
-            if lckey.startswith('\N{RIGHT-TO-LEFT MARK}'):
-                lckey = lckey[1:]
-
-            if lckey[0:1].isalpha() or lckey.startswith('_'):
-                # put non-symbol characters at the following group (1)
-                sortkey = (1, lckey)
-            else:
-                # put symbols at the front of the index (0)
-                sortkey = (0, lckey)
-            # ensure a deterministic order *within* letters by also sorting on
-            # the entry itself
-            return (sortkey, entry[0])
-        newlist = sorted(new.items(), key=keyfunc)
+        new_list = sorted(new.items(), key=_key_func_1)
 
         if group_entries:
             # fixup entries: transform
@@ -119,54 +90,95 @@ class IndexEntries:
             #   func()
             #     (in module foo)
             #     (in module bar)
-            oldkey = ''
-            oldsubitems: dict[str, list] = {}
+            old_key = ''
+            old_sub_items: dict[str, list] = {}
             i = 0
-            while i < len(newlist):
-                key, (targets, subitems, _key) = newlist[i]
-                # cannot move if it has subitems; structure gets too complex
-                if not subitems:
+            while i < len(new_list):
+                key, (targets, sub_items, category_key) = new_list[i]
+                # cannot move if it has sub_items; structure gets too complex
+                if not sub_items:
                     m = _fixre.match(key)
                     if m:
-                        if oldkey == m.group(1):
+                        if old_key == m.group(1):
                             # prefixes match: add entry as subitem of the
                             # previous entry
-                            oldsubitems.setdefault(m.group(2), [[], {}, _key])[0].\
-                                extend(targets)
-                            del newlist[i]
+                            old_sub_items.setdefault(
+                                m.group(2), [[], {}, category_key])[0].extend(targets)
+                            del new_list[i]
                             continue
-                        oldkey = m.group(1)
+                        old_key = m.group(1)
                     else:
-                        oldkey = key
-                oldsubitems = subitems
+                        old_key = key
+                old_sub_items = sub_items
                 i += 1
 
-        # sort the sub-index entries
-        def keyfunc2(entry: tuple[str, list]) -> str:
-            key = unicodedata.normalize('NFD', entry[0].lower())
-            if key.startswith('\N{RIGHT-TO-LEFT MARK}'):
-                key = key[1:]
-            if key[0:1].isalpha() or key.startswith('_'):
-                key = chr(127) + key
-            return key
-
-        # group the entries by letter
-        def keyfunc3(item: tuple[str, list]) -> str:
-            # hack: mutating the subitems dicts to a list in the keyfunc
-            k, v = item
-            v[1] = sorted(((si, se) for (si, (se, void, void)) in v[1].items()),
-                          key=keyfunc2)
-            if v[2] is None:
-                # now calculate the key
-                if k.startswith('\N{RIGHT-TO-LEFT MARK}'):
-                    k = k[1:]
-                letter = unicodedata.normalize('NFD', k[0])[0].upper()
-                if letter.isalpha() or letter == '_':
-                    return letter
-                else:
-                    # get all other symbols under one heading
-                    return _('Symbols')
-            else:
-                return v[2]
         return [(key_, list(group))
-                for (key_, group) in groupby(newlist, keyfunc3)]
+                for (key_, group) in groupby(new_list, _key_func_3)]
+
+
+def _add_entry(word: str, subword: str, main: str | None, *,
+               dic: dict[str, list], link: str | Literal[False], key: str | None) -> None:
+    entry = dic.setdefault(word, [[], {}, key])
+    if subword:
+        entry = entry[1].setdefault(subword, [[], {}, key])
+    if link:
+        entry[0].append((main, link))
+
+
+def _key_func_0(entry: tuple[str, str]) -> tuple[bool, str]:
+    """sort the index entries for same keyword."""
+    main, uri = entry
+    return not main, uri  # show main entries at first
+
+
+def _key_func_1(entry: tuple[str, list]) -> tuple[tuple[int, str], str]:
+    """Sort the index entries"""
+    key, (_targets, _sub_items, category_key) = entry
+    if category_key:
+        # using the specified category key to sort
+        key = category_key
+    lc_key = unicodedata.normalize('NFD', key.lower())
+    if lc_key.startswith('\N{RIGHT-TO-LEFT MARK}'):
+        lc_key = lc_key[1:]
+
+    if not lc_key[0:1].isalpha() and not lc_key.startswith('_'):
+        # put symbols at the front of the index (0)
+        group = 0
+    else:
+        # put non-symbol characters at the following group (1)
+        group = 1
+    # ensure a deterministic order *within* letters by also sorting on
+    # the entry itself
+    return (group, lc_key), entry[0]
+
+
+def _key_func_2(entry: tuple[str, list]) -> str:
+    """sort the sub-index entries"""
+    key = unicodedata.normalize('NFD', entry[0].lower())
+    if key.startswith('\N{RIGHT-TO-LEFT MARK}'):
+        key = key[1:]
+    if key[0:1].isalpha() or key.startswith('_'):
+        key = chr(127) + key
+    return key
+
+
+def _key_func_3(entry: tuple[str, list]) -> str:
+    """Group the entries by letter"""
+    key, (targets, sub_items, category_key) = entry
+    # hack: mutating the sub_items dicts to a list in the key_func
+    entry[1][1] = sorted(((sub_key, sub_targets)
+                          for (sub_key, (sub_targets, _0, _sub_category_key))
+                          in sub_items.items()), key=_key_func_2)
+
+    if category_key is not None:
+        return category_key
+
+    # now calculate the key
+    if key.startswith('\N{RIGHT-TO-LEFT MARK}'):
+        key = key[1:]
+    letter = unicodedata.normalize('NFD', key[0])[0].upper()
+    if letter.isalpha() or letter == '_':
+        return letter
+
+    # get all other symbols under one heading
+    return _('Symbols')
diff --git a/sphinx/search/__init__.py b/sphinx/search/__init__.py
index d662ba80c..f981905c7 100644
--- a/sphinx/search/__init__.py
+++ b/sphinx/search/__init__.py
@@ -31,7 +31,7 @@ from docutils.nodes import Element, Node
 
 from sphinx import addnodes, package_dir
 from sphinx.environment import BuildEnvironment
-from sphinx.util import split_into
+from sphinx.util import split_index_msg
 
 
 class SearchLanguage:
@@ -478,8 +478,17 @@ class IndexBuilder:
         # find explicit entries within index directives
         _index_entries: set[tuple[str, str, str]] = set()
         for node in doctree.findall(addnodes.index):
-            for entry_type, value, target_id, main, *index_key in node['entries']:
-                _index_entries |= _parse_index_entry(entry_type, value, target_id, main)
+            for entry_type, value, target_id, main, _category_key in node['entries']:
+                try:
+                    result = split_index_msg(entry_type, value)
+                except ValueError:
+                    pass
+                else:
+                    target_id = target_id or ''
+                    if entry_type in {'see', 'seealso'}:
+                        result = result[:1]  # index only the term, not its 'see' target
+                    _index_entries |= {(x, target_id, main) for x in result}
+
         self._index_entries[docname] = sorted(_index_entries)
 
     def _word_collector(self, doctree: nodes.document) -> WordStore:
@@ -557,41 +566,3 @@ class IndexBuilder:
                     (base_js, language_js, self.lang.language_name))
         else:
             return self.lang.js_stemmer_code
-
-
-def _parse_index_entry(
-    entry_type: str,
-    value: str,
-    target_id: str,
-    main: str
-) -> set[tuple[str, str, str]]:
-    target_id = target_id or ''
-    if entry_type == 'single':
-        try:
-            entry, subentry = split_into(2, 'single', value)
-            if subentry:
-                return {(entry, target_id, main), (subentry, target_id, main)}
-        except ValueError:
-            entry, = split_into(1, 'single', value)
-        return {(entry, target_id, main)}
-    elif entry_type == 'pair':
-        try:
-            first, second = split_into(2, 'pair', value)
-            return {(first, target_id, main), (second, target_id, main)}
-        except ValueError:
-            pass
-    elif entry_type == 'triple':
-        try:
-            first, second, third = split_into(3, 'triple', value)
-            return {(first, target_id, main),
-                    (second, target_id, main),
-                    (third, target_id, main)}
-        except ValueError:
-            pass
-    elif entry_type in {'see', 'seealso'}:
-        try:
-            first, second = split_into(2, 'see', value)
-            return {(first, target_id, main)}
-        except ValueError:
-            pass
-    return set()
diff --git a/sphinx/transforms/i18n.py b/sphinx/transforms/i18n.py
index a6585a34c..c412a0d12 100644
--- a/sphinx/transforms/i18n.py
+++ b/sphinx/transforms/i18n.py
@@ -498,8 +498,8 @@ class Locale(SphinxTransform):
             # Extract and translate messages for index entries.
             for node, entries in traverse_translatable_index(self.document):
                 new_entries: list[tuple[str, str, str, str, str | None]] = []
-                for type, msg, tid, main, _key in entries:
-                    msg_parts = split_index_msg(type, msg)
+                for entry_type, value, target_id, main, _category_key in entries:
+                    msg_parts = split_index_msg(entry_type, value)
                     msgstr_parts = []
                     for part in msg_parts:
                         msgstr = catalog.gettext(part)
@@ -507,7 +507,8 @@ class Locale(SphinxTransform):
                             msgstr = part
                         msgstr_parts.append(msgstr)
 
-                    new_entries.append((type, ';'.join(msgstr_parts), tid, main, None))
+                    new_entry = entry_type, ';'.join(msgstr_parts), target_id, main, None
+                    new_entries.append(new_entry)
 
                 node['raw_entries'] = entries
                 node['entries'] = new_entries
diff --git a/sphinx/util/__init__.py b/sphinx/util/__init__.py
index 3e23c55a6..087ec0d96 100644
--- a/sphinx/util/__init__.py
+++ b/sphinx/util/__init__.py
@@ -247,28 +247,25 @@ def parselinenos(spec: str, total: int) -> list[int]:
 def split_into(n: int, type: str, value: str) -> list[str]:
     """Split an index entry into a given number of parts at semicolons."""
     parts = [x.strip() for x in value.split(';', n - 1)]
-    if sum(1 for part in parts if part) < n:
+    if len(list(filter(None, parts))) < n:
         raise ValueError(f'invalid {type} index entry {value!r}')
     return parts
 
 
-def split_index_msg(type: str, value: str) -> list[str]:
-    # new entry types must be listed in directives/other.py!
-    if type == 'single':
+def split_index_msg(entry_type: str, value: str) -> list[str]:
+    # new entry types must be listed in util/nodes.py!
+    if entry_type == 'single':
         try:
-            result = split_into(2, 'single', value)
+            return split_into(2, 'single', value)
         except ValueError:
-            result = split_into(1, 'single', value)
-    elif type == 'pair':
-        result = split_into(2, 'pair', value)
-    elif type == 'triple':
-        result = split_into(3, 'triple', value)
-    elif type in {'see', 'seealso'}:
-        result = split_into(2, 'see', value)
-    else:
-        raise ValueError(f'invalid {type} index entry {value!r}')
-
-    return result
+            return split_into(1, 'single', value)
+    if entry_type == 'pair':
+        return split_into(2, 'pair', value)
+    if entry_type == 'triple':
+        return split_into(3, 'triple', value)
+    if entry_type in {'see', 'seealso'}:
+        return split_into(2, 'see', value)
+    raise ValueError(f'invalid {entry_type} index entry {value!r}')
 
 
 def import_object(objname: str, source: str | None = None) -> Any:
diff --git a/sphinx/writers/texinfo.py b/sphinx/writers/texinfo.py
index 267c9aa47..24316260e 100644
--- a/sphinx/writers/texinfo.py
+++ b/sphinx/writers/texinfo.py
@@ -1326,9 +1326,8 @@ class TexinfoTranslator(SphinxTranslator):
             self.ensure_eol()
         else:
             self.body.append('\n')
-        for entry in node['entries']:
-            typ, text, tid, text2, key_ = entry
-            text = self.escape_menu(text)
+        for (_entry_type, value, _target_id, _main, _category_key) in node['entries']:
+            text = self.escape_menu(value)
             self.body.append('@geindex %s\n' % text)
 
     def visit_versionmodified(self, node: Element) -> None: