mirror of
https://github.com/sphinx-doc/sphinx.git
synced 2025-02-25 18:55:22 -06:00
Re-structure index entry processing (#11505)
This commit is contained in:
parent
480630c649
commit
24b4d65a02
@ -156,8 +156,8 @@ class I18nBuilder(Builder):
|
||||
if 'index' in self.env.config.gettext_additional_targets:
|
||||
# Extract translatable messages from index entries.
|
||||
for node, entries in traverse_translatable_index(doctree):
|
||||
for typ, msg, _tid, _main, _key in entries:
|
||||
for m in split_index_msg(typ, msg):
|
||||
for entry_type, value, _target_id, _main, _category_key in entries:
|
||||
for m in split_index_msg(entry_type, value):
|
||||
catalog.add(m, node)
|
||||
|
||||
|
||||
|
@ -29,7 +29,7 @@ class IndexDomain(Domain):
|
||||
label = 'index'
|
||||
|
||||
@property
|
||||
def entries(self) -> dict[str, list[tuple[str, str, str, str, str]]]:
|
||||
def entries(self) -> dict[str, list[tuple[str, str, str, str, str | None]]]:
|
||||
return self.data.setdefault('entries', {})
|
||||
|
||||
def clear_doc(self, docname: str) -> None:
|
||||
@ -44,8 +44,8 @@ class IndexDomain(Domain):
|
||||
entries = self.entries.setdefault(env.docname, [])
|
||||
for node in list(document.findall(addnodes.index)):
|
||||
try:
|
||||
for entry in node['entries']:
|
||||
split_index_msg(entry[0], entry[1])
|
||||
for (entry_type, value, _target_id, _main, _category_key) in node['entries']:
|
||||
split_index_msg(entry_type, value)
|
||||
except ValueError as exc:
|
||||
logger.warning(str(exc), location=node)
|
||||
node.parent.remove(node)
|
||||
|
@ -5,10 +5,9 @@ from __future__ import annotations
|
||||
import re
|
||||
import unicodedata
|
||||
from itertools import groupby
|
||||
from typing import Any, cast
|
||||
from typing import Any, Literal
|
||||
|
||||
from sphinx.builders import Builder
|
||||
from sphinx.domains.index import IndexDomain
|
||||
from sphinx.environment import BuildEnvironment
|
||||
from sphinx.errors import NoUri
|
||||
from sphinx.locale import _, __
|
||||
@ -20,6 +19,7 @@ logger = logging.getLogger(__name__)
|
||||
class IndexEntries:
|
||||
def __init__(self, env: BuildEnvironment) -> None:
|
||||
self.env = env
|
||||
self.builder: Builder
|
||||
|
||||
def create_index(self, builder: Builder, group_entries: bool = True,
|
||||
_fixre: re.Pattern = re.compile(r'(.*) ([(][^()]*[)])'),
|
||||
@ -27,89 +27,60 @@ class IndexEntries:
|
||||
"""Create the real index from the collected index entries."""
|
||||
new: dict[str, list] = {}
|
||||
|
||||
def add_entry(word: str, subword: str, main: str | None, link: bool = True,
|
||||
dic: dict[str, list] = new, key: str | None = None) -> None:
|
||||
# Force the word to be unicode if it's a ASCII bytestring.
|
||||
# This will solve problems with unicode normalization later.
|
||||
# For instance the RFC role will add bytestrings at the moment
|
||||
word = str(word)
|
||||
entry = dic.get(word)
|
||||
if not entry:
|
||||
dic[word] = entry = [[], {}, key]
|
||||
if subword:
|
||||
add_entry(subword, '', main, link=link, dic=entry[1], key=key)
|
||||
elif link:
|
||||
rel_uri: str | Literal[False]
|
||||
index_domain = self.env.domains['index']
|
||||
for docname, entries in index_domain.entries.items():
|
||||
try:
|
||||
uri = builder.get_relative_uri('genindex', fn) + '#' + tid
|
||||
rel_uri = builder.get_relative_uri('genindex', docname)
|
||||
except NoUri:
|
||||
pass
|
||||
else:
|
||||
entry[0].append((main, uri))
|
||||
rel_uri = False
|
||||
|
||||
domain = cast(IndexDomain, self.env.get_domain('index'))
|
||||
for fn, entries in domain.entries.items():
|
||||
# new entry types must be listed in directives/other.py!
|
||||
for type, value, tid, main, index_key in entries: # noqa: B007
|
||||
for entry_type, value, target_id, main, category_key in entries:
|
||||
uri = rel_uri is not False and f'{rel_uri}#{target_id}'
|
||||
try:
|
||||
if type == 'single':
|
||||
if entry_type == 'single':
|
||||
try:
|
||||
entry, subentry = split_into(2, 'single', value)
|
||||
entry, sub_entry = split_into(2, 'single', value)
|
||||
except ValueError:
|
||||
entry, = split_into(1, 'single', value)
|
||||
subentry = ''
|
||||
add_entry(entry, subentry, main, key=index_key)
|
||||
elif type == 'pair':
|
||||
sub_entry = ''
|
||||
_add_entry(entry, sub_entry, main,
|
||||
dic=new, link=uri, key=category_key)
|
||||
elif entry_type == 'pair':
|
||||
first, second = split_into(2, 'pair', value)
|
||||
add_entry(first, second, main, key=index_key)
|
||||
add_entry(second, first, main, key=index_key)
|
||||
elif type == 'triple':
|
||||
_add_entry(first, second, main,
|
||||
dic=new, link=uri, key=category_key)
|
||||
_add_entry(second, first, main,
|
||||
dic=new, link=uri, key=category_key)
|
||||
elif entry_type == 'triple':
|
||||
first, second, third = split_into(3, 'triple', value)
|
||||
add_entry(first, second + ' ' + third, main, key=index_key)
|
||||
add_entry(second, third + ', ' + first, main, key=index_key)
|
||||
add_entry(third, first + ' ' + second, main, key=index_key)
|
||||
elif type == 'see':
|
||||
_add_entry(first, second + ' ' + third, main,
|
||||
dic=new, link=uri, key=category_key)
|
||||
_add_entry(second, third + ', ' + first, main,
|
||||
dic=new, link=uri, key=category_key)
|
||||
_add_entry(third, first + ' ' + second, main,
|
||||
dic=new, link=uri, key=category_key)
|
||||
elif entry_type == 'see':
|
||||
first, second = split_into(2, 'see', value)
|
||||
add_entry(first, _('see %s') % second, None,
|
||||
link=False, key=index_key)
|
||||
elif type == 'seealso':
|
||||
_add_entry(first, _('see %s') % second, None,
|
||||
dic=new, link=False, key=category_key)
|
||||
elif entry_type == 'seealso':
|
||||
first, second = split_into(2, 'see', value)
|
||||
add_entry(first, _('see also %s') % second, None,
|
||||
link=False, key=index_key)
|
||||
_add_entry(first, _('see also %s') % second, None,
|
||||
dic=new, link=False, key=category_key)
|
||||
else:
|
||||
logger.warning(__('unknown index entry type %r'), type, location=fn)
|
||||
logger.warning(__('unknown index entry type %r'), entry_type,
|
||||
location=docname)
|
||||
except ValueError as err:
|
||||
logger.warning(str(err), location=fn)
|
||||
logger.warning(str(err), location=docname)
|
||||
|
||||
# sort the index entries for same keyword.
|
||||
def keyfunc0(entry: tuple[str, str]) -> tuple[bool, str]:
|
||||
main, uri = entry
|
||||
return (not main, uri) # show main entries at first
|
||||
for (targets, sub_items, _category_key) in new.values():
|
||||
targets.sort(key=_key_func_0)
|
||||
for (sub_targets, _0, _sub_category_key) in sub_items.values():
|
||||
sub_targets.sort(key=_key_func_0)
|
||||
|
||||
for indexentry in new.values():
|
||||
indexentry[0].sort(key=keyfunc0)
|
||||
for subentry in indexentry[1].values():
|
||||
subentry[0].sort(key=keyfunc0) # type: ignore
|
||||
|
||||
# sort the index entries
|
||||
def keyfunc(entry: tuple[str, list]) -> tuple[tuple[int, str], str]:
|
||||
key, (void, void, category_key) = entry
|
||||
if category_key:
|
||||
# using specified category key to sort
|
||||
key = category_key
|
||||
lckey = unicodedata.normalize('NFD', key.lower())
|
||||
if lckey.startswith('\N{RIGHT-TO-LEFT MARK}'):
|
||||
lckey = lckey[1:]
|
||||
|
||||
if lckey[0:1].isalpha() or lckey.startswith('_'):
|
||||
# put non-symbol characters at the following group (1)
|
||||
sortkey = (1, lckey)
|
||||
else:
|
||||
# put symbols at the front of the index (0)
|
||||
sortkey = (0, lckey)
|
||||
# ensure a deterministic order *within* letters by also sorting on
|
||||
# the entry itself
|
||||
return (sortkey, entry[0])
|
||||
newlist = sorted(new.items(), key=keyfunc)
|
||||
new_list = sorted(new.items(), key=_key_func_1)
|
||||
|
||||
if group_entries:
|
||||
# fixup entries: transform
|
||||
@ -119,30 +90,70 @@ class IndexEntries:
|
||||
# func()
|
||||
# (in module foo)
|
||||
# (in module bar)
|
||||
oldkey = ''
|
||||
oldsubitems: dict[str, list] = {}
|
||||
old_key = ''
|
||||
old_sub_items: dict[str, list] = {}
|
||||
i = 0
|
||||
while i < len(newlist):
|
||||
key, (targets, subitems, _key) = newlist[i]
|
||||
# cannot move if it has subitems; structure gets too complex
|
||||
if not subitems:
|
||||
while i < len(new_list):
|
||||
key, (targets, sub_items, category_key) = new_list[i]
|
||||
# cannot move if it has sub_items; structure gets too complex
|
||||
if not sub_items:
|
||||
m = _fixre.match(key)
|
||||
if m:
|
||||
if oldkey == m.group(1):
|
||||
if old_key == m.group(1):
|
||||
# prefixes match: add entry as subitem of the
|
||||
# previous entry
|
||||
oldsubitems.setdefault(m.group(2), [[], {}, _key])[0].\
|
||||
extend(targets)
|
||||
del newlist[i]
|
||||
old_sub_items.setdefault(
|
||||
m.group(2), [[], {}, category_key])[0].extend(targets)
|
||||
del new_list[i]
|
||||
continue
|
||||
oldkey = m.group(1)
|
||||
old_key = m.group(1)
|
||||
else:
|
||||
oldkey = key
|
||||
oldsubitems = subitems
|
||||
old_key = key
|
||||
old_sub_items = sub_items
|
||||
i += 1
|
||||
|
||||
# sort the sub-index entries
|
||||
def keyfunc2(entry: tuple[str, list]) -> str:
|
||||
return [(key_, list(group))
|
||||
for (key_, group) in groupby(new_list, _key_func_3)]
|
||||
|
||||
|
||||
def _add_entry(word: str, subword: str, main: str | None, *,
|
||||
dic: dict[str, list], link: str | Literal[False], key: str | None) -> None:
|
||||
entry = dic.setdefault(word, [[], {}, key])
|
||||
if subword:
|
||||
entry = entry[1].setdefault(subword, [[], {}, key])
|
||||
if link:
|
||||
entry[0].append((main, link))
|
||||
|
||||
|
||||
def _key_func_0(entry: tuple[str, str]) -> tuple[bool, str]:
|
||||
"""sort the index entries for same keyword."""
|
||||
main, uri = entry
|
||||
return not main, uri # show main entries at first
|
||||
|
||||
|
||||
def _key_func_1(entry: tuple[str, list]) -> tuple[tuple[int, str], str]:
|
||||
"""Sort the index entries"""
|
||||
key, (_targets, _sub_items, category_key) = entry
|
||||
if category_key:
|
||||
# using the specified category key to sort
|
||||
key = category_key
|
||||
lc_key = unicodedata.normalize('NFD', key.lower())
|
||||
if lc_key.startswith('\N{RIGHT-TO-LEFT MARK}'):
|
||||
lc_key = lc_key[1:]
|
||||
|
||||
if not lc_key[0:1].isalpha() and not lc_key.startswith('_'):
|
||||
# put symbols at the front of the index (0)
|
||||
group = 0
|
||||
else:
|
||||
# put non-symbol characters at the following group (1)
|
||||
group = 1
|
||||
# ensure a deterministic order *within* letters by also sorting on
|
||||
# the entry itself
|
||||
return (group, lc_key), entry[0]
|
||||
|
||||
|
||||
def _key_func_2(entry: tuple[str, list]) -> str:
|
||||
"""sort the sub-index entries"""
|
||||
key = unicodedata.normalize('NFD', entry[0].lower())
|
||||
if key.startswith('\N{RIGHT-TO-LEFT MARK}'):
|
||||
key = key[1:]
|
||||
@ -150,23 +161,24 @@ class IndexEntries:
|
||||
key = chr(127) + key
|
||||
return key
|
||||
|
||||
# group the entries by letter
|
||||
def keyfunc3(item: tuple[str, list]) -> str:
|
||||
# hack: mutating the subitems dicts to a list in the keyfunc
|
||||
k, v = item
|
||||
v[1] = sorted(((si, se) for (si, (se, void, void)) in v[1].items()),
|
||||
key=keyfunc2)
|
||||
if v[2] is None:
|
||||
|
||||
def _key_func_3(entry: tuple[str, list]) -> str:
    """Group the entries by letter.

    *entry* is a ``(term, [targets, sub_items, category_key])`` pair.  Returns
    the heading the entry is grouped under: the category key when one is set,
    otherwise the upper-cased first letter of the term, or the translated
    ``'Symbols'`` heading when the term does not start with a letter or an
    underscore.

    Side effect: the ``sub_items`` dict stored in ``entry[1][1]`` is replaced
    by a list of ``(sub_key, sub_targets)`` pairs sorted with ``_key_func_2``.
    Because ``.items()`` is called on that slot, this function must only be
    applied once to any given entry.
    """
    key, (_targets, sub_items, category_key) = entry
    # hack: mutating the sub_items dicts to a list in the key_func
    entry[1][1] = sorted(((sub_key, sub_targets)
                          for (sub_key, (sub_targets, _0, _sub_category_key))
                          in sub_items.items()), key=_key_func_2)

    if category_key is not None:
        return category_key

    # now calculate the key
    if key.startswith('\N{RIGHT-TO-LEFT MARK}'):
        key = key[1:]
    # slice instead of indexing so that an empty key falls through to the
    # 'Symbols' heading rather than raising IndexError
    letter = unicodedata.normalize('NFD', key[:1])[:1].upper()
    if letter.isalpha() or letter == '_':
        return letter

    # get all other symbols under one heading
    return _('Symbols')
|
||||
else:
|
||||
return v[2]
|
||||
return [(key_, list(group))
|
||||
for (key_, group) in groupby(newlist, keyfunc3)]
|
||||
|
@ -31,7 +31,7 @@ from docutils.nodes import Element, Node
|
||||
|
||||
from sphinx import addnodes, package_dir
|
||||
from sphinx.environment import BuildEnvironment
|
||||
from sphinx.util import split_into
|
||||
from sphinx.util import split_index_msg
|
||||
|
||||
|
||||
class SearchLanguage:
|
||||
@ -478,8 +478,17 @@ class IndexBuilder:
|
||||
# find explicit entries within index directives
|
||||
_index_entries: set[tuple[str, str, str]] = set()
|
||||
for node in doctree.findall(addnodes.index):
|
||||
for entry_type, value, target_id, main, *index_key in node['entries']:
|
||||
_index_entries |= _parse_index_entry(entry_type, value, target_id, main)
|
||||
for entry_type, value, target_id, main, _category_key in node['entries']:
|
||||
try:
|
||||
result = split_index_msg(entry_type, value)
|
||||
except ValueError:
|
||||
pass
|
||||
else:
|
||||
target_id = target_id or ''
|
||||
if entry_type in {'see', 'seealso'}:
|
||||
_index_entries.add((result[0], target_id, main))
|
||||
_index_entries |= {(x, target_id, main) for x in result}
|
||||
|
||||
self._index_entries[docname] = sorted(_index_entries)
|
||||
|
||||
def _word_collector(self, doctree: nodes.document) -> WordStore:
|
||||
@ -557,41 +566,3 @@ class IndexBuilder:
|
||||
(base_js, language_js, self.lang.language_name))
|
||||
else:
|
||||
return self.lang.js_stemmer_code
|
||||
|
||||
|
||||
def _parse_index_entry(
    entry_type: str,
    value: str,
    target_id: str,
    main: str,
) -> set[tuple[str, str, str]]:
    """Turn one index entry into a set of ``(term, target_id, main)`` tuples.

    ``'single'`` entries contribute one or two terms, ``'pair'`` two and
    ``'triple'`` three.  ``'see'`` and ``'seealso'`` contribute only their
    first part.  A falsy *target_id* is replaced by ``''``.

    An unknown *entry_type*, or a *value* that ``split_into`` rejects with
    ``ValueError``, yields an empty set.  This includes an unsplittable
    ``'single'`` value, which previously let the ``ValueError`` escape to the
    caller while all other entry types returned an empty set.
    """
    target_id = target_id or ''
    try:
        if entry_type == 'single':
            try:
                terms = split_into(2, 'single', value)
            except ValueError:
                # no sub-entry given: fall back to a one-part entry
                terms = split_into(1, 'single', value)
        elif entry_type == 'pair':
            terms = split_into(2, 'pair', value)
        elif entry_type == 'triple':
            terms = split_into(3, 'triple', value)
        elif entry_type in {'see', 'seealso'}:
            # only the referring term is kept, not the term it points to
            terms = split_into(2, 'see', value)[:1]
        else:
            return set()
    except ValueError:
        return set()
    return {(term, target_id, main) for term in terms}
|
||||
|
@ -498,8 +498,8 @@ class Locale(SphinxTransform):
|
||||
# Extract and translate messages for index entries.
|
||||
for node, entries in traverse_translatable_index(self.document):
|
||||
new_entries: list[tuple[str, str, str, str, str | None]] = []
|
||||
for type, msg, tid, main, _key in entries:
|
||||
msg_parts = split_index_msg(type, msg)
|
||||
for entry_type, value, target_id, main, _category_key in entries:
|
||||
msg_parts = split_index_msg(entry_type, value)
|
||||
msgstr_parts = []
|
||||
for part in msg_parts:
|
||||
msgstr = catalog.gettext(part)
|
||||
@ -507,7 +507,8 @@ class Locale(SphinxTransform):
|
||||
msgstr = part
|
||||
msgstr_parts.append(msgstr)
|
||||
|
||||
new_entries.append((type, ';'.join(msgstr_parts), tid, main, None))
|
||||
new_entry = entry_type, ';'.join(msgstr_parts), target_id, main, None
|
||||
new_entries.append(new_entry)
|
||||
|
||||
node['raw_entries'] = entries
|
||||
node['entries'] = new_entries
|
||||
|
@ -247,28 +247,25 @@ def parselinenos(spec: str, total: int) -> list[int]:
|
||||
def split_into(n: int, type: str, value: str) -> list[str]:
    """Split an index entry into a given number of parts at semicolons.

    *value* is split at most ``n - 1`` times, so any further semicolons stay
    in the last part; every part is stripped of surrounding whitespace.

    :param n: the number of non-empty parts required.
    :param type: the entry type name, used only in the error message.
    :param value: the raw index entry text.
    :returns: the stripped parts.
    :raises ValueError: if fewer than *n* of the parts are non-empty.
    """
    parts = [x.strip() for x in value.split(';', n - 1)]
    if sum(1 for part in parts if part) < n:
        raise ValueError(f'invalid {type} index entry {value!r}')
    return parts
|
||||
|
||||
|
||||
def split_index_msg(entry_type: str, value: str) -> list[str]:
    """Split the text of an index entry according to its entry type.

    ``'single'`` entries are split into two parts when possible and one part
    otherwise; ``'pair'`` into two; ``'triple'`` into three; ``'see'`` and
    ``'seealso'`` into two.

    :raises ValueError: if *entry_type* is not one of the types above, or if
        ``split_into`` rejects *value* for the requested number of parts.
    """
    # new entry types must be listed in util/nodes.py!
    if entry_type == 'single':
        try:
            return split_into(2, 'single', value)
        except ValueError:
            # no sub-entry present: treat the whole value as one term
            return split_into(1, 'single', value)
    if entry_type == 'pair':
        return split_into(2, 'pair', value)
    if entry_type == 'triple':
        return split_into(3, 'triple', value)
    if entry_type in {'see', 'seealso'}:
        return split_into(2, 'see', value)
    raise ValueError(f'invalid {entry_type} index entry {value!r}')
|
||||
|
||||
|
||||
def import_object(objname: str, source: str | None = None) -> Any:
|
||||
|
@ -1326,9 +1326,8 @@ class TexinfoTranslator(SphinxTranslator):
|
||||
self.ensure_eol()
|
||||
else:
|
||||
self.body.append('\n')
|
||||
for entry in node['entries']:
|
||||
typ, text, tid, text2, key_ = entry
|
||||
text = self.escape_menu(text)
|
||||
for (_entry_type, value, _target_id, _main, _category_key) in node['entries']:
|
||||
text = self.escape_menu(value)
|
||||
self.body.append('@geindex %s\n' % text)
|
||||
|
||||
def visit_versionmodified(self, node: Element) -> None:
|
||||
|
Loading…
Reference in New Issue
Block a user