Split up logic in `sphinx.transforms.i18n.Locale.apply` (#11166)

Before, the apply method was one big 400-line chunk. This just splits it
up into more manageable chunks by introducing a ``_NodeUpdater`` auxiliary
class holding logic for updating various types of references.

Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com>
This commit is contained in:
Jean Abou-Samra 2023-03-15 23:52:18 +01:00 committed by GitHub
parent ed97d7ac9f
commit b7345ad066
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -2,14 +2,14 @@
from __future__ import annotations from __future__ import annotations
import contextlib
from os import path from os import path
from re import DOTALL, match from re import DOTALL, match
from textwrap import indent from textwrap import indent
from typing import TYPE_CHECKING, Any, TypeVar from typing import TYPE_CHECKING, Any, Sequence, TypeVar
from docutils import nodes from docutils import nodes
from docutils.io import StringInput from docutils.io import StringInput
from docutils.nodes import Element
from sphinx import addnodes from sphinx import addnodes
from sphinx.config import Config from sphinx.config import Config
@ -44,7 +44,7 @@ N = TypeVar('N', bound=nodes.Node)
def publish_msgstr(app: Sphinx, source: str, source_path: str, source_line: int, def publish_msgstr(app: Sphinx, source: str, source_path: str, source_line: int,
config: Config, settings: Any) -> Element: config: Config, settings: Any) -> nodes.Element:
"""Publish msgstr (single line) into docutils document """Publish msgstr (single line) into docutils document
:param sphinx.application.Sphinx app: sphinx application :param sphinx.application.Sphinx app: sphinx application
@ -59,7 +59,7 @@ def publish_msgstr(app: Sphinx, source: str, source_path: str, source_line: int,
try: try:
# clear rst_prolog temporarily # clear rst_prolog temporarily
rst_prolog = config.rst_prolog rst_prolog = config.rst_prolog
config.rst_prolog = None # type: ignore config.rst_prolog = None # type: ignore[attr-defined]
from sphinx.io import SphinxI18nReader from sphinx.io import SphinxI18nReader
reader = SphinxI18nReader() reader = SphinxI18nReader()
@ -72,13 +72,11 @@ def publish_msgstr(app: Sphinx, source: str, source_path: str, source_line: int,
parser=parser, parser=parser,
settings=settings, settings=settings,
) )
try: with contextlib.suppress(IndexError): # empty node
doc = doc[0] # type: ignore return doc[0] # type: ignore[return-value]
except IndexError: # empty node
pass
return doc return doc
finally: finally:
config.rst_prolog = rst_prolog # type: ignore config.rst_prolog = rst_prolog # type: ignore[attr-defined]
def parse_noqa(source: str) -> tuple[str, bool]: def parse_noqa(source: str) -> tuple[str, bool]:
@ -100,6 +98,238 @@ class PreserveTranslatableMessages(SphinxTransform):
node.preserve_original_messages() node.preserve_original_messages()
class _NodeUpdater:
"""Contains logic for updating one node with the translated content."""
def __init__(
self, node: nodes.Element, patch: nodes.Element, document: nodes.document, noqa: bool,
) -> None:
self.node: nodes.Element = node
self.patch: nodes.Element = patch
self.document: nodes.document = document
self.noqa: bool = noqa
def compare_references(self, old_refs: Sequence[nodes.Element],
new_refs: Sequence[nodes.Element],
warning_msg: str) -> None:
"""Warn about mismatches between references in original and translated content."""
# FIXME: could use a smarter strategy than len(old_refs) == len(new_refs)
if not self.noqa and len(old_refs) != len(new_refs):
old_ref_rawsources = [ref.rawsource for ref in old_refs]
new_ref_rawsources = [ref.rawsource for ref in new_refs]
logger.warning(warning_msg.format(old_ref_rawsources, new_ref_rawsources),
location=self.node, type='i18n', subtype='inconsistent_references')
def update_title_mapping(self) -> bool:
processed = False # skip flag
# update title(section) target name-id mapping
if isinstance(self.node, nodes.title) and isinstance(self.node.parent, nodes.section):
section_node = self.node.parent
new_name = nodes.fully_normalize_name(self.patch.astext())
old_name = nodes.fully_normalize_name(self.node.astext())
if old_name != new_name:
# if name would be changed, replace node names and
# document nameids mapping with new name.
names = section_node.setdefault('names', [])
names.append(new_name)
# Original section name (reference target name) should be kept to refer
# from other nodes which is still not translated or uses explicit target
# name like "`text to display <explicit target name_>`_"..
# So, `old_name` is still exist in `names`.
_id = self.document.nameids.get(old_name, None)
explicit = self.document.nametypes.get(old_name, None)
# * if explicit: _id is label. title node need another id.
# * if not explicit:
#
# * if _id is None:
#
# _id is None means:
#
# 1. _id was not provided yet.
#
# 2. _id was duplicated.
#
# old_name entry still exists in nameids and
# nametypes for another duplicated entry.
#
# * if _id is provided: below process
if _id:
if not explicit:
# _id was not duplicated.
# remove old_name entry from document ids database
# to reuse original _id.
self.document.nameids.pop(old_name, None)
self.document.nametypes.pop(old_name, None)
self.document.ids.pop(_id, None)
# re-entry with new named section node.
#
# Note: msgnode that is a second parameter of the
# `note_implicit_target` is not necessary here because
# section_node has been noted previously on rst parsing by
# `docutils.parsers.rst.states.RSTState.new_subsection()`
# and already has `system_message` if needed.
self.document.note_implicit_target(section_node)
# replace target's refname to new target name
matcher = NodeMatcher(nodes.target, refname=old_name)
for old_target in self.document.findall(matcher): # type: nodes.target
old_target['refname'] = new_name
processed = True
return processed
def update_autofootnote_references(self) -> None:
# auto-numbered foot note reference should use original 'ids'.
def list_replace_or_append(lst: list[N], old: N, new: N) -> None:
if old in lst:
lst[lst.index(old)] = new
else:
lst.append(new)
is_autofootnote_ref = NodeMatcher(nodes.footnote_reference, auto=Any)
old_foot_refs: list[nodes.footnote_reference] = [
*self.node.findall(is_autofootnote_ref)]
new_foot_refs: list[nodes.footnote_reference] = [
*self.patch.findall(is_autofootnote_ref)]
self.compare_references(old_foot_refs, new_foot_refs,
__('inconsistent footnote references in translated message.' +
' original: {0}, translated: {1}'))
old_foot_namerefs: dict[str, list[nodes.footnote_reference]] = {}
for r in old_foot_refs:
old_foot_namerefs.setdefault(r.get('refname'), []).append(r)
for newf in new_foot_refs:
refname = newf.get('refname')
refs = old_foot_namerefs.get(refname, [])
if not refs:
newf.parent.remove(newf)
continue
oldf = refs.pop(0)
newf['ids'] = oldf['ids']
for id in newf['ids']:
self.document.ids[id] = newf
if newf['auto'] == 1:
# autofootnote_refs
list_replace_or_append(self.document.autofootnote_refs, oldf, newf)
else:
# symbol_footnote_refs
list_replace_or_append(self.document.symbol_footnote_refs, oldf, newf)
if refname:
footnote_refs = self.document.footnote_refs.setdefault(refname, [])
list_replace_or_append(footnote_refs, oldf, newf)
refnames = self.document.refnames.setdefault(refname, [])
list_replace_or_append(refnames, oldf, newf)
def update_refnamed_references(self) -> None:
# reference should use new (translated) 'refname'.
# * reference target ".. _Python: ..." is not translatable.
# * use translated refname for section refname.
# * inline reference "`Python <...>`_" has no 'refname'.
is_refnamed_ref = NodeMatcher(nodes.reference, refname=Any)
old_refs: list[nodes.reference] = [*self.node.findall(is_refnamed_ref)]
new_refs: list[nodes.reference] = [*self.patch.findall(is_refnamed_ref)]
self.compare_references(old_refs, new_refs,
__('inconsistent references in translated message.' +
' original: {0}, translated: {1}'))
old_ref_names = [r['refname'] for r in old_refs]
new_ref_names = [r['refname'] for r in new_refs]
orphans = [*({*old_ref_names} - {*new_ref_names})]
for newr in new_refs:
if not self.document.has_name(newr['refname']):
# Maybe refname is translated but target is not translated.
# Note: multiple translated refnames break link ordering.
if orphans:
newr['refname'] = orphans.pop(0)
else:
# orphan refnames is already empty!
# reference number is same in new_refs and old_refs.
pass
self.document.note_refname(newr)
def update_refnamed_footnote_references(self) -> None:
# refnamed footnote should use original 'ids'.
is_refnamed_footnote_ref = NodeMatcher(nodes.footnote_reference, refname=Any)
old_foot_refs: list[nodes.footnote_reference] = [*self.node.findall(
is_refnamed_footnote_ref)]
new_foot_refs: list[nodes.footnote_reference] = [*self.patch.findall(
is_refnamed_footnote_ref)]
refname_ids_map: dict[str, list[str]] = {}
self.compare_references(old_foot_refs, new_foot_refs,
__('inconsistent footnote references in translated message.' +
' original: {0}, translated: {1}'))
for oldf in old_foot_refs:
refname_ids_map.setdefault(oldf["refname"], []).append(oldf["ids"])
for newf in new_foot_refs:
refname = newf["refname"]
if refname_ids_map.get(refname):
newf["ids"] = refname_ids_map[refname].pop(0)
def update_citation_references(self) -> None:
# citation should use original 'ids'.
is_citation_ref = NodeMatcher(nodes.citation_reference, refname=Any)
old_cite_refs: list[nodes.citation_reference] = [*self.node.findall(is_citation_ref)]
new_cite_refs: list[nodes.citation_reference] = [*self.patch.findall(is_citation_ref)]
self.compare_references(old_cite_refs, new_cite_refs,
__('inconsistent citation references in translated message.' +
' original: {0}, translated: {1}'))
refname_ids_map: dict[str, list[str]] = {}
for oldc in old_cite_refs:
refname_ids_map.setdefault(oldc["refname"], []).append(oldc["ids"])
for newc in new_cite_refs:
refname = newc["refname"]
if refname_ids_map.get(refname):
newc["ids"] = refname_ids_map[refname].pop()
def update_pending_xrefs(self) -> None:
# Original pending_xref['reftarget'] contain not-translated
# target name, new pending_xref must use original one.
# This code restricts to change ref-targets in the translation.
old_xrefs = [*self.node.findall(addnodes.pending_xref)]
new_xrefs = [*self.patch.findall(addnodes.pending_xref)]
self.compare_references(old_xrefs, new_xrefs,
__('inconsistent term references in translated message.' +
' original: {0}, translated: {1}'))
xref_reftarget_map = {}
def get_ref_key(node: addnodes.pending_xref) -> tuple[str, str, str] | None:
case = node["refdomain"], node["reftype"]
if case == ('std', 'term'):
return None
else:
return (
node["refdomain"],
node["reftype"],
node['reftarget'],)
for old in old_xrefs:
key = get_ref_key(old)
if key:
xref_reftarget_map[key] = old.attributes
for new in new_xrefs:
key = get_ref_key(new)
# Copy attributes to keep original node behavior. Especially
# copying 'reftarget', 'py:module', 'py:class' are needed.
for k, v in xref_reftarget_map.get(key, {}).items():
if k not in EXCLUDED_PENDING_XREF_ATTRIBUTES:
new[k] = v
def update_leaves(self) -> None:
for child in self.patch.children:
child.parent = self.node
self.node.children = self.patch.children
class Locale(SphinxTransform): class Locale(SphinxTransform):
""" """
Replace translatable nodes with their translated doctree. Replace translatable nodes with their translated doctree.
@ -155,83 +385,24 @@ class Locale(SphinxTransform):
if not isinstance(patch, nodes.paragraph): if not isinstance(patch, nodes.paragraph):
continue # skip for now continue # skip for now
processed = False # skip flag updater = _NodeUpdater(node, patch, self.document, noqa=False)
processed = updater.update_title_mapping()
# update title(section) target name-id mapping
if isinstance(node, nodes.title) and isinstance(node.parent, nodes.section):
section_node = node.parent
new_name = nodes.fully_normalize_name(patch.astext())
old_name = nodes.fully_normalize_name(node.astext())
if old_name != new_name:
# if name would be changed, replace node names and
# document nameids mapping with new name.
names = section_node.setdefault('names', [])
names.append(new_name)
# Original section name (reference target name) should be kept to refer
# from other nodes which is still not translated or uses explicit target
# name like "`text to display <explicit target name_>`_"..
# So, `old_name` is still exist in `names`.
_id = self.document.nameids.get(old_name, None)
explicit = self.document.nametypes.get(old_name, None)
# * if explicit: _id is label. title node need another id.
# * if not explicit:
#
# * if _id is None:
#
# _id is None means:
#
# 1. _id was not provided yet.
#
# 2. _id was duplicated.
#
# old_name entry still exists in nameids and
# nametypes for another duplicated entry.
#
# * if _id is provided: below process
if _id:
if not explicit:
# _id was not duplicated.
# remove old_name entry from document ids database
# to reuse original _id.
self.document.nameids.pop(old_name, None)
self.document.nametypes.pop(old_name, None)
self.document.ids.pop(_id, None)
# re-entry with new named section node.
#
# Note: msgnode that is a second parameter of the
# `note_implicit_target` is not necessary here because
# section_node has been noted previously on rst parsing by
# `docutils.parsers.rst.states.RSTState.new_subsection()`
# and already has `system_message` if needed.
self.document.note_implicit_target(section_node)
# replace target's refname to new target name
matcher = NodeMatcher(nodes.target, refname=old_name)
for old_target in self.document.findall(matcher): # type: nodes.target
old_target['refname'] = new_name
processed = True
# glossary terms update refid # glossary terms update refid
if isinstance(node, nodes.term): if isinstance(node, nodes.term):
for _id in node['ids']: for _id in node['ids']:
parts = split_term_classifiers(msgstr) parts = split_term_classifiers(msgstr)
patch = publish_msgstr(self.app, parts[0], source, patch = publish_msgstr(
node.line, self.config, settings) self.app, parts[0], source, node.line, self.config, settings,
patch = make_glossary_term(self.env, patch, parts[1], )
source, node.line, _id, updater.patch = make_glossary_term(
self.document) self.env, patch, parts[1], source, node.line, _id, self.document,
)
processed = True processed = True
# update leaves with processed nodes # update leaves with processed nodes
if processed: if processed:
for child in patch.children: updater.update_leaves()
child.parent = node
node.children = patch.children
node['translated'] = True # to avoid double translation node['translated'] = True # to avoid double translation
# phase2: translation # phase2: translation
@ -252,11 +423,11 @@ class Locale(SphinxTransform):
# update translatable nodes # update translatable nodes
if isinstance(node, addnodes.translatable): if isinstance(node, addnodes.translatable):
node.apply_translated_message(msg, msgstr) # type: ignore node.apply_translated_message(msg, msgstr) # type: ignore[attr-defined]
continue continue
# update meta nodes # update meta nodes
if isinstance(node, nodes.meta): # type: ignore if isinstance(node, nodes.meta): # type: ignore[attr-defined]
node['content'] = msgstr node['content'] = msgstr
continue continue
@ -292,7 +463,7 @@ class Locale(SphinxTransform):
patch = patch.next_node() patch = patch.next_node()
# ignore unexpected markups in translation message # ignore unexpected markups in translation message
unexpected: tuple[type[Element], ...] = ( unexpected: tuple[type[nodes.Element], ...] = (
nodes.paragraph, # expected form of translation nodes.paragraph, # expected form of translation
nodes.title, # generated by above "Subelements phase2" nodes.title, # generated by above "Subelements phase2"
) )
@ -305,166 +476,13 @@ class Locale(SphinxTransform):
if not isinstance(patch, unexpected): if not isinstance(patch, unexpected):
continue # skip continue # skip
# auto-numbered foot note reference should use original 'ids'. updater = _NodeUpdater(node, patch, self.document, noqa)
def list_replace_or_append(lst: list[N], old: N, new: N) -> None: updater.update_autofootnote_references()
if old in lst: updater.update_refnamed_references()
lst[lst.index(old)] = new updater.update_refnamed_footnote_references()
else: updater.update_citation_references()
lst.append(new) updater.update_pending_xrefs()
updater.update_leaves()
is_autofootnote_ref = NodeMatcher(nodes.footnote_reference, auto=Any)
old_foot_refs: list[nodes.footnote_reference] = list(
node.findall(is_autofootnote_ref),
)
new_foot_refs: list[nodes.footnote_reference] = list(
patch.findall(is_autofootnote_ref),
)
if not noqa and len(old_foot_refs) != len(new_foot_refs):
old_foot_ref_rawsources = [ref.rawsource for ref in old_foot_refs]
new_foot_ref_rawsources = [ref.rawsource for ref in new_foot_refs]
logger.warning(__('inconsistent footnote references in translated message.' +
' original: {0}, translated: {1}')
.format(old_foot_ref_rawsources, new_foot_ref_rawsources),
location=node, type='i18n', subtype='inconsistent_references')
old_foot_namerefs: dict[str, list[nodes.footnote_reference]] = {}
for r in old_foot_refs:
old_foot_namerefs.setdefault(r.get('refname'), []).append(r)
for newf in new_foot_refs:
refname = newf.get('refname')
refs = old_foot_namerefs.get(refname, [])
if not refs:
newf.parent.remove(newf)
continue
oldf = refs.pop(0)
newf['ids'] = oldf['ids']
for id in newf['ids']:
self.document.ids[id] = newf
if newf['auto'] == 1:
# autofootnote_refs
list_replace_or_append(self.document.autofootnote_refs, oldf, newf)
else:
# symbol_footnote_refs
list_replace_or_append(self.document.symbol_footnote_refs, oldf, newf)
if refname:
footnote_refs = self.document.footnote_refs.setdefault(refname, [])
list_replace_or_append(footnote_refs, oldf, newf)
refnames = self.document.refnames.setdefault(refname, [])
list_replace_or_append(refnames, oldf, newf)
# reference should use new (translated) 'refname'.
# * reference target ".. _Python: ..." is not translatable.
# * use translated refname for section refname.
# * inline reference "`Python <...>`_" has no 'refname'.
is_refnamed_ref = NodeMatcher(nodes.reference, refname=Any)
old_refs: list[nodes.reference] = list(node.findall(is_refnamed_ref))
new_refs: list[nodes.reference] = list(patch.findall(is_refnamed_ref))
if not noqa and len(old_refs) != len(new_refs):
old_ref_rawsources = [ref.rawsource for ref in old_refs]
new_ref_rawsources = [ref.rawsource for ref in new_refs]
logger.warning(__('inconsistent references in translated message.' +
' original: {0}, translated: {1}')
.format(old_ref_rawsources, new_ref_rawsources),
location=node, type='i18n', subtype='inconsistent_references')
old_ref_names = [r['refname'] for r in old_refs]
new_ref_names = [r['refname'] for r in new_refs]
orphans = list(set(old_ref_names) - set(new_ref_names))
for newr in new_refs:
if not self.document.has_name(newr['refname']):
# Maybe refname is translated but target is not translated.
# Note: multiple translated refnames break link ordering.
if orphans:
newr['refname'] = orphans.pop(0)
else:
# orphan refnames is already empty!
# reference number is same in new_refs and old_refs.
pass
self.document.note_refname(newr)
# refnamed footnote should use original 'ids'.
is_refnamed_footnote_ref = NodeMatcher(nodes.footnote_reference, refname=Any)
old_foot_refs = list(node.findall(is_refnamed_footnote_ref))
new_foot_refs = list(patch.findall(is_refnamed_footnote_ref))
refname_ids_map: dict[str, list[str]] = {}
if not noqa and len(old_foot_refs) != len(new_foot_refs):
old_foot_ref_rawsources = [ref.rawsource for ref in old_foot_refs]
new_foot_ref_rawsources = [ref.rawsource for ref in new_foot_refs]
logger.warning(__('inconsistent footnote references in translated message.' +
' original: {0}, translated: {1}')
.format(old_foot_ref_rawsources, new_foot_ref_rawsources),
location=node, type='i18n', subtype='inconsistent_references')
for oldf in old_foot_refs:
refname_ids_map.setdefault(oldf["refname"], []).append(oldf["ids"])
for newf in new_foot_refs:
refname = newf["refname"]
if refname_ids_map.get(refname):
newf["ids"] = refname_ids_map[refname].pop(0)
# citation should use original 'ids'.
is_citation_ref = NodeMatcher(nodes.citation_reference, refname=Any)
old_cite_refs: list[nodes.citation_reference] = list(node.findall(is_citation_ref))
new_cite_refs: list[nodes.citation_reference] = list(
patch.findall(is_citation_ref),
)
refname_ids_map = {}
if not noqa and len(old_cite_refs) != len(new_cite_refs):
old_cite_ref_rawsources = [ref.rawsource for ref in old_cite_refs]
new_cite_ref_rawsources = [ref.rawsource for ref in new_cite_refs]
logger.warning(__('inconsistent citation references in translated message.' +
' original: {0}, translated: {1}')
.format(old_cite_ref_rawsources, new_cite_ref_rawsources),
location=node, type='i18n', subtype='inconsistent_references')
for oldc in old_cite_refs:
refname_ids_map.setdefault(oldc["refname"], []).append(oldc["ids"])
for newc in new_cite_refs:
refname = newc["refname"]
if refname_ids_map.get(refname):
newc["ids"] = refname_ids_map[refname].pop()
# Original pending_xref['reftarget'] contain not-translated
# target name, new pending_xref must use original one.
# This code restricts to change ref-targets in the translation.
old_xrefs = list(node.findall(addnodes.pending_xref))
new_xrefs = list(patch.findall(addnodes.pending_xref))
xref_reftarget_map = {}
if not noqa and len(old_xrefs) != len(new_xrefs):
old_xref_rawsources = [xref.rawsource for xref in old_xrefs]
new_xref_rawsources = [xref.rawsource for xref in new_xrefs]
logger.warning(__('inconsistent term references in translated message.' +
' original: {0}, translated: {1}')
.format(old_xref_rawsources, new_xref_rawsources),
location=node, type='i18n', subtype='inconsistent_references')
def get_ref_key(node: addnodes.pending_xref) -> tuple[str, str, str] | None:
case = node["refdomain"], node["reftype"]
if case == ('std', 'term'):
return None
else:
return (
node["refdomain"],
node["reftype"],
node['reftarget'],)
for old in old_xrefs:
key = get_ref_key(old)
if key:
xref_reftarget_map[key] = old.attributes
for new in new_xrefs:
key = get_ref_key(new)
# Copy attributes to keep original node behavior. Especially
# copying 'reftarget', 'py:module', 'py:class' are needed.
for k, v in xref_reftarget_map.get(key, {}).items():
if k not in EXCLUDED_PENDING_XREF_ATTRIBUTES:
new[k] = v
# update leaves
for child in patch.children:
child.parent = node
node.children = patch.children
# for highlighting that expects .rawsource and .astext() are same. # for highlighting that expects .rawsource and .astext() are same.
if isinstance(node, LITERAL_TYPE_NODES): if isinstance(node, LITERAL_TYPE_NODES):
@ -495,7 +513,8 @@ class Locale(SphinxTransform):
node['entries'] = new_entries node['entries'] = new_entries
# remove translated attribute that is used for avoiding double translation. # remove translated attribute that is used for avoiding double translation.
for translated in self.document.findall(NodeMatcher(translated=Any)): # type: Element matcher = NodeMatcher(translated=Any)
for translated in self.document.findall(matcher): # type: nodes.Element
translated.delattr('translated') translated.delattr('translated')