Remove `productionlist` hard-coding in translators

2025-02-25 18:55:22 -06:00 · 2025-02-10 19:04:19 +00:00 · 2025-02-10 19:04:19 +00:00 · eed2524640
commit eed2524640
parent a9534901a2
8 changed files with 146 additions and 114 deletions
--- a/doc/usage/restructuredtext/directives.rst
+++ b/doc/usage/restructuredtext/directives.rst
@ -1647,7 +1647,7 @@ derived forms), but provides enough to allow context-free grammars to be
 displayed in a way that causes uses of a symbol to be rendered as hyperlinks to
 the definition of the symbol.  There is this directive:

-.. rst:directive:: .. productionlist:: [productionGroup]
+.. rst:directive:: .. productionlist:: [production_group]

   This directive is used to enclose a group of productions.  Each production
   is given on a single line and consists of a name, separated by a colon from
@ -1662,26 +1662,26 @@ the definition of the symbol.  There is this directive:
   production list, you can refer to token productions using
   :rst:role:`token`.

-   The *productionGroup* argument to :rst:dir:`productionlist` serves to
+   The *production_group* argument to :rst:dir:`productionlist` serves to
   distinguish different sets of production lists that belong to different
-   grammars.  Multiple production lists with the same *productionGroup* thus
+   grammars.  Multiple production lists with the same *production_group* thus
   define rules in the same scope.

   Inside of the production list, tokens implicitly refer to productions
   from the current group. You can refer to the production of another
   grammar by prefixing the token with its group name and a colon, e.g.,
-   "``otherGroup:sum``". If the group of the token should not be shown in
+   "``other-group:sum``". If the group of the token should not be shown in
   the production, it can be prefixed by a tilde, e.g.,
-   "``~otherGroup:sum``". To refer to a production from an unnamed
+   "``~other-group:sum``". To refer to a production from an unnamed
   grammar, the token should be prefixed by a colon, e.g., "``:sum``".

   Outside of the production list,
-   if you have given a *productionGroup* argument you must prefix the
+   if you have given a *production_group* argument you must prefix the
   token name in the cross-reference with the group name and a colon,
-   e.g., "``myGroup:sum``" instead of just "``sum``".
+   e.g., "``my_group:sum``" instead of just "``sum``".
   If the group should not be shown in the title of the link, either
-   an explicit title can be given (e.g., "``myTitle <myGroup:sum>``"),
-   or the target can be prefixed with a tilde (e.g., "``~myGroup:sum``").
+   an explicit title can be given (e.g., "``myTitle <my_group:sum>``"),
+   or the target can be prefixed with a tilde (e.g., "``~my_group:sum``").

   Note that no further reStructuredText parsing is done in the production,
   so that you don't have to escape ``*`` or ``|`` characters.
--- a/sphinx/domains/std/init.py
+++ b/sphinx/domains/std/init.py
@ -22,7 +22,7 @@ from sphinx.util.nodes import clean_astext, make_id, make_refnode
 from sphinx.util.parsing import nested_parse_to_nodes

 if TYPE_CHECKING:
-    from collections.abc import Callable, Iterable, Iterator, Set
+    from collections.abc import Callable, Iterable, Iterator, Sequence, Set
    from typing import Any, ClassVar, Final

    from docutils.nodes import Element, Node, system_message
@ -597,42 +597,92 @@ class ProductionList(SphinxDirective):
    option_spec: ClassVar[OptionSpec] = {}

    def run(self) -> list[Node]:
-        domain = self.env.domains.standard_domain
-        node: Element = addnodes.productionlist()
+        node = addnodes.productionlist()
        self.set_source_info(node)
        # The backslash handling is from ObjectDescription.get_signatures
        nl_escape_re = re.compile(r'\\\n')
        lines = nl_escape_re.sub('', self.arguments[0]).split('\n')
-
-        production_group = ''
-        first_rule_seen = False
-        for rule in lines:
-            if not first_rule_seen and ':' not in rule:
-                production_group = rule.strip()
-                continue
-            first_rule_seen = True
-            try:
-                name, tokens = rule.split(':', 1)
-            except ValueError:
-                break
-            subnode = addnodes.production(rule)
-            name = name.strip()
-            subnode['tokenname'] = name
-            if subnode['tokenname']:
-                prefix = 'grammar-token-%s' % production_group
-                node_id = make_id(self.env, self.state.document, prefix, name)
-                subnode['ids'].append(node_id)
-                self.state.document.note_implicit_target(subnode, subnode)
-
-                if len(production_group) != 0:
-                    obj_name = f'{production_group}:{name}'
-                else:
-                    obj_name = name
-                domain.note_object('token', obj_name, node_id, location=node)
-            subnode.extend(token_xrefs(tokens, production_group=production_group))
-            node.append(subnode)
+        production_group = self.production_group(lines, self.options)
+        production_lines = list(self.production_definitions(lines))
+        max_len = max(len(name) for _, name, _ in production_lines)
+        node_location = self.get_location()
+        node += [
+            self.make_production(
+                rawsource=rule,
+                name=name,
+                tokens=tokens,
+                production_group=production_group,
+                max_len=max_len,
+                location=node_location,
+            )
+            for rule, name, tokens in production_lines
+        ]
        return [node]

+    @staticmethod
+    def production_group(lines: Sequence[str], options: dict[str, Any]) -> str:  # NoQA: ARG004
+        # get production_group
+        if not lines or ':' in lines[0]:
+            return ''
+        production_group = lines[0].strip()
+        lines[:] = lines[1:]
+        return production_group
+
+    @staticmethod
+    def production_definitions(lines: Iterable[str]) -> Iterator[tuple[str, str, str]]:
+        """Yield triples of rawsource, name, definition."""
+        for line in lines:
+            if ':' not in line:
+                break
+            name, _, tokens = line.partition(':')
+            yield line, name.strip(), tokens.strip()
+
+    def make_production(
+        self,
+        rawsource: str,
+        name: str,
+        tokens: str,
+        production_group: str,
+        max_len: int,
+        location: str,
+    ) -> addnodes.production:
+        production_node = addnodes.production(rawsource, tokenname=name)
+        if name:
+            production_node += self.make_target(name, production_group, location)
+        else:
+            production_node += self.continuation_padding(max_len)
+        production_node.append(self.production_separator(name, max_len))
+        production_node += token_xrefs(tokens, production_group=production_group)
+        production_node.append(nodes.Text('\n'))
+        return production_node
+
+    def make_target(
+        self,
+        name: str,
+        production_group: str,
+        location: str,
+    ) -> addnodes.literal_strong:
+        """Make a link target for the given production."""
+        name_node = addnodes.literal_strong(name, name)
+        prefix = f'grammar-token-{production_group}'
+        node_id = make_id(self.env, self.state.document, prefix, name)
+        name_node['ids'].append(node_id)
+        self.state.document.note_implicit_target(name_node, name_node)
+        obj_name = f'{production_group}:{name}' if production_group else name
+        std = self.env.domains.standard_domain
+        std.note_object('token', obj_name, node_id, location=location)
+        return name_node
+
+    @staticmethod
+    def continuation_padding(max_len: int) -> nodes.Text:
+        return nodes.Text(' ' * max_len)
+
+    @staticmethod
+    def production_separator(name: str, max_len: int) -> nodes.Text:
+        if name:
+            return nodes.Text(' ::= '.rjust(max_len - len(name) + 5))
+        return nodes.Text('     ')
+

 class TokenXRefRole(XRefRole):
    def process_link(
--- a/sphinx/writers/html5.py
+++ b/sphinx/writers/html5.py
@ -5,7 +5,7 @@ from __future__ import annotations
 import posixpath
 import re
 import urllib.parse
-from typing import TYPE_CHECKING, cast
+from typing import TYPE_CHECKING

 from docutils import nodes
 from docutils.writers.html5_polyglot import HTMLTranslator as BaseTranslator
@ -17,8 +17,6 @@ from sphinx.util.docutils import SphinxTranslator
 from sphinx.util.images import get_image_size

 if TYPE_CHECKING:
-    from collections.abc import Iterable
-
    from docutils.nodes import Element, Node, Text

    from sphinx.builders import Builder
@ -695,23 +693,9 @@ class HTML5Translator(SphinxTranslator, BaseTranslator):  # type: ignore[misc]

    def visit_productionlist(self, node: Element) -> None:
        self.body.append(self.starttag(node, 'pre'))
-        productionlist = cast('Iterable[addnodes.production]', node)
-        maxlen = max(len(production['tokenname']) for production in productionlist)
-        lastname = None
-        for production in productionlist:
-            if production['tokenname']:
-                lastname = production['tokenname'].ljust(maxlen)
-                self.body.append(self.starttag(production, 'strong', ''))
-                self.body.append(lastname + '</strong> ::= ')
-            elif lastname is not None:
-                self.body.append(' ' * (maxlen + 5))
-            production.walkabout(self)
-            self.body.append('\n')
-        self.body.append('</pre>\n')
-        raise nodes.SkipNode

    def depart_productionlist(self, node: Element) -> None:
-        pass
+        self.body.append('</pre>\n')

    def visit_production(self, node: Element) -> None:
        pass
--- a/sphinx/writers/latex.py
+++ b/sphinx/writers/latex.py
@ -323,7 +323,7 @@ class LaTeXTranslator(SphinxTranslator):

        # flags
        self.in_title = 0
-        self.in_production_list = 0
+        self.in_production_list = False
        self.in_footnote = 0
        self.in_caption = 0
        self.in_term = 0
@ -671,20 +671,25 @@ class LaTeXTranslator(SphinxTranslator):
    def visit_productionlist(self, node: Element) -> None:
        self.body.append(BLANKLINE)
        self.body.append(r'\begin{productionlist}' + CR)
-        self.in_production_list = 1
+        self.in_production_list = True

    def depart_productionlist(self, node: Element) -> None:
        self.body.append(r'\end{productionlist}' + BLANKLINE)
-        self.in_production_list = 0
+        self.in_production_list = False

    def visit_production(self, node: Element) -> None:
        if node['tokenname']:
            tn = node['tokenname']
-            self.body.append(self.hypertarget('grammar-token-' + tn))
+            self.body.append(self.hypertarget(f'grammar-token-{tn}'))
            self.body.append(r'\production{%s}{' % self.encode(tn))
        else:
            self.body.append(r'\productioncont{')

+        # remove name/padding and separator child nodes,
+        # these are handled by '\production' and '\productioncont'
+        # TODO: remove special LaTeX handling of production nodes
+        node[:] = node[2:]
+
    def depart_production(self, node: Element) -> None:
        self.body.append('}' + CR)

@ -2070,9 +2075,13 @@ class LaTeXTranslator(SphinxTranslator):
        self.body.append('}')

    def visit_literal_strong(self, node: Element) -> None:
+        if self.in_production_list:
+            return
        self.body.append(r'\sphinxstyleliteralstrong{\sphinxupquote{')

    def depart_literal_strong(self, node: Element) -> None:
+        if self.in_production_list:
+            return
        self.body.append('}}')

    def visit_abbreviation(self, node: Element) -> None:
--- a/sphinx/writers/manpage.py
+++ b/sphinx/writers/manpage.py
@ -79,8 +79,6 @@ class ManualPageTranslator(SphinxTranslator, BaseTranslator):  # type: ignore[mi
    def __init__(self, document: nodes.document, builder: Builder) -> None:
        super().__init__(document, builder)

-        self.in_productionlist = 0
-
        # first title is the manpage title
        self.section_level = -1

@ -274,25 +272,10 @@ class ManualPageTranslator(SphinxTranslator, BaseTranslator):  # type: ignore[mi

    def visit_productionlist(self, node: Element) -> None:
        self.ensure_eol()
-        self.in_productionlist += 1
        self.body.append('.sp\n.nf\n')
-        productionlist = cast('Iterable[addnodes.production]', node)
-        maxlen = max(len(production['tokenname']) for production in productionlist)
-        lastname = None
-        for production in productionlist:
-            if production['tokenname']:
-                lastname = production['tokenname'].ljust(maxlen)
-                self.body.append(self.defs['strong'][0])
-                self.body.append(self.deunicode(lastname))
-                self.body.append(self.defs['strong'][1])
-                self.body.append(' ::= ')
-            elif lastname is not None:
-                self.body.append(' ' * (maxlen + 5))
-            production.walkabout(self)
-            self.body.append('\n')
+
+    def depart_productionlist(self, node: Element) -> None:
        self.body.append('\n.fi\n')
-        self.in_productionlist -= 1
-        raise nodes.SkipNode

    def visit_production(self, node: Element) -> None:
        pass
--- a/sphinx/writers/texinfo.py
+++ b/sphinx/writers/texinfo.py
@ -189,6 +189,7 @@ class TexinfoTranslator(SphinxTranslator):
        self.escape_hyphens = 0
        self.curfilestack: list[str] = []
        self.footnotestack: list[dict[str, list[collected_footnote | bool]]] = []
+        self.in_production_list = False
        self.in_footnote = 0
        self.in_samp = 0
        self.handled_abbrs: set[str] = set()
@ -1308,20 +1309,11 @@ class TexinfoTranslator(SphinxTranslator):

    def visit_productionlist(self, node: Element) -> None:
        self.visit_literal_block(None)
-        productionlist = cast('Iterable[addnodes.production]', node)
-        maxlen = max(len(production['tokenname']) for production in productionlist)
+        self.in_production_list = True

-        for production in productionlist:
-            if production['tokenname']:
-                for id in production.get('ids'):
-                    self.add_anchor(id, production)
-                s = production['tokenname'].ljust(maxlen) + ' ::='
-            else:
-                s = ' ' * (maxlen + 4)
-            self.body.append(self.escape(s))
-            self.body.append(self.escape(production.astext() + '\n'))
+    def depart_productionlist(self, node: Element) -> None:
+        self.in_production_list = False
        self.depart_literal_block(None)
-        raise nodes.SkipNode

    def visit_production(self, node: Element) -> None:
        pass
@ -1336,9 +1328,15 @@ class TexinfoTranslator(SphinxTranslator):
        self.body.append('}')

    def visit_literal_strong(self, node: Element) -> None:
+        if self.in_production_list:
+            for id_ in node['ids']:
+                self.add_anchor(id_, node)
+            return
        self.body.append('@code{')

    def depart_literal_strong(self, node: Element) -> None:
+        if self.in_production_list:
+            return
        self.body.append('}')

    def visit_index(self, node: Element) -> None:
--- a/sphinx/writers/text.py
+++ b/sphinx/writers/text.py
@ -408,6 +408,7 @@ class TextTranslator(SphinxTranslator):
        self.sectionlevel = 0
        self.lineblocklevel = 0
        self.table: Table
+        self.in_production_list = False

        self.context: list[str] = []
        """Heterogeneous stack.
@ -787,18 +788,17 @@ class TextTranslator(SphinxTranslator):

    def visit_productionlist(self, node: Element) -> None:
        self.new_state()
-        productionlist = cast('Iterable[addnodes.production]', node)
-        maxlen = max(len(production['tokenname']) for production in productionlist)
-        lastname = None
-        for production in productionlist:
-            if production['tokenname']:
-                self.add_text(production['tokenname'].ljust(maxlen) + ' ::=')
-                lastname = production['tokenname']
-            elif lastname is not None:
-                self.add_text(' ' * (maxlen + 4))
-            self.add_text(production.astext() + self.nl)
+        self.in_production_list = True
+
+    def depart_productionlist(self, node: Element) -> None:
+        self.in_production_list = False
        self.end_state(wrap=False)
-        raise nodes.SkipNode
+
+    def visit_production(self, node: Element) -> None:
+        pass
+
+    def depart_production(self, node: Element) -> None:
+        pass

    def visit_footnote(self, node: Element) -> None:
        label = cast('nodes.label', node[0])
@ -1224,9 +1224,13 @@ class TextTranslator(SphinxTranslator):
        self.add_text('**')

    def visit_literal_strong(self, node: Element) -> None:
+        if self.in_production_list:
+            return
        self.add_text('**')

    def depart_literal_strong(self, node: Element) -> None:
+        if self.in_production_list:
+            return
        self.add_text('**')

    def visit_abbreviation(self, node: Element) -> None:
@ -1249,9 +1253,13 @@ class TextTranslator(SphinxTranslator):
        self.add_text('*')

    def visit_literal(self, node: Element) -> None:
+        if self.in_production_list:
+            return
        self.add_text('"')

    def depart_literal(self, node: Element) -> None:
+        if self.in_production_list:
+            return
        self.add_text('"')

    def visit_subscript(self, node: Element) -> None:
--- a/tests/test_directives/test_directive_productionlist.py
+++ b/tests/test_directives/test_directive_productionlist.py
@ -78,7 +78,7 @@ def test_productionlist(app: SphinxTestApp) -> None:
    ]

    text = (app.outdir / 'LineContinuation.html').read_text(encoding='utf8')
-    assert 'A</strong> ::=  B C D    E F G' in text
+    assert 'A</strong> ::= B C D    E F G' in text


@pytest.mark.sphinx('html', testroot='root')
@ -140,14 +140,14 @@ def test_productionlist_continuation_lines(
    _, _, content = content.partition('<pre>')
    content, _, _ = content.partition('</pre>')
    expected = """
-<strong id="grammar-token-python-grammar-assignment_stmt">assignment_stmt</strong> ::=  (<a class="reference internal" href="#grammar-token-python-grammar-target_list"><code class="xref docutils literal notranslate"><span class="pre">target_list</span></code></a> &quot;=&quot;)+ (<code class="xref docutils literal notranslate"><span class="pre">starred_expression</span></code> | <code class="xref docutils literal notranslate"><span class="pre">yield_expression</span></code>)
-<strong id="grammar-token-python-grammar-target_list">target_list    </strong> ::=  <a class="reference internal" href="#grammar-token-python-grammar-target"><code class="xref docutils literal notranslate"><span class="pre">target</span></code></a> (&quot;,&quot; <a class="reference internal" href="#grammar-token-python-grammar-target"><code class="xref docutils literal notranslate"><span class="pre">target</span></code></a>)* [&quot;,&quot;]
-<strong id="grammar-token-python-grammar-target">target         </strong> ::=  <code class="xref docutils literal notranslate"><span class="pre">identifier</span></code>
-                     | &quot;(&quot; [<a class="reference internal" href="#grammar-token-python-grammar-target_list"><code class="xref docutils literal notranslate"><span class="pre">target_list</span></code></a>] &quot;)&quot;
-                     | &quot;[&quot; [<a class="reference internal" href="#grammar-token-python-grammar-target_list"><code class="xref docutils literal notranslate"><span class="pre">target_list</span></code></a>] &quot;]&quot;
-                     | <code class="xref docutils literal notranslate"><span class="pre">attributeref</span></code>
-                     | <code class="xref docutils literal notranslate"><span class="pre">subscription</span></code>
-                     | <code class="xref docutils literal notranslate"><span class="pre">slicing</span></code>
-                     | &quot;*&quot; <a class="reference internal" href="#grammar-token-python-grammar-target"><code class="xref docutils literal notranslate"><span class="pre">target</span></code></a>
+<strong id="grammar-token-python-grammar-assignment_stmt">assignment_stmt</strong> ::= (<a class="reference internal" href="#grammar-token-python-grammar-target_list"><code class="xref docutils literal notranslate"><span class="pre">target_list</span></code></a> &quot;=&quot;)+ (<code class="xref docutils literal notranslate"><span class="pre">starred_expression</span></code> | <code class="xref docutils literal notranslate"><span class="pre">yield_expression</span></code>)
+<strong id="grammar-token-python-grammar-target_list">target_list</strong>     ::= <a class="reference internal" href="#grammar-token-python-grammar-target"><code class="xref docutils literal notranslate"><span class="pre">target</span></code></a> (&quot;,&quot; <a class="reference internal" href="#grammar-token-python-grammar-target"><code class="xref docutils literal notranslate"><span class="pre">target</span></code></a>)* [&quot;,&quot;]
+<strong id="grammar-token-python-grammar-target">target</strong>          ::= <code class="xref docutils literal notranslate"><span class="pre">identifier</span></code>
+                    | &quot;(&quot; [<a class="reference internal" href="#grammar-token-python-grammar-target_list"><code class="xref docutils literal notranslate"><span class="pre">target_list</span></code></a>] &quot;)&quot;
+                    | &quot;[&quot; [<a class="reference internal" href="#grammar-token-python-grammar-target_list"><code class="xref docutils literal notranslate"><span class="pre">target_list</span></code></a>] &quot;]&quot;
+                    | <code class="xref docutils literal notranslate"><span class="pre">attributeref</span></code>
+                    | <code class="xref docutils literal notranslate"><span class="pre">subscription</span></code>
+                    | <code class="xref docutils literal notranslate"><span class="pre">slicing</span></code>
+                    | &quot;*&quot; <a class="reference internal" href="#grammar-token-python-grammar-target"><code class="xref docutils literal notranslate"><span class="pre">target</span></code></a>
 """
    assert content == expected