From c49d925b4d861d28cc11356846f687c64069887d Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Date: Thu, 13 Feb 2025 00:16:26 +0000 Subject: [PATCH] Remove ``productionlist`` hard-coding in writers (#13326) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ``productionlist`` directive operates in a line-based context, creating an ``addnodes.productionlist`` container of ``addnodes.production`` nodes, with one per production in the directive. However, the full state of the abstract document tree is not included in the produced nodes, with each builder/translator implementing a different way of appending the fixed separator ``::=`` and justifying the displayed text. This should not happen in the writer, and hard-coding such details hampers flexibility when documenting different abstract grammars. We move the specific form of the ``.. productionlist::`` directive to the logic in the directive body and have the writers apply minimal custom logic. LaTeX changes written by Jean-François B. --- CHANGES.rst | 3 + doc/usage/restructuredtext/directives.rst | 76 +++++----- sphinx/domains/std/__init__.py | 135 +++++++++++++----- sphinx/texinputs/sphinxlatexobjects.sty | 39 +++-- sphinx/writers/html5.py | 20 +-- sphinx/writers/latex.py | 25 ++-- sphinx/writers/manpage.py | 21 +-- sphinx/writers/texinfo.py | 22 ++- sphinx/writers/text.py | 30 ++-- .../test_directive_productionlist.py | 20 +-- 10 files changed, 229 insertions(+), 162 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 4aedf0411..9f7b8f91e 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -108,6 +108,9 @@ Features added Patch by Jakob Lykke Andersen and Adam Turner. * #11280: Add ability to skip a particular section using the ``no-search`` class. Patch by Will Lachance. +* #13326: Remove hardcoding from handling :class:`~sphinx.addnodes.productionlist` + nodes in all writers, to improve flexibility. + Patch by Adam Turner. 
Bugs fixed ---------- diff --git a/doc/usage/restructuredtext/directives.rst b/doc/usage/restructuredtext/directives.rst index ee085788e..33269b522 100644 --- a/doc/usage/restructuredtext/directives.rst +++ b/doc/usage/restructuredtext/directives.rst @@ -1642,49 +1642,51 @@ Grammar production displays --------------------------- Special markup is available for displaying the productions of a formal grammar. -The markup is simple and does not attempt to model all aspects of BNF (or any -derived forms), but provides enough to allow context-free grammars to be -displayed in a way that causes uses of a symbol to be rendered as hyperlinks to -the definition of the symbol. There is this directive: +The markup is simple and does not attempt to model all aspects of BNF_ +(or any derived forms), but provides enough to allow context-free grammars +to be displayed in a way that causes uses of a symbol to be rendered +as hyperlinks to the definition of the symbol. +There is this directive: -.. rst:directive:: .. productionlist:: [productionGroup] +.. _BNF: https://en.wikipedia.org/wiki/Backus%E2%80%93Naur_form - This directive is used to enclose a group of productions. Each production - is given on a single line and consists of a name, separated by a colon from - the following definition. If the definition spans multiple lines, each - continuation line must begin with a colon placed at the same column as in - the first line. +.. rst:directive:: .. productionlist:: [production_group] + + This directive is used to enclose a group of productions. + Each production is given on a single line and consists of a name, + separated by a colon from the following definition. + If the definition spans multiple lines, each continuation line + must begin with a colon placed at the same column as in the first line. Blank lines are not allowed within ``productionlist`` directive arguments. 
- The definition can contain token names which are marked as interpreted text - (e.g., "``sum ::= `integer` "+" `integer```") -- this generates - cross-references to the productions of these tokens. Outside of the - production list, you can reference to token productions using - :rst:role:`token`. + The optional *production_group* directive argument serves to distinguish + different sets of production lists that belong to different grammars. + Multiple production lists with the same *production_group* + thus define rules in the same scope. + This can also be used to split the description of a long or complex grammar + across multiple ``productionlist`` directives with the same *production_group*. - The *productionGroup* argument to :rst:dir:`productionlist` serves to - distinguish different sets of production lists that belong to different - grammars. Multiple production lists with the same *productionGroup* thus - define rules in the same scope. + The definition can contain token names which are marked as interpreted text, + (e.g. "``sum ::= `integer` "+" `integer```"), + to generate cross-references to the productions of these tokens. + Such cross-references implicitly refer to productions from the current group. + To reference a production from another grammar, the token name + must be prefixed with the group name and a colon, e.g. "``other-group:sum``". + If the group of the token should not be shown in the production, + it can be prefixed by a tilde, e.g., "``~other-group:sum``". + To refer to a production from an unnamed grammar, + the token should be prefixed by a colon, e.g., "``:sum``". + No further reStructuredText parsing is done in the production, + so that special characters (``*``, ``|``, etc) do not need to be escaped. - Inside of the production list, tokens implicitly refer to productions - from the current group. 
You can refer to the production of another - grammar by prefixing the token with its group name and a colon, e.g, - "``otherGroup:sum``". If the group of the token should not be shown in - the production, it can be prefixed by a tilde, e.g., - "``~otherGroup:sum``". To refer to a production from an unnamed - grammar, the token should be prefixed by a colon, e.g., "``:sum``". - - Outside of the production list, - if you have given a *productionGroup* argument you must prefix the - token name in the cross-reference with the group name and a colon, - e.g., "``myGroup:sum``" instead of just "``sum``". - If the group should not be shown in the title of the link either - an explicit title can be given (e.g., "``myTitle <myGroup:sum>``"), - or the target can be prefixed with a tilde (e.g., "``~myGroup:sum``"). - - Note that no further reStructuredText parsing is done in the production, - so that you don't have to escape ``*`` or ``|`` characters. + Token productions can be cross-referenced outwith the production list + by using the :rst:role:`token` role. + If you have used a *production_group* argument, + the token name must be prefixed with the group name and a colon, + e.g., "``my_group:sum``" instead of just "``sum``". + Standard :ref:`cross-referencing modifiers <xref-modifiers>` + may be used with the ``:token:`` role, + such as custom link text and suppressing the group name with a tilde (``~``). 
The following is an example taken from the Python Reference Manual:: diff --git a/sphinx/domains/std/__init__.py b/sphinx/domains/std/__init__.py index f6f176deb..7909138ac 100644 --- a/sphinx/domains/std/__init__.py +++ b/sphinx/domains/std/__init__.py @@ -2,6 +2,7 @@ from __future__ import annotations +import operator import re from copy import copy from typing import TYPE_CHECKING, cast @@ -22,7 +23,7 @@ from sphinx.util.nodes import clean_astext, make_id, make_refnode from sphinx.util.parsing import nested_parse_to_nodes if TYPE_CHECKING: - from collections.abc import Callable, Iterable, Iterator, Set + from collections.abc import Callable, Iterable, Iterator, MutableSequence, Set from typing import Any, ClassVar, Final from docutils.nodes import Element, Node, system_message @@ -553,7 +554,7 @@ class Glossary(SphinxDirective): return [*messages, node] -def token_xrefs(text: str, production_group: str = '') -> list[Node]: +def token_xrefs(text: str, production_group: str = '') -> Iterable[Node]: if len(production_group) != 0: production_group += ':' retnodes: list[Node] = [] @@ -596,43 +597,107 @@ class ProductionList(SphinxDirective): final_argument_whitespace = True option_spec: ClassVar[OptionSpec] = {} + # The backslash handling is from ObjectDescription.get_signatures + _nl_escape_re: Final = re.compile(r'\\\n') + + # Get 'name' from triples of rawsource, name, definition (tokens) + _name_getter = operator.itemgetter(1) + def run(self) -> list[Node]: - domain = self.env.domains.standard_domain - node: Element = addnodes.productionlist() + name_getter = self._name_getter + lines = self._nl_escape_re.sub('', self.arguments[0]).splitlines() + + # Extract production_group argument. + # Must be before extracting production definition triples. 
+ production_group = self.production_group(lines=lines, options=self.options) + production_lines = list(self.production_definitions(lines)) + max_name_len = max(map(len, map(name_getter, production_lines))) + node_location = self.get_location() + + productions = [ + self.make_production( + rawsource=rule, + name=name, + tokens=tokens, + production_group=production_group, + max_len=max_name_len, + location=node_location, + ) + for rule, name, tokens in production_lines + ] + node = addnodes.productionlist('', *productions) self.set_source_info(node) - # The backslash handling is from ObjectDescription.get_signatures - nl_escape_re = re.compile(r'\\\n') - lines = nl_escape_re.sub('', self.arguments[0]).split('\n') - - production_group = '' - first_rule_seen = False - for rule in lines: - if not first_rule_seen and ':' not in rule: - production_group = rule.strip() - continue - first_rule_seen = True - try: - name, tokens = rule.split(':', 1) - except ValueError: - break - subnode = addnodes.production(rule) - name = name.strip() - subnode['tokenname'] = name - if subnode['tokenname']: - prefix = 'grammar-token-%s' % production_group - node_id = make_id(self.env, self.state.document, prefix, name) - subnode['ids'].append(node_id) - self.state.document.note_implicit_target(subnode, subnode) - - if len(production_group) != 0: - obj_name = f'{production_group}:{name}' - else: - obj_name = name - domain.note_object('token', obj_name, node_id, location=node) - subnode.extend(token_xrefs(tokens, production_group=production_group)) - node.append(subnode) return [node] + @staticmethod + def production_group( + *, + lines: MutableSequence[str], + options: dict[str, Any], # NoQA: ARG004 + ) -> str: + # get production_group + if not lines or ':' in lines[0]: + return '' + production_group = lines[0].strip() + lines[:] = lines[1:] + return production_group + + @staticmethod + def production_definitions( + lines: Iterable[str], / + ) -> Iterator[tuple[str, str, str]]: + """Yield 
triples of rawsource, name, definition (tokens).""" + for line in lines: + if ':' not in line: + break + name, _, tokens = line.partition(':') + yield line, name.strip(), tokens.strip() + + def make_production( + self, + *, + rawsource: str, + name: str, + tokens: str, + production_group: str, + max_len: int, + location: str, + ) -> addnodes.production: + production_node = addnodes.production(rawsource, tokenname=name) + if name: + production_node += self.make_name_target( + name=name, production_group=production_group, location=location + ) + production_node.append(self.separator_node(name=name, max_len=max_len)) + production_node += token_xrefs(text=tokens, production_group=production_group) + production_node.append(nodes.Text('\n')) + return production_node + + def make_name_target( + self, + *, + name: str, + production_group: str, + location: str, + ) -> addnodes.literal_strong: + """Make a link target for the given production.""" + name_node = addnodes.literal_strong(name, name) + prefix = f'grammar-token-{production_group}' + node_id = make_id(self.env, self.state.document, prefix, name) + name_node['ids'].append(node_id) + self.state.document.note_implicit_target(name_node, name_node) + obj_name = f'{production_group}:{name}' if production_group else name + std = self.env.domains.standard_domain + std.note_object('token', obj_name, node_id, location=location) + return name_node + + @staticmethod + def separator_node(*, name: str, max_len: int) -> nodes.Text: + """Return separator between 'name' and 'tokens'.""" + if name: + return nodes.Text(' ::= '.rjust(max_len - len(name) + 5)) + return nodes.Text(' ' * (max_len + 5)) + class TokenXRefRole(XRefRole): def process_link( diff --git a/sphinx/texinputs/sphinxlatexobjects.sty b/sphinx/texinputs/sphinxlatexobjects.sty index e960440ca..1147a0162 100644 --- a/sphinx/texinputs/sphinxlatexobjects.sty +++ b/sphinx/texinputs/sphinxlatexobjects.sty @@ -1,7 +1,7 @@ %% MODULE RELEASE DATA AND OBJECT DESCRIPTIONS % % 
change this info string if making any custom modification -\ProvidesPackage{sphinxlatexobjects}[2023/07/23 documentation environments] +\ProvidesPackage{sphinxlatexobjects}[2025/02/11 documentation environments] % Provides support for this output mark-up from Sphinx latex writer: % @@ -279,18 +279,37 @@ \newcommand{\pysigstopmultiline}{\sphinxsigismultilinefalse\itemsep\sphinxsignaturesep}% % Production lists +% This simply outputs the lines as is, in monospace font. Refers #13326. +% (the left padding for multi-line alignment is from the nodes themselves, +% and latex is configured below to obey such horizontal whitespace). +% +% - The legacy code used longtable and hardcoded the separator as ::= +% via dedicated macros defined by the environment itself. +% - Here the separator is part of the node. Any extra LaTeX mark-up would +% have to originate from the writer itself to decorate it. +% - The legacy code used strangely \parindent and \indent. Possibly +% (unchecked) due to an earlier tabular usage, but a longtable does not +% work in paragraph mode, so \parindent was without effect and +% \indent only caused some extra blank line above display. +% - The table had some whitespace on its left, which we imitate here via +% \parindent usage (which works in our context...). 
% \newenvironment{productionlist}{% -% \def\sphinxoptional##1{{\Large[}##1{\Large]}} - \def\production##1##2{\\\sphinxcode{\sphinxupquote{##1}}&::=&\sphinxcode{\sphinxupquote{##2}}}% - \def\productioncont##1{\\& &\sphinxcode{\sphinxupquote{##1}}}% - \parindent=2em - \indent - \setlength{\LTpre}{0pt}% - \setlength{\LTpost}{0pt}% - \begin{longtable}[l]{lcl} + \bigskip % imitate close enough legacy vertical whitespace, which was + % visibly excessive + \ttfamily % needed for space tokens to have same width as letters + \parindent1em % width of a "quad", font-dependent, usually circa width of 2 + % letters + \obeylines % line in = line out + \parskip\z@skip % prevent the parskip vertical whitespace between lines, + % which are technically to LaTeX now each its own paragraph + \@vobeyspaces % obey whitespace + % now a technicality to, only locally to this environment, prevent the + % suppression of indentation of first line, if it comes right after + % \section. Cf package indentfirst from which the code is borrowed. + \let\@afterindentfalse\@afterindenttrue\@afterindenttrue }{% - \end{longtable} + \par % does not hurt... } % Definition lists; requested by AMK for HOWTO documents. 
Probably useful diff --git a/sphinx/writers/html5.py b/sphinx/writers/html5.py index 86ab4f4c2..683eebbf8 100644 --- a/sphinx/writers/html5.py +++ b/sphinx/writers/html5.py @@ -5,7 +5,7 @@ from __future__ import annotations import posixpath import re import urllib.parse -from typing import TYPE_CHECKING, cast +from typing import TYPE_CHECKING from docutils import nodes from docutils.writers.html5_polyglot import HTMLTranslator as BaseTranslator @@ -17,8 +17,6 @@ from sphinx.util.docutils import SphinxTranslator from sphinx.util.images import get_image_size if TYPE_CHECKING: - from collections.abc import Iterable - from docutils.nodes import Element, Node, Text from sphinx.builders import Builder @@ -695,23 +693,9 @@ class HTML5Translator(SphinxTranslator, BaseTranslator): # type: ignore[misc] def visit_productionlist(self, node: Element) -> None: self.body.append(self.starttag(node, 'pre')) - productionlist = cast('Iterable[addnodes.production]', node) - maxlen = max(len(production['tokenname']) for production in productionlist) - lastname = None - for production in productionlist: - if production['tokenname']: - lastname = production['tokenname'].ljust(maxlen) - self.body.append(self.starttag(production, 'strong', '')) - self.body.append(lastname + ' ::= ') - elif lastname is not None: - self.body.append(' ' * (maxlen + 5)) - production.walkabout(self) - self.body.append('\n') - self.body.append('\n') - raise nodes.SkipNode def depart_productionlist(self, node: Element) -> None: - pass + self.body.append('\n') def visit_production(self, node: Element) -> None: pass diff --git a/sphinx/writers/latex.py b/sphinx/writers/latex.py index ee779ea7c..bfbdb268a 100644 --- a/sphinx/writers/latex.py +++ b/sphinx/writers/latex.py @@ -323,7 +323,7 @@ class LaTeXTranslator(SphinxTranslator): # flags self.in_title = 0 - self.in_production_list = 0 + self.in_production_list = False self.in_footnote = 0 self.in_caption = 0 self.in_term = 0 @@ -671,22 +671,20 @@ class 
LaTeXTranslator(SphinxTranslator): def visit_productionlist(self, node: Element) -> None: self.body.append(BLANKLINE) self.body.append(r'\begin{productionlist}' + CR) - self.in_production_list = 1 + self.in_production_list = True def depart_productionlist(self, node: Element) -> None: + self.in_production_list = False self.body.append(r'\end{productionlist}' + BLANKLINE) - self.in_production_list = 0 def visit_production(self, node: Element) -> None: - if node['tokenname']: - tn = node['tokenname'] - self.body.append(self.hypertarget('grammar-token-' + tn)) - self.body.append(r'\production{%s}{' % self.encode(tn)) - else: - self.body.append(r'\productioncont{') + # Nothing to do, the productionlist LaTeX environment + # is configured to render the nodes line-by-line + # But see also visit_literal_strong special clause. + pass def depart_production(self, node: Element) -> None: - self.body.append('}' + CR) + pass def visit_transition(self, node: Element) -> None: self.body.append(self.elements['transition']) @@ -2070,9 +2068,16 @@ class LaTeXTranslator(SphinxTranslator): self.body.append('}') def visit_literal_strong(self, node: Element) -> None: + if self.in_production_list: + ctx = [r'\phantomsection'] + ctx += [self.hypertarget(id_, anchor=False) for id_ in node['ids']] + self.body.append(''.join(ctx)) + return self.body.append(r'\sphinxstyleliteralstrong{\sphinxupquote{') def depart_literal_strong(self, node: Element) -> None: + if self.in_production_list: + return self.body.append('}}') def visit_abbreviation(self, node: Element) -> None: diff --git a/sphinx/writers/manpage.py b/sphinx/writers/manpage.py index 3fa4638c9..171761fa2 100644 --- a/sphinx/writers/manpage.py +++ b/sphinx/writers/manpage.py @@ -79,8 +79,6 @@ class ManualPageTranslator(SphinxTranslator, BaseTranslator): # type: ignore[mi def __init__(self, document: nodes.document, builder: Builder) -> None: super().__init__(document, builder) - self.in_productionlist = 0 - # first title is the manpage 
title self.section_level = -1 @@ -274,25 +272,10 @@ class ManualPageTranslator(SphinxTranslator, BaseTranslator): # type: ignore[mi def visit_productionlist(self, node: Element) -> None: self.ensure_eol() - self.in_productionlist += 1 self.body.append('.sp\n.nf\n') - productionlist = cast('Iterable[addnodes.production]', node) - maxlen = max(len(production['tokenname']) for production in productionlist) - lastname = None - for production in productionlist: - if production['tokenname']: - lastname = production['tokenname'].ljust(maxlen) - self.body.append(self.defs['strong'][0]) - self.body.append(self.deunicode(lastname)) - self.body.append(self.defs['strong'][1]) - self.body.append(' ::= ') - elif lastname is not None: - self.body.append(' ' * (maxlen + 5)) - production.walkabout(self) - self.body.append('\n') + + def depart_productionlist(self, node: Element) -> None: self.body.append('\n.fi\n') - self.in_productionlist -= 1 - raise nodes.SkipNode def visit_production(self, node: Element) -> None: pass diff --git a/sphinx/writers/texinfo.py b/sphinx/writers/texinfo.py index d3da34f10..5a7826a5d 100644 --- a/sphinx/writers/texinfo.py +++ b/sphinx/writers/texinfo.py @@ -189,6 +189,7 @@ class TexinfoTranslator(SphinxTranslator): self.escape_hyphens = 0 self.curfilestack: list[str] = [] self.footnotestack: list[dict[str, list[collected_footnote | bool]]] = [] + self.in_production_list = False self.in_footnote = 0 self.in_samp = 0 self.handled_abbrs: set[str] = set() @@ -1308,20 +1309,11 @@ class TexinfoTranslator(SphinxTranslator): def visit_productionlist(self, node: Element) -> None: self.visit_literal_block(None) - productionlist = cast('Iterable[addnodes.production]', node) - maxlen = max(len(production['tokenname']) for production in productionlist) + self.in_production_list = True - for production in productionlist: - if production['tokenname']: - for id in production.get('ids'): - self.add_anchor(id, production) - s = production['tokenname'].ljust(maxlen) + ' 
::=' - else: - s = ' ' * (maxlen + 4) - self.body.append(self.escape(s)) - self.body.append(self.escape(production.astext() + '\n')) + def depart_productionlist(self, node: Element) -> None: + self.in_production_list = False self.depart_literal_block(None) - raise nodes.SkipNode def visit_production(self, node: Element) -> None: pass @@ -1336,9 +1328,15 @@ class TexinfoTranslator(SphinxTranslator): self.body.append('}') def visit_literal_strong(self, node: Element) -> None: + if self.in_production_list: + for id_ in node['ids']: + self.add_anchor(id_, node) + return self.body.append('@code{') def depart_literal_strong(self, node: Element) -> None: + if self.in_production_list: + return self.body.append('}') def visit_index(self, node: Element) -> None: diff --git a/sphinx/writers/text.py b/sphinx/writers/text.py index d712fd133..84ce9ccc7 100644 --- a/sphinx/writers/text.py +++ b/sphinx/writers/text.py @@ -408,6 +408,7 @@ class TextTranslator(SphinxTranslator): self.sectionlevel = 0 self.lineblocklevel = 0 self.table: Table + self.in_production_list = False self.context: list[str] = [] """Heterogeneous stack. 
@@ -787,18 +788,17 @@ class TextTranslator(SphinxTranslator): def visit_productionlist(self, node: Element) -> None: self.new_state() - productionlist = cast('Iterable[addnodes.production]', node) - maxlen = max(len(production['tokenname']) for production in productionlist) - lastname = None - for production in productionlist: - if production['tokenname']: - self.add_text(production['tokenname'].ljust(maxlen) + ' ::=') - lastname = production['tokenname'] - elif lastname is not None: - self.add_text(' ' * (maxlen + 4)) - self.add_text(production.astext() + self.nl) + self.in_production_list = True + + def depart_productionlist(self, node: Element) -> None: + self.in_production_list = False self.end_state(wrap=False) - raise nodes.SkipNode + + def visit_production(self, node: Element) -> None: + pass + + def depart_production(self, node: Element) -> None: + pass def visit_footnote(self, node: Element) -> None: label = cast('nodes.label', node[0]) @@ -1224,9 +1224,13 @@ class TextTranslator(SphinxTranslator): self.add_text('**') def visit_literal_strong(self, node: Element) -> None: + if self.in_production_list: + return self.add_text('**') def depart_literal_strong(self, node: Element) -> None: + if self.in_production_list: + return self.add_text('**') def visit_abbreviation(self, node: Element) -> None: @@ -1249,9 +1253,13 @@ class TextTranslator(SphinxTranslator): self.add_text('*') def visit_literal(self, node: Element) -> None: + if self.in_production_list: + return self.add_text('"') def depart_literal(self, node: Element) -> None: + if self.in_production_list: + return self.add_text('"') def visit_subscript(self, node: Element) -> None: diff --git a/tests/test_directives/test_directive_productionlist.py b/tests/test_directives/test_directive_productionlist.py index 2026730c6..b127936ca 100644 --- a/tests/test_directives/test_directive_productionlist.py +++ b/tests/test_directives/test_directive_productionlist.py @@ -78,7 +78,7 @@ def test_productionlist(app: 
SphinxTestApp) -> None: ] text = (app.outdir / 'LineContinuation.html').read_text(encoding='utf8') - assert 'A ::= B C D E F G' in text + assert 'A ::= B C D E F G' in text @pytest.mark.sphinx('html', testroot='root') @@ -140,14 +140,14 @@ def test_productionlist_continuation_lines( _, _, content = content.partition('
')
     content, _, _ = content.partition('
') expected = """ -assignment_stmt ::= (target_list "=")+ (starred_expression | yield_expression) -target_list ::= target ("," target)* [","] -target ::= identifier - | "(" [target_list] ")" - | "[" [target_list] "]" - | attributeref - | subscription - | slicing - | "*" target +assignment_stmt ::= (target_list "=")+ (starred_expression | yield_expression) +target_list ::= target ("," target)* [","] +target ::= identifier + | "(" [target_list] ")" + | "[" [target_list] "]" + | attributeref + | subscription + | slicing + | "*" target """ assert content == expected