Skip to content

Commit 443c0cd

Browse files
AA-Turner, encukou, and blaisep
authored
gh-127833: Use productionlist nodes to implement the grammar-snippet directive (#130376)
Co-authored-by: Petr Viktorin <[email protected]>
Co-authored-by: Blaise Pabon <[email protected]>
1 parent 86d5fa9 commit 443c0cd

File tree

2 files changed

+486
-417
lines changed

2 files changed

+486
-417
lines changed

Doc/tools/extensions/grammar_snippet.py

+112-70
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@
1313
from sphinx.util.nodes import make_id
1414

1515
if TYPE_CHECKING:
16-
from collections.abc import Sequence
17-
from typing import Any
16+
from collections.abc import Iterable, Iterator, Sequence
17+
from typing import Any, Final
1818

1919
from docutils.nodes import Node
2020
from sphinx.application import Sphinx
@@ -41,98 +41,140 @@ class GrammarSnippetBase(SphinxDirective):
4141

4242
# The option/argument handling is left to the individual classes.
4343

44+
grammar_re: Final = re.compile(
45+
r"""
46+
(?P<rule_name>^[a-zA-Z0-9_]+) # identifier at start of line
47+
(?=:) # ... followed by a colon
48+
|
49+
(?P<rule_ref>`[^\s`]+`) # identifier in backquotes
50+
|
51+
(?P<single_quoted>'[^']*') # string in 'quotes'
52+
|
53+
(?P<double_quoted>"[^"]*") # string in "quotes"
54+
""",
55+
re.VERBOSE,
56+
)
57+
4458
def make_grammar_snippet(
4559
self, options: dict[str, Any], content: Sequence[str]
46-
) -> list[nodes.paragraph]:
60+
) -> list[addnodes.productionlist]:
4761
"""Create a literal block from options & content."""
4862

4963
group_name = options['group']
50-
51-
# Docutils elements have a `rawsource` attribute that is supposed to be
52-
# set to the original ReST source.
53-
# Sphinx does the following with it:
54-
# - if it's empty, set it to `self.astext()`
55-
# - if it matches `self.astext()` when generating the output,
56-
# apply syntax highlighting (which is based on the plain-text content
57-
# and thus discards internal formatting, like references).
58-
# To get around this, we set it to this non-empty string:
59-
rawsource = 'You should not see this.'
60-
61-
literal = nodes.literal_block(
62-
rawsource,
64+
node_location = self.get_location()
65+
production_nodes = []
66+
for rawsource, production_defs in self.production_definitions(content):
67+
production = self.make_production(
68+
rawsource,
69+
production_defs,
70+
group_name=group_name,
71+
location=node_location,
72+
)
73+
production_nodes.append(production)
74+
75+
node = addnodes.productionlist(
6376
'',
77+
*production_nodes,
78+
support_smartquotes=False,
6479
classes=['highlight'],
6580
)
81+
self.set_source_info(node)
82+
return [node]
6683

67-
grammar_re = re.compile(
68-
r"""
69-
(?P<rule_name>^[a-zA-Z0-9_]+) # identifier at start of line
70-
(?=:) # ... followed by a colon
71-
|
72-
(?P<rule_ref>`[^\s`]+`) # identifier in backquotes
73-
|
74-
(?P<single_quoted>'[^']*') # string in 'quotes'
75-
|
76-
(?P<double_quoted>"[^"]*") # string in "quotes"
77-
""",
78-
re.VERBOSE,
79-
)
80-
81-
for line in content:
84+
def production_definitions(
85+
self, lines: Iterable[str], /
86+
) -> Iterator[tuple[str, list[tuple[str, str]]]]:
87+
"""Yield pairs of rawsource and production content dicts."""
88+
production_lines: list[str] = []
89+
production_content: list[tuple[str, str]] = []
90+
for line in lines:
91+
# If this line is the start of a new rule (text in the column 1),
92+
# emit the current production and start a new one.
93+
if not line[:1].isspace():
94+
rawsource = '\n'.join(production_lines)
95+
production_lines.clear()
96+
if production_content:
97+
yield rawsource, production_content
98+
production_content = []
99+
100+
# Append the current line for the raw source
101+
production_lines.append(line)
102+
103+
# Parse the line into constituent parts
82104
last_pos = 0
83-
for match in grammar_re.finditer(line):
105+
for match in self.grammar_re.finditer(line):
84106
# Handle text between matches
85107
if match.start() > last_pos:
86-
literal += nodes.Text(line[last_pos : match.start()])
108+
unmatched_text = line[last_pos : match.start()]
109+
production_content.append(('text', unmatched_text))
87110
last_pos = match.end()
88111

89-
# Handle matches
90-
group_dict = {
91-
name: content
92-
for name, content in match.groupdict().items()
112+
# Handle matches.
113+
# After filtering None (non-matches), exactly one groupdict()
114+
# entry should remain.
115+
[(re_group_name, content)] = (
116+
(re_group_name, content)
117+
for re_group_name, content in match.groupdict().items()
93118
if content is not None
94-
}
95-
match group_dict:
96-
case {'rule_name': name}:
97-
literal += self.make_link_target_for_token(
98-
group_name, name
99-
)
100-
case {'rule_ref': ref_text}:
101-
literal += token_xrefs(ref_text, group_name)
102-
case {'single_quoted': name} | {'double_quoted': name}:
103-
literal += snippet_string_node('', name)
104-
case _:
105-
raise ValueError('unhandled match')
106-
literal += nodes.Text(line[last_pos:] + '\n')
107-
108-
node = nodes.paragraph(
109-
'',
110-
'',
111-
literal,
112-
)
119+
)
120+
production_content.append((re_group_name, content))
121+
production_content.append(('text', line[last_pos:] + '\n'))
113122

114-
return [node]
123+
# Emit the final production
124+
if production_content:
125+
rawsource = '\n'.join(production_lines)
126+
yield rawsource, production_content
115127

116-
def make_link_target_for_token(
117-
self, group_name: str, name: str
128+
def make_production(
129+
self,
130+
rawsource: str,
131+
production_defs: list[tuple[str, str]],
132+
*,
133+
group_name: str,
134+
location: str,
135+
) -> addnodes.production:
136+
"""Create a production node from a list of parts."""
137+
production_node = addnodes.production(rawsource)
138+
for re_group_name, content in production_defs:
139+
match re_group_name:
140+
case 'rule_name':
141+
production_node += self.make_name_target(
142+
name=content,
143+
production_group=group_name,
144+
location=location,
145+
)
146+
case 'rule_ref':
147+
production_node += token_xrefs(content, group_name)
148+
case 'single_quoted' | 'double_quoted':
149+
production_node += snippet_string_node('', content)
150+
case 'text':
151+
production_node += nodes.Text(content)
152+
case _:
153+
raise ValueError(f'unhandled match: {re_group_name!r}')
154+
return production_node
155+
156+
def make_name_target(
157+
self,
158+
*,
159+
name: str,
160+
production_group: str,
161+
location: str,
118162
) -> addnodes.literal_strong:
119-
"""Return a literal node which is a link target for the given token."""
120-
name_node = addnodes.literal_strong()
163+
"""Make a link target for the given production."""
121164

122165
# Cargo-culted magic to make `name_node` a link target
123166
# similar to Sphinx `production`.
124167
# This needs to be the same as what Sphinx does
125168
# to avoid breaking existing links.
126-
domain = self.env.domains['std']
127-
obj_name = f"{group_name}:{name}"
128-
prefix = f'grammar-token-{group_name}'
169+
170+
name_node = addnodes.literal_strong(name, name)
171+
prefix = f'grammar-token-{production_group}'
129172
node_id = make_id(self.env, self.state.document, prefix, name)
130173
name_node['ids'].append(node_id)
131174
self.state.document.note_implicit_target(name_node, name_node)
132-
domain.note_object('token', obj_name, node_id, location=name_node)
133-
134-
text_node = nodes.Text(name)
135-
name_node += text_node
175+
obj_name = f'{production_group}:{name}' if production_group else name
176+
std = self.env.domains.standard_domain
177+
std.note_object('token', obj_name, node_id, location=location)
136178
return name_node
137179

138180

@@ -168,7 +210,7 @@ class GrammarSnippetDirective(GrammarSnippetBase):
168210
optional_arguments = 1
169211
final_argument_whitespace = True
170212

171-
def run(self) -> list[nodes.paragraph]:
213+
def run(self) -> list[addnodes.productionlist]:
172214
return self.make_grammar_snippet(self.options, self.content)
173215

174216

@@ -187,7 +229,7 @@ class CompatProductionList(GrammarSnippetBase):
187229
final_argument_whitespace = True
188230
option_spec = {}
189231

190-
def run(self) -> list[nodes.paragraph]:
232+
def run(self) -> list[addnodes.productionlist]:
191233
# The "content" of a productionlist is actually the first and only
192234
# argument. The first line is the group; the rest is the content lines.
193235
lines = self.arguments[0].splitlines()

0 commit comments

Comments
 (0)