13
13
from sphinx .util .nodes import make_id
14
14
15
15
if TYPE_CHECKING :
16
- from collections .abc import Sequence
17
- from typing import Any
16
+ from collections .abc import Iterable , Iterator , Sequence
17
+ from typing import Any , Final
18
18
19
19
from docutils .nodes import Node
20
20
from sphinx .application import Sphinx
@@ -41,98 +41,140 @@ class GrammarSnippetBase(SphinxDirective):
41
41
42
42
# The option/argument handling is left to the individual classes.
43
43
44
# Tokenizer for one line of grammar text.  The alternatives are mutually
# exclusive named groups, so every hit has exactly one non-None group and
# the group name identifies the kind of token that was found.
grammar_re: Final = re.compile(
    r"""
        (?P<rule_name>^[a-zA-Z0-9_]+)   # identifier at start of line
        (?=:)                           # ... followed by a colon
    |
        (?P<rule_ref>`[^\s`]+`)         # identifier in backquotes
    |
        (?P<single_quoted>'[^']*')      # string in 'quotes'
    |
        (?P<double_quoted>"[^"]*")      # string in "quotes"
    """,
    re.VERBOSE,
)
57
+
44
58
def make_grammar_snippet(
    self, options: dict[str, Any], content: Sequence[str]
) -> list[addnodes.productionlist]:
    """Build a production list node from directive options and content.

    ``options`` must contain a ``'group'`` entry; it is passed on as the
    production group for every rule.  ``content`` holds the grammar lines,
    which are split into productions by ``production_definitions``.

    Return a one-element list holding the ``productionlist`` node.
    """
    group_name = options['group']
    location = self.get_location()

    # One production node per rule found in the content.
    productions = [
        self.make_production(
            rawsource,
            definitions,
            group_name=group_name,
            location=location,
        )
        for rawsource, definitions in self.production_definitions(content)
    ]

    production_list = addnodes.productionlist(
        '',
        *productions,
        support_smartquotes=False,
        classes=['highlight'],
    )
    self.set_source_info(production_list)
    return [production_list]
66
83
67
- grammar_re = re .compile (
68
- r"""
69
- (?P<rule_name>^[a-zA-Z0-9_]+) # identifier at start of line
70
- (?=:) # ... followed by a colon
71
- |
72
- (?P<rule_ref>`[^\s`]+`) # identifier in backquotes
73
- |
74
- (?P<single_quoted>'[^']*') # string in 'quotes'
75
- |
76
- (?P<double_quoted>"[^"]*") # string in "quotes"
77
- """ ,
78
- re .VERBOSE ,
79
- )
80
-
81
- for line in content :
84
def production_definitions(
    self, lines: Iterable[str], /
) -> Iterator[tuple[str, list[tuple[str, str]]]]:
    """Split grammar lines into productions and tokenize each one.

    Yield ``(rawsource, parts)`` pairs, one per production.  ``rawsource``
    is the production's lines joined with newlines; ``parts`` is a list of
    ``(kind, text)`` tuples where ``kind`` is either ``'text'`` or the name
    of the ``grammar_re`` group that matched.
    """
    source_lines: list[str] = []
    parts: list[tuple[str, str]] = []
    for line in lines:
        # A line whose first character is not whitespace begins a new rule:
        # flush whatever has been collected so far and start over.
        begins_rule = not line[:1].isspace()
        if begins_rule:
            rawsource = '\n'.join(source_lines)
            source_lines.clear()
            if parts:
                yield rawsource, parts
                parts = []

        # Remember the line verbatim for the raw source.
        source_lines.append(line)

        # Tokenize the line, keeping the text between tokens as well.
        cursor = 0
        for match in self.grammar_re.finditer(line):
            start, end = match.span()
            if start > cursor:
                parts.append(('text', line[cursor:start]))
            cursor = end

            # Only one named group takes part in any given match; the
            # single-element unpacking enforces that.
            [token] = [
                (kind, text)
                for kind, text in match.groupdict().items()
                if text is not None
            ]
            parts.append(token)
        parts.append(('text', line[cursor:] + '\n'))

    # Flush the last production, if there is one.
    if parts:
        yield '\n'.join(source_lines), parts
115
127
116
- def make_link_target_for_token (
117
- self , group_name : str , name : str
128
def make_production(
    self,
    rawsource: str,
    production_defs: list[tuple[str, str]],
    *,
    group_name: str,
    location: str,
) -> addnodes.production:
    """Assemble a production node from tokenized ``(kind, text)`` parts.

    Each part is turned into child content according to its kind:
    ``'rule_name'`` becomes a link target, ``'rule_ref'`` becomes token
    cross-references, quoted strings become string nodes and ``'text'``
    becomes plain text.

    Raise ``ValueError`` for any other kind.
    """
    production_node = addnodes.production(rawsource)
    for kind, text in production_defs:
        if kind == 'rule_name':
            production_node += self.make_name_target(
                name=text,
                production_group=group_name,
                location=location,
            )
        elif kind == 'rule_ref':
            production_node += token_xrefs(text, group_name)
        elif kind == 'single_quoted' or kind == 'double_quoted':
            production_node += snippet_string_node('', text)
        elif kind == 'text':
            production_node += nodes.Text(text)
        else:
            raise ValueError(f'unhandled match: {kind!r}')
    return production_node
155
+
156
def make_name_target(
    self,
    *,
    name: str,
    production_group: str,
    location: str,
) -> addnodes.literal_strong:
    """Build the node that acts as the link target for a rule name.

    The returned ``literal_strong`` node carries an ID derived from
    ``grammar-token-<production_group>`` and ``name``, is noted as an
    implicit target on the document, and is recorded as a ``'token'``
    object in the standard domain.  ``location`` is forwarded to
    ``note_object`` unchanged.
    """
    # Cargo-culted steps that make the node a link target in the same way
    # as a Sphinx `production`.  The ID scheme and the object name must
    # stay as they are, otherwise existing links would break.
    target = addnodes.literal_strong(name, name)
    id_prefix = f'grammar-token-{production_group}'
    target_id = make_id(self.env, self.state.document, id_prefix, name)
    target['ids'].append(target_id)
    self.state.document.note_implicit_target(target, target)

    # Qualify the object name with the group only when a group is given.
    if production_group:
        qualified_name = f'{production_group}:{name}'
    else:
        qualified_name = name
    standard_domain = self.env.domains.standard_domain
    standard_domain.note_object(
        'token', qualified_name, target_id, location=location
    )
    return target
137
179
138
180
@@ -168,7 +210,7 @@ class GrammarSnippetDirective(GrammarSnippetBase):
168
210
optional_arguments = 1
169
211
final_argument_whitespace = True
170
212
171
- def run (self ) -> list [nodes . paragraph ]:
213
def run(self) -> list[addnodes.productionlist]:
    """Render the directive's options and content as a grammar snippet."""
    snippet_nodes = self.make_grammar_snippet(self.options, self.content)
    return snippet_nodes
173
215
174
216
@@ -187,7 +229,7 @@ class CompatProductionList(GrammarSnippetBase):
187
229
final_argument_whitespace = True
188
230
option_spec = {}
189
231
190
- def run (self ) -> list [nodes . paragraph ]:
232
+ def run (self ) -> list [addnodes . productionlist ]:
191
233
# The "content" of a productionlist is actually the first and only
192
234
# argument. The first line is the group; the rest is the content lines.
193
235
lines = self .arguments [0 ].splitlines ()
0 commit comments