1
1
//! A parser of the EBNF-like grammar.
2
2
3
3
use super :: { Characters , Expression , ExpressionKind , Grammar , Production } ;
4
- use regex:: { Captures , Regex } ;
5
4
use std:: fmt;
6
5
use std:: fmt:: Display ;
7
6
use std:: path:: Path ;
8
- use std:: sync:: LazyLock ;
9
7
10
8
/// Parsing state: the borrowed input text plus the current position of the head.
struct Parser < ' a > {
11
9
/// The text being parsed; parsing methods slice it starting at `index`.
input : & ' a str ,
12
10
/// Byte offset of the parse head within `input`; advanced as input is consumed.
index : usize ,
13
11
}
14
12
13
+ #[ derive( Debug ) ]
15
14
pub struct Error {
16
15
message : String ,
17
16
line : String ,
@@ -76,18 +75,6 @@ impl Parser<'_> {
76
75
& self . input [ i..i + upper]
77
76
}
78
77
79
- /// If the input matches the given regex, it is returned and the head is moved forward.
80
- ///
81
- /// Note that regexes must start with `^`.
82
- fn take_re ( & mut self , re : & Regex ) -> Option < Captures < ' _ > > {
83
- if let Some ( cap) = re. captures ( & self . input [ self . index ..] ) {
84
- self . index += cap[ 0 ] . len ( ) ;
85
- Some ( cap)
86
- } else {
87
- None
88
- }
89
- }
90
-
91
78
/// Returns whether or not the given string is next, and advances the head if it is.
92
79
fn take_str ( & mut self , s : & str ) -> bool {
93
80
if self . input [ self . index ..] . starts_with ( s) {
@@ -168,13 +155,12 @@ impl Parser<'_> {
168
155
}
169
156
170
157
fn parse_expression ( & mut self ) -> Result < Option < Expression > > {
171
- static ALT_RE : LazyLock < Regex > = LazyLock :: new ( || Regex :: new ( r"^ *\| *" ) . unwrap ( ) ) ;
172
-
173
158
let mut es = Vec :: new ( ) ;
174
159
loop {
175
160
let Some ( e) = self . parse_seq ( ) ? else { break } ;
176
161
es. push ( e) ;
177
- if self . take_re ( & ALT_RE ) . is_none ( ) {
162
+ _ = self . space0 ( ) ;
163
+ if !self . take_str ( "|" ) {
178
164
break ;
179
165
}
180
166
}
@@ -268,21 +254,28 @@ impl Parser<'_> {
268
254
Some ( ExpressionKind :: Nt ( nt) )
269
255
}
270
256
257
+ /// Parse terminal within backticks.
271
258
fn parse_terminal ( & mut self ) -> Result < ExpressionKind > {
272
- static TERMINAL_RE : LazyLock < Regex > =
273
- LazyLock :: new ( || Regex :: new ( r"^`([^`\n]+)`" ) . unwrap ( ) ) ;
274
- match self . take_re ( & TERMINAL_RE ) {
275
- Some ( cap) => Ok ( ExpressionKind :: Terminal ( cap[ 1 ] . to_string ( ) ) ) ,
276
- None => bail ! ( self , "unterminated terminal, expected closing backtick" ) ,
259
+ Ok ( ExpressionKind :: Terminal ( self . parse_terminal_str ( ) ?) )
260
+ }
261
+
262
+ /// Parse string within backticks.
263
+ fn parse_terminal_str ( & mut self ) -> Result < String > {
264
+ self . expect ( "`" , "expected opening backtick" ) ?;
265
+ let term = self . take_while ( & |x| ![ '\n' , '`' ] . contains ( & x) ) . to_string ( ) ;
266
+ if term. is_empty ( ) {
267
+ bail ! ( self , "expected terminal" ) ;
277
268
}
269
+ self . expect ( "`" , "expected closing backtick" ) ?;
270
+ Ok ( term)
278
271
}
279
272
280
273
fn parse_charset ( & mut self ) -> Result < ExpressionKind > {
281
274
self . expect ( "[" , "expected opening [" ) ?;
282
275
let mut characters = Vec :: new ( ) ;
283
276
loop {
284
277
self . space0 ( ) ;
285
- let Some ( ch) = self . parse_characters ( ) else {
278
+ let Some ( ch) = self . parse_characters ( ) ? else {
286
279
break ;
287
280
} ;
288
281
characters. push ( ch) ;
@@ -295,27 +288,49 @@ impl Parser<'_> {
295
288
Ok ( ExpressionKind :: Charset ( characters) )
296
289
}
297
290
298
- fn parse_characters ( & mut self ) -> Option < Characters > {
299
- static RANGE_RE : LazyLock < Regex > = LazyLock :: new ( || Regex :: new ( r"^`(.)`-`(.)`" ) . unwrap ( ) ) ;
300
- static TERMINAL_RE : LazyLock < Regex > = LazyLock :: new ( || Regex :: new ( "^`([^`\n ]+)`" ) . unwrap ( ) ) ;
301
- if let Some ( cap) = self . take_re ( & RANGE_RE ) {
302
- let a = cap[ 1 ] . chars ( ) . next ( ) . unwrap ( ) ;
303
- let b = cap[ 2 ] . chars ( ) . next ( ) . unwrap ( ) ;
304
- Some ( Characters :: Range ( a, b) )
305
- } else if let Some ( cap) = self . take_re ( & TERMINAL_RE ) {
306
- Some ( Characters :: Terminal ( cap[ 1 ] . to_string ( ) ) )
291
+ /// Parse an element of a character class, e.g.
292
+ /// `` `a`-`b` `` | `` `term` `` | `` NonTerminal ``.
293
+ fn parse_characters ( & mut self ) -> Result < Option < Characters > > {
294
+ if let Some ( b'`' ) = self . peek ( ) {
295
+ let recov = self . index ;
296
+ let a = self . parse_terminal_str ( ) ?;
297
+ if let Some ( b'-' ) = self . peek ( ) {
298
+ //~^ Parse `` `a`-`b` `` character range.
299
+ if a. len ( ) > 1 {
300
+ self . index = recov + 1 ;
301
+ bail ! ( self , "invalid start terminal in range" ) ;
302
+ }
303
+ self . expect ( "-" , "expected `-`" ) . unwrap ( ) ;
304
+ let recov = self . index ;
305
+ let b = self . parse_terminal_str ( ) ?;
306
+ if b. len ( ) > 1 {
307
+ self . index = recov + 1 ;
308
+ bail ! ( self , "invalid end terminal in range" ) ;
309
+ }
310
+ let a = a. chars ( ) . next ( ) . unwrap ( ) ;
311
+ let b = b. chars ( ) . next ( ) . unwrap ( ) ;
312
+ Ok ( Some ( Characters :: Range ( a, b) ) )
313
+ } else {
314
+ //~^ Parse terminal in backticks.
315
+ Ok ( Some ( Characters :: Terminal ( a) ) )
316
+ }
317
+ } else if let Some ( name) = self . parse_name ( ) {
318
+ //~^ Parse nonterminal identifier.
319
+ Ok ( Some ( Characters :: Named ( name) ) )
307
320
} else {
308
- let name = self . parse_name ( ) ?;
309
- Some ( Characters :: Named ( name) )
321
+ Ok ( None )
310
322
}
311
323
}
312
324
325
+ /// Parse e.g. `<prose text>`.
313
326
fn parse_prose ( & mut self ) -> Result < ExpressionKind > {
314
- static PROSE_RE : LazyLock < Regex > = LazyLock :: new ( || Regex :: new ( r"^<([^>\n]+)>" ) . unwrap ( ) ) ;
315
- match self . take_re ( & PROSE_RE ) {
316
- Some ( cap ) => Ok ( ExpressionKind :: Prose ( cap [ 1 ] . to_string ( ) ) ) ,
317
- None => bail ! ( self , "unterminated prose, expected closing `>`" ) ,
327
+ self . expect ( "<" , "expected opening `<`" ) ? ;
328
+ let text = self . take_while ( & |x| ! [ '\n' , '>' ] . contains ( & x ) ) . to_string ( ) ;
329
+ if text . is_empty ( ) {
330
+ bail ! ( self , "expected prose text" ) ;
318
331
}
332
+ self . expect ( ">" , "expected closing `>`" ) ?;
333
+ Ok ( ExpressionKind :: Prose ( text) )
319
334
}
320
335
321
336
fn parse_grouped ( & mut self ) -> Result < ExpressionKind > {
@@ -344,13 +359,19 @@ impl Parser<'_> {
344
359
Ok ( ExpressionKind :: NegExpression ( box_kind ( kind) ) )
345
360
}
346
361
362
+ /// Parse e.g. `F00F` after `U+`.
347
363
fn parse_unicode ( & mut self ) -> Result < ExpressionKind > {
348
- static UNICODE_RE : LazyLock < Regex > = LazyLock :: new ( || Regex :: new ( r"^[A-Z0-9]{4}" ) . unwrap ( ) ) ;
349
-
350
- match self . take_re ( & UNICODE_RE ) {
351
- Some ( s) => Ok ( ExpressionKind :: Unicode ( s[ 0 ] . to_string ( ) ) ) ,
352
- None => bail ! ( self , "expected 4 hexadecimal uppercase digits after U+" ) ,
364
+ let mut xs = Vec :: with_capacity ( 4 ) ;
365
+ for _ in 0 ..4 {
366
+ match self . peek ( ) {
367
+ Some ( x @ ( b'0' ..=b'9' | b'A' ..=b'F' ) ) => {
368
+ xs. push ( x) ;
369
+ self . index += 1 ;
370
+ }
371
+ _ => bail ! ( self , "expected 4 uppercase hexidecimal digits after `U+`" ) ,
372
+ }
353
373
}
374
+ Ok ( ExpressionKind :: Unicode ( String :: from_utf8 ( xs) . unwrap ( ) ) )
354
375
}
355
376
356
377
/// Parse `?` after expression.
@@ -428,16 +449,17 @@ impl Parser<'_> {
428
449
Ok ( Some ( self . input [ start..self . index - 1 ] . to_string ( ) ) )
429
450
}
430
451
452
+ /// Parse footnote reference, e.g. `[^id]`.
431
453
fn parse_footnote ( & mut self ) -> Result < Option < String > > {
432
- static FOOTNOTE_RE : LazyLock < Regex > =
433
- LazyLock :: new ( || Regex :: new ( r"^([^\]\n]+)]" ) . unwrap ( ) ) ;
434
454
if !self . take_str ( "[^" ) {
435
455
return Ok ( None ) ;
436
456
}
437
- match self . take_re ( & FOOTNOTE_RE ) {
438
- Some ( cap ) => Ok ( Some ( cap [ 1 ] . to_string ( ) ) ) ,
439
- None => bail ! ( self , "unterminated footnote, expected closing `]`" ) ,
457
+ let id = self . take_while ( & |x| ! [ '\n' , ']' ] . contains ( & x ) ) . to_string ( ) ;
458
+ if id . is_empty ( ) {
459
+ bail ! ( self , "expected footnote id" ) ;
440
460
}
461
+ self . expect ( "]" , "expected closing `]`" ) ?;
462
+ Ok ( Some ( id) )
441
463
}
442
464
}
443
465
0 commit comments