From b02ae8529e5d93a993d0ccbe387e9c72492ae0df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20Sinan=20A=C4=9Facan?= Date: Thu, 17 Feb 2022 10:23:24 +0300 Subject: [PATCH 1/4] WIP --- src/ast.rs | 22 +--------- src/codegen.rs | 80 ++++++++-------------------------- src/first.rs | 8 ++-- src/grammar.rs | 109 ++++++++++++++++++---------------------------- src/lower.rs | 29 ++---------- src/lr1.rs | 55 ++++++++++------------- src/lr_codegen.rs | 68 ++++++++++++++++------------- src/lr_common.rs | 64 ++++++++++++--------------- 8 files changed, 159 insertions(+), 276 deletions(-) diff --git a/src/ast.rs b/src/ast.rs index 44218a8..1428465 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -93,7 +93,6 @@ pub struct NonTerminal { #[derive(Debug)] pub struct Production { pub symbols: Vec, - pub action: Action, } #[derive(Debug)] @@ -131,12 +130,6 @@ pub enum RepeatOp { Question, } -#[derive(Debug)] -pub enum Action { - User(syn::Expr), - Fallible(syn::Expr), -} - impl Parse for FieldPattern { fn parse(input: &ParseBuffer) -> syn::Result { let field_name = input.parse::()?; @@ -328,8 +321,7 @@ impl Parse for Production { while !input.peek(syn::token::FatArrow) { symbols.push(input.parse::()?); } - let action = input.parse::()?; - Ok(Production { symbols, action }) + Ok(Production { symbols }) } } @@ -383,18 +375,6 @@ fn symbol0(input: &ParseBuffer) -> syn::Result { // } } -impl Parse for Action { - fn parse(input: &ParseBuffer) -> syn::Result { - input.parse::()?; - if input.peek(syn::token::Question) { - input.parse::()?; - Ok(Action::Fallible(input.parse::()?)) - } else { - Ok(Action::User(input.parse::()?)) - } - } -} - impl Parse for GrammarItem { fn parse(input: &ParseBuffer) -> syn::Result { if input.peek(syn::token::Type) { diff --git a/src/codegen.rs b/src/codegen.rs index cf90084..9ecd792 100644 --- a/src/codegen.rs +++ b/src/codegen.rs @@ -1,5 +1,5 @@ -use crate::ast::{Conversion, FieldPattern, Name, Pattern, TokenEnum}; -use crate::grammar::{Grammar, NonTerminal, Production, Symbol, SymbolKind}; +use crate::ast::{Conversion, FieldPattern, Pattern, TokenEnum}; +use crate::grammar::{Grammar, NonTerminal, Production}; use fxhash::FxHashMap; use proc_macro2::{Span, TokenStream}; @@ -43,19 +43,9 @@ pub fn generate_token_kind_type(tokens: &TokenEnum) -> (syn::Ident, TokenStream) #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct SemanticActionIdx(u16); -impl SemanticActionIdx { - pub fn as_u16(&self) -> u16 { - self.0 - } -} - /// Generates semantic action functions, semantic action table (array of semantic action functions) /// and replaces semantic actions in the grammar with their indices in the array. -pub fn generate_semantic_action_table( - grammar: Grammar, - non_terminal_action_variant_name: &[usize], - token_lifetimes: &[syn::Lifetime], -) -> (Vec, Grammar) { +pub fn generate_semantic_action_table(grammar: Grammar) -> (Vec, Grammar) { let Grammar { non_terminals, terminals, @@ -65,7 +55,7 @@ pub fn generate_semantic_action_table( let mut decls: Vec = vec![]; let mut fn_names: Vec = vec![]; - let non_terminals: Vec> = non_terminals + let non_terminals: Vec = non_terminals .into_iter() .enumerate() .map( @@ -78,67 +68,35 @@ pub fn generate_semantic_action_table( public, }, )| { - let productions: Vec> = productions + let productions: Vec = productions .into_iter() .enumerate() - .map(|(p_i, Production { symbols, action })| { + .map(|(p_i, Production { symbols })| { // Statements to pop the values off the value stack and bind them, for the // pruduction's RHS - let mut pop_code: Vec = vec![]; - - for Symbol { binder, kind } in symbols.iter().rev() { - match binder { - None => { - pop_code.push(quote!(value_stack.pop())); - } - Some(Name { - mutable, - name, - }) => { - let mut_ = if *mutable { quote!(mut) } else { quote!() }; - let extract_method = match kind { - SymbolKind::NonTerminal(nt_idx) => syn::Ident::new( - &format!("non_terminal_{}", non_terminal_action_variant_name[nt_idx.as_usize()]), - Span::call_site() - ), - SymbolKind::Terminal(terminal_idx) => syn::Ident::new( - &format!("token_{}", terminal_idx.as_usize()), - Span::call_site(), - ), - }; - pop_code.push(quote!( - let #mut_ #name = value_stack.pop().unwrap().#extract_method() - )); - } - } - } + let n_pops = symbols.len(); let fn_name = syn::Ident::new( &format!("nt{}p{}_action", nt_i, p_i), Span::call_site(), ); - let fn_idx = decls.len(); - - let non_terminal_variant = syn::Ident::new( - &format!("NonTerminal{}", non_terminal_action_variant_name[nt_i]), - Span::call_site() - ); decls.push(quote!( - fn #fn_name<#(#token_lifetimes),*>( - value_stack: &mut Vec> - ) { - #(#pop_code;)* - value_stack.push(SemanticActionResult::#non_terminal_variant(#action)); + fn #fn_name(value_stack: &mut Vec) { + let children: Vec = + value_stack.drain(value_stack.len() - #n_pops..).collect(); + + value_stack.push(Node { + kind: Kind::NonTerminal(#nt_i), + span: (0, 0), + children, + }); } )); fn_names.push(fn_name); - Production { - symbols, - action: SemanticActionIdx(u16::try_from(fn_idx).unwrap()), - } + Production { symbols } }) .collect(); @@ -295,8 +253,8 @@ fn pattern_ignore(pattern: &Pattern) -> TokenStream { /// each token with value. Used in the value stack for terminals and non-terminals. /// /// The `Vec` maps `NonTerminalIdx`s to their value extraction method names. -pub fn semantic_action_result_type( - grammar: &Grammar, +pub fn semantic_action_result_type( + grammar: &Grammar, tokens: &[Conversion], token_lifetimes: &[syn::Lifetime], ) -> (syn::Ident, TokenStream, Vec) { diff --git a/src/first.rs b/src/first.rs index 7e42d9d..5bc2793 100644 --- a/src/first.rs +++ b/src/first.rs @@ -70,7 +70,7 @@ impl FirstTable { } } -pub fn generate_first_table(grammar: &Grammar) -> FirstTable { +pub fn generate_first_table(grammar: &Grammar) -> FirstTable { let mut table: FirstTable = FirstTable::new(grammar.non_terminals().len()); let mut updated = true; @@ -113,12 +113,12 @@ pub fn generate_first_table(grammar: &Grammar) -> FirstTable { use std::fmt; -pub struct FirstSetDisplay<'a, 'b, A> { +pub struct FirstSetDisplay<'a, 'b> { pub set: &'a FirstSet, - pub grammar: &'b Grammar, + pub grammar: &'b Grammar, } -impl<'a, 'b, A> fmt::Display for FirstSetDisplay<'a, 'b, A> { +impl<'a, 'b> fmt::Display for FirstSetDisplay<'a, 'b> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{{")?; for (t_idx, t) in self.set.terminals.iter().enumerate() { diff --git a/src/grammar.rs b/src/grammar.rs index 7181651..6d046eb 100644 --- a/src/grammar.rs +++ b/src/grammar.rs @@ -4,9 +4,9 @@ use crate::ast; /// Grammar type parameterized over terminals and user actions. #[derive(Debug, Clone)] -pub struct Grammar { +pub struct Grammar { // Non-terminals, indexed by `NonTerminalIdx` - pub non_terminals: Vec>, + pub non_terminals: Vec, // Maps terminals to their user-written names (in `enum Token { ... }`). The strings are only // used for debugging purposes, but we use the length of this vector to generate @@ -30,12 +30,6 @@ impl NonTerminalIdx { #[derive(Debug, PartialEq, Eq, Clone, Copy, PartialOrd, Ord, Hash)] pub struct TerminalIdx(u32); -impl TerminalIdx { - pub fn as_usize(self) -> usize { - self.0 as usize - } -} - #[derive(Debug, PartialEq, Eq, Clone, Copy, PartialOrd, Ord, Hash)] pub struct ProductionIdx(pub u32); @@ -51,27 +45,17 @@ impl ProductionIdx { } #[derive(Debug, Clone)] -pub struct NonTerminal { +pub struct NonTerminal { pub non_terminal: String, // Indexed by `ProductionIdx` - pub productions: Vec>, + pub productions: Vec, pub return_ty: syn::Type, pub public: bool, } -#[derive(Clone)] -pub struct Production { +#[derive(Debug, Clone)] +pub struct Production { pub symbols: Vec, - pub action: A, -} - -impl std::fmt::Debug for Production { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("Production") - .field("symbols", &self.symbols) - .field("action", &"...") - .finish() - } } #[derive(Debug, Clone)] @@ -86,7 +70,7 @@ pub enum SymbolKind { Terminal(TerminalIdx), } -impl Grammar { +impl Grammar { pub fn new() -> Self { Grammar { non_terminals: vec![], @@ -116,7 +100,7 @@ impl Grammar { idx } - pub fn get_non_terminal(&self, idx: NonTerminalIdx) -> &NonTerminal { + pub fn get_non_terminal(&self, idx: NonTerminalIdx) -> &NonTerminal { &self.non_terminals[idx.0 as usize] } @@ -152,67 +136,60 @@ impl Grammar { &mut self, non_terminal: NonTerminalIdx, symbols: Vec, - action: A, ) -> ProductionIdx { let non_terminal = &mut self.non_terminals[non_terminal.0 as usize]; let prod_idx = non_terminal.productions.len(); - non_terminal - .productions - .push(Production { symbols, action }); + non_terminal.productions.push(Production { symbols }); ProductionIdx(prod_idx as u32) } - pub fn get_production( - &self, - nt_idx: NonTerminalIdx, - prod_idx: ProductionIdx, - ) -> &Production { + pub fn get_production(&self, nt_idx: NonTerminalIdx, prod_idx: ProductionIdx) -> &Production { &self.non_terminals[nt_idx.0 as usize].productions[prod_idx.0 as usize] } } -impl NonTerminal { - pub fn productions(&self) -> &[Production] { +impl NonTerminal { + pub fn productions(&self) -> &[Production] { &self.productions } - pub fn production_indices(&self) -> impl Iterator)> { + pub fn production_indices(&self) -> impl Iterator { self.productions .iter() .enumerate() .map(|(i, p)| (ProductionIdx(i as u32), p)) } - pub fn get_production(&self, production_idx: ProductionIdx) -> &Production { + pub fn get_production(&self, production_idx: ProductionIdx) -> &Production { &self.productions[production_idx.0 as usize] } } -impl Production { +impl Production { pub fn symbols(&self) -> &[Symbol] { &self.symbols } } -struct ProductionIndicesIter<'grammar, A> { - grammar: &'grammar Grammar, +struct ProductionIndicesIter<'grammar> { + grammar: &'grammar Grammar, non_terminal_idx: NonTerminalIdx, production_idx: ProductionIdx, } -struct NonTerminalIndicesIter<'grammar, A> { - grammar: &'grammar Grammar, +struct NonTerminalIndicesIter<'grammar> { + grammar: &'grammar Grammar, non_terminal_idx: NonTerminalIdx, } -struct NonTerminalProductionIndicesIter<'grammar, A> { - grammar: &'grammar Grammar, +struct NonTerminalProductionIndicesIter<'grammar> { + grammar: &'grammar Grammar, non_terminal_idx: NonTerminalIdx, production_idx: ProductionIdx, } -impl<'grammar, A> Iterator for ProductionIndicesIter<'grammar, A> { - type Item = (NonTerminalIdx, ProductionIdx, &'grammar Production); +impl<'grammar> Iterator for ProductionIndicesIter<'grammar> { + type Item = (NonTerminalIdx, ProductionIdx, &'grammar Production); fn next(&mut self) -> Option { loop { @@ -240,8 +217,8 @@ impl<'grammar, A> Iterator for ProductionIndicesIter<'grammar, A> { } } -impl<'grammar, A> Iterator for NonTerminalIndicesIter<'grammar, A> { - type Item = (NonTerminalIdx, &'grammar NonTerminal); +impl<'grammar> Iterator for NonTerminalIndicesIter<'grammar> { + type Item = (NonTerminalIdx, &'grammar NonTerminal); fn next(&mut self) -> Option { self.grammar @@ -255,8 +232,8 @@ impl<'grammar, A> Iterator for NonTerminalIndicesIter<'grammar, A> { } } -impl<'grammar, A> Iterator for NonTerminalProductionIndicesIter<'grammar, A> { - type Item = (ProductionIdx, &'grammar Production); +impl<'grammar> Iterator for NonTerminalProductionIndicesIter<'grammar> { + type Item = (ProductionIdx, &'grammar Production); fn next(&mut self) -> Option { let production_idx = self.production_idx; @@ -270,12 +247,12 @@ impl<'grammar, A> Iterator for NonTerminalProductionIndicesIter<'grammar, A> { } } -impl Grammar { - pub fn non_terminals(&self) -> &[NonTerminal] { +impl Grammar { + pub fn non_terminals(&self) -> &[NonTerminal] { &self.non_terminals } - pub fn non_terminal_indices(&self) -> impl Iterator)> { + pub fn non_terminal_indices(&self) -> impl Iterator { NonTerminalIndicesIter { grammar: self, non_terminal_idx: NonTerminalIdx(0), @@ -285,7 +262,7 @@ impl Grammar { pub fn non_terminal_production_indices( &self, non_terminal: NonTerminalIdx, - ) -> impl Iterator)> { + ) -> impl Iterator { NonTerminalProductionIndicesIter { grammar: self, non_terminal_idx: non_terminal, @@ -298,24 +275,24 @@ impl Grammar { } } -pub struct SymbolKindDisplay<'a, 'b, A> { +pub struct SymbolKindDisplay<'a, 'b> { symbol: &'a SymbolKind, - grammar: &'b Grammar, + grammar: &'b Grammar, } -impl<'a, 'b, A> SymbolKindDisplay<'a, 'b, A> { - pub fn new(symbol: &'a SymbolKind, grammar: &'b Grammar) -> Self { +impl<'a, 'b> SymbolKindDisplay<'a, 'b> { + pub fn new(symbol: &'a SymbolKind, grammar: &'b Grammar) -> Self { Self { symbol, grammar } } } -pub struct ProductionDisplay<'a, 'b, A> { - production: &'a Production, - grammar: &'b Grammar, +pub struct ProductionDisplay<'a, 'b> { + production: &'a Production, + grammar: &'b Grammar, } -impl<'a, 'b, A> ProductionDisplay<'a, 'b, A> { - pub fn new(production: &'a Production, grammar: &'b Grammar) -> Self { +impl<'a, 'b> ProductionDisplay<'a, 'b> { + pub fn new(production: &'a Production, grammar: &'b Grammar) -> Self { Self { production, grammar, @@ -325,7 +302,7 @@ impl<'a, 'b, A> ProductionDisplay<'a, 'b, A> { use std::fmt; -impl fmt::Display for Grammar { +impl fmt::Display for Grammar { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { for (nt_idx, nt) in self.non_terminals.iter().enumerate() { writeln!( @@ -355,7 +332,7 @@ impl fmt::Display for Grammar { } } -impl<'a, 'b, A> fmt::Display for ProductionDisplay<'a, 'b, A> { +impl<'a, 'b> fmt::Display for ProductionDisplay<'a, 'b> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { for (symbol_idx, symbol) in self.production.symbols().iter().enumerate() { match &symbol.kind { @@ -375,7 +352,7 @@ impl<'a, 'b, A> fmt::Display for ProductionDisplay<'a, 'b, A> { } } -impl<'a, 'b, A> fmt::Display for SymbolKindDisplay<'a, 'b, A> { +impl<'a, 'b> fmt::Display for SymbolKindDisplay<'a, 'b> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self.symbol { SymbolKind::NonTerminal(nt) => { diff --git a/src/lower.rs b/src/lower.rs index e978efd..a66921b 100644 --- a/src/lower.rs +++ b/src/lower.rs @@ -1,15 +1,10 @@ use crate::ast; use crate::grammar::{Grammar, NonTerminalIdx, Symbol, SymbolKind, TerminalIdx}; -use std::iter::FromIterator; - use fxhash::FxHashMap; use proc_macro2::Span; -pub fn lower( - non_terminals: Vec, - terminals: &[ast::Conversion], -) -> Grammar { +pub fn lower(non_terminals: Vec, terminals: &[ast::Conversion]) -> Grammar { let mut grammar = Grammar::new(); let mut nt_indices: FxHashMap = Default::default(); @@ -55,17 +50,6 @@ pub fn lower( }), kind: SymbolKind::NonTerminal(nt1_idx), }], - syn::Expr::Path(syn::ExprPath { - attrs: vec![], - qself: None, - path: syn::Path { - leading_colon: None, - segments: syn::punctuated::Punctuated::from_iter(vec![syn::PathSegment { - ident: binder_ident, - arguments: syn::PathArguments::None, - }]), - }, - }), ); } else { let nt_name = nt.name.to_string(); @@ -92,20 +76,15 @@ pub fn lower( } let nt_idx = nt_indices.get(&name.to_string()).unwrap(); - let action = match prod.action { - ast::Action::User(expr) => expr, - ast::Action::Fallible(_) => todo!("Fallible actions not supported yet"), - }; - - grammar.add_production(*nt_idx, symbols, action); + grammar.add_production(*nt_idx, symbols); } } grammar } -fn add_symbol( - grammar: &mut Grammar, +fn add_symbol( + grammar: &mut Grammar, nt_indices: &FxHashMap, t_indices: &FxHashMap, symbols: &mut Vec, diff --git a/src/lr1.rs b/src/lr1.rs index a976311..9679e24 100644 --- a/src/lr1.rs +++ b/src/lr1.rs @@ -32,17 +32,14 @@ impl LR1Item { } } - fn next_symbol<'grammar, A>( - &self, - grammar: &'grammar Grammar, - ) -> Option<&'grammar SymbolKind> { + fn next_symbol<'grammar>(&self, grammar: &'grammar Grammar) -> Option<&'grammar SymbolKind> { let production = grammar.get_production(self.non_terminal_idx, self.production_idx); production.symbols().get(self.cursor).map(|s| &s.kind) } /// Returns non-terminal expected by the item, if the next expected symbol is a non-terminal. /// Otherwise returns `None`. - fn next_non_terminal(&self, grammar: &Grammar) -> Option { + fn next_non_terminal(&self, grammar: &Grammar) -> Option { match self.next_symbol(grammar) { Some(SymbolKind::NonTerminal(nt_idx)) => Some(*nt_idx), _ => None, @@ -51,17 +48,14 @@ impl LR1Item { /// Returns terminal expected by the item, if the next expected symbol is a terminal. Otherwise /// returns `None`. - fn next_terminal(&self, grammar: &Grammar) -> Option { + fn next_terminal(&self, grammar: &Grammar) -> Option { match self.next_symbol(grammar) { Some(SymbolKind::Terminal(t)) => Some(*t), _ => None, } } - fn get_production<'grammar, A>( - &self, - grammar: &'grammar Grammar, - ) -> &'grammar Production { + fn get_production<'grammar>(&self, grammar: &'grammar Grammar) -> &'grammar Production { grammar.get_production(self.non_terminal_idx, self.production_idx) } @@ -71,14 +65,14 @@ impl LR1Item { item } - fn is_complete(&self, grammar: &Grammar) -> bool { + fn is_complete(&self, grammar: &Grammar) -> bool { let production = self.get_production(grammar); self.cursor == production.symbols().len() } } -fn compute_lr1_closure( - grammar: &Grammar, +fn compute_lr1_closure( + grammar: &Grammar, first_table: &FirstTable, items: FxHashSet, ) -> BTreeSet { @@ -180,10 +174,10 @@ fn compute_lr1_closure( closure.into_iter().collect() } -fn compute_lr1_goto( +fn compute_lr1_goto( state: &BTreeSet, symbol: &SymbolKind, - grammar: &Grammar, + grammar: &Grammar, first: &FirstTable, ) -> BTreeSet { let mut goto: FxHashSet = Default::default(); @@ -256,8 +250,8 @@ impl Default for LR1Automaton { } } -pub fn generate_lr1_automaton( - grammar: &Grammar, +pub fn generate_lr1_automaton( + grammar: &Grammar, first_table: &FirstTable, ) -> (LR1Automaton, FxHashMap) { // Maps existing item sets to their state indices, to maintain sharing. @@ -363,11 +357,8 @@ pub fn generate_lr1_automaton( (automaton, non_terminal_state_indices) } -pub fn build_lr1_table( - grammar: &Grammar, - automaton: &LR1Automaton, -) -> LRTable { - let mut table: LRTableBuilder = LRTableBuilder::new(automaton.states.len()); +pub fn build_lr1_table(grammar: &Grammar, automaton: &LR1Automaton) -> LRTable { + let mut table = LRTableBuilder::new(automaton.states.len()); for (state_idx, state) in automaton.state_indices() { for item in state.items() { @@ -382,14 +373,12 @@ pub fn build_lr1_table( // Rule 2.b if item.is_complete(grammar) && !non_terminal.public { - let production = grammar.get_production(item.non_terminal_idx, item.production_idx); table.add_reduce( grammar, state_idx, item.lookahead, item.non_terminal_idx, item.production_idx, - production.action.clone(), ); } @@ -418,22 +407,22 @@ use crate::lr_common::LRTableDisplay; use std::fmt; -pub struct LR1AutomatonDisplay<'a, 'b, A> { +pub struct LR1AutomatonDisplay<'a, 'b> { pub automaton: &'a LR1Automaton, - pub grammar: &'b Grammar, + pub grammar: &'b Grammar, } -struct LR1StateDisplay<'a, 'b, A> { +struct LR1StateDisplay<'a, 'b> { state: &'a LR1State, - grammar: &'b Grammar, + grammar: &'b Grammar, } -struct LR1ItemDisplay<'a, 'b, A> { +struct LR1ItemDisplay<'a, 'b> { item: &'a LR1Item, - grammar: &'b Grammar, + grammar: &'b Grammar, } -impl<'a, 'b, A> fmt::Display for LR1ItemDisplay<'a, 'b, A> { +impl<'a, 'b> fmt::Display for LR1ItemDisplay<'a, 'b> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let non_terminal = self.grammar.get_non_terminal(self.item.non_terminal_idx); let production = non_terminal.get_production(self.item.production_idx); @@ -466,7 +455,7 @@ impl<'a, 'b, A> fmt::Display for LR1ItemDisplay<'a, 'b, A> { } } -impl<'a, 'b, A> fmt::Display for LR1StateDisplay<'a, 'b, A> { +impl<'a, 'b> fmt::Display for LR1StateDisplay<'a, 'b> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { for item in &self.state.items { writeln!( @@ -492,7 +481,7 @@ impl<'a, 'b, A> fmt::Display for LR1StateDisplay<'a, 'b, A> { } } -impl<'a, 'b, A> fmt::Display for LR1AutomatonDisplay<'a, 'b, A> { +impl<'a, 'b> fmt::Display for LR1AutomatonDisplay<'a, 'b> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { for (state_idx, state) in self.automaton.state_indices() { writeln!(f, "{}: {{", state_idx.0)?; diff --git a/src/lr_codegen.rs b/src/lr_codegen.rs index 5283948..c7c939a 100644 --- a/src/lr_codegen.rs +++ b/src/lr_codegen.rs @@ -1,10 +1,10 @@ use crate::ast::TokenEnum; use crate::codegen::{ generate_semantic_action_table, generate_token_kind_type, semantic_action_result_type, - token_value_fn, SemanticActionIdx, + token_value_fn, }; use crate::first::generate_first_table; -use crate::grammar::{Grammar, NonTerminalIdx, ProductionIdx, TerminalIdx}; +use crate::grammar::{Grammar, NonTerminalIdx, TerminalIdx}; use crate::lr1::{build_lr1_table, generate_lr1_automaton}; use crate::lr_common::{LRAction, StateIdx}; @@ -12,7 +12,7 @@ use fxhash::FxHashMap; use proc_macro2::{Span, TokenStream}; use quote::quote; -pub fn generate_lr1_parser(grammar: Grammar, tokens: &TokenEnum) -> TokenStream { +pub fn generate_lr1_parser(grammar: Grammar, tokens: &TokenEnum) -> TokenStream { let (token_kind_type_name, token_kind_type_decl) = generate_token_kind_type(tokens); let n_terminals = grammar.n_terminals(); @@ -31,11 +31,7 @@ pub fn generate_lr1_parser(grammar: Grammar, tokens: &TokenEnum) -> T // Generate semantic action table, replace semantic actions in the grammar with their indices // in the table - let (semantic_action_table, grammar) = generate_semantic_action_table( - grammar, - &non_terminal_action_variant_name, - &tokens.type_lifetimes, - ); + let (semantic_action_table, grammar) = generate_semantic_action_table(grammar); // println!( // "{}", @@ -83,19 +79,14 @@ pub fn generate_lr1_parser(grammar: Grammar, tokens: &TokenEnum) -> T .map(|(non_terminal_idx, parser_state)| { let parser_state = u32::try_from(parser_state.as_usize()).unwrap(); let non_terminal = grammar.get_non_terminal(non_terminal_idx); + let non_terminal_idx = non_terminal_idx.as_usize(); let non_terminal_name_id = syn::Ident::new(&non_terminal.non_terminal, Span::call_site()); let non_terminal_return_type = &non_terminal.return_ty; - let action_idx = usize::from( - grammar - .get_production(non_terminal_idx, ProductionIdx(0)) - .action - .as_u16(), - ); let extract_method_id = syn::Ident::new( &format!( "non_terminal_{}", - non_terminal_action_variant_name[non_terminal_idx.as_usize()] + non_terminal_action_variant_name[non_terminal_idx] ), Span::call_site(), ); @@ -112,7 +103,7 @@ pub fn generate_lr1_parser(grammar: Grammar, tokens: &TokenEnum) -> T input, SemanticActionResult::#extract_method_id, #parser_state, - #action_idx, + #non_terminal_idx, ) } } @@ -128,12 +119,25 @@ pub fn generate_lr1_parser(grammar: Grammar, tokens: &TokenEnum) -> T }, Reduce { non_terminal_idx: u16, + production_idx: u16, n_symbols: u16, - semantic_action_idx: u16, }, Accept, } + #[derive(Debug)] + enum Kind { + NonTerminal(usize), + Terminal(usize), + } + + #[derive(Debug)] + struct Node { + kind: Kind, + span: (usize, usize), + children: Vec, + } + // static ACTION: [[Option; ...]; ...] #action_array_code @@ -165,7 +169,7 @@ pub fn generate_lr1_parser(grammar: Grammar, tokens: &TokenEnum) -> T mut input: impl Iterator, E>>, extract_value: fn(SemanticActionResult<#(#token_lifetimes),*>) -> R, init_state: u32, - action_idx: usize, + non_terminal_idx: usize, ) -> Result> { let mut state_stack: Vec = vec![init_state]; @@ -216,19 +220,19 @@ pub fn generate_lr1_parser(grammar: Grammar, tokens: &TokenEnum) -> T } /// Generates array representation of the action table. Reminder: EOF = last terminal. -fn action_table_vec( - grammar: &Grammar, - action_table: &FxHashMap, LRAction>>, +fn action_table_vec( + grammar: &Grammar, + action_table: &FxHashMap, LRAction>>, n_states: usize, -) -> Vec>>> { +) -> Vec>> { let n_terminals = grammar.n_terminals(); - let mut state_to_terminal_to_action: Vec>>> = + let mut state_to_terminal_to_action: Vec>> = Vec::with_capacity(n_terminals); for state in 0..n_states { let state_idx = StateIdx(state); // +1 for EOF - let mut terminal_to_action: Vec>> = Vec::with_capacity(n_terminals + 1); + let mut terminal_to_action: Vec> = Vec::with_capacity(n_terminals + 1); for terminal in grammar.terminal_indices() { terminal_to_action.push( action_table @@ -274,9 +278,9 @@ fn generate_goto_vec( state_to_non_terminal_to_state } -fn generate_action_array( - grammar: &Grammar, - table: &[Vec>>], +fn generate_action_array( + grammar: &Grammar, + table: &[Vec>], n_states: usize, n_terminals: usize, ) -> TokenStream { @@ -296,7 +300,7 @@ fn generate_action_array( let next_state: u32 = u32::try_from(next_state.as_usize()).unwrap(); quote!(LRAction::Shift { next_state: #next_state }) } - LRAction::Reduce(non_terminal_idx, production_idx, semantic_action_idx) => { + LRAction::Reduce(non_terminal_idx, production_idx) => { let n_symbols: u16 = u16::try_from( grammar .get_production(*non_terminal_idx, *production_idx) @@ -304,13 +308,17 @@ fn generate_action_array( .len(), ) .unwrap(); + let non_terminal_idx: u16 = u16::try_from(non_terminal_idx.as_usize()).unwrap(); - let semantic_action_idx = semantic_action_idx.as_u16(); + + let production_idx: u16 = + u16::try_from(production_idx.as_usize()).unwrap(); + quote!(LRAction::Reduce { non_terminal_idx: #non_terminal_idx, + production_idx: #production_idx, n_symbols: #n_symbols, - semantic_action_idx: #semantic_action_idx, }) } LRAction::Accept => quote!(LRAction::Accept), diff --git a/src/lr_common.rs b/src/lr_common.rs index 0f05886..5f12337 100644 --- a/src/lr_common.rs +++ b/src/lr_common.rs @@ -12,28 +12,28 @@ impl StateIdx { } #[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum LRAction { +pub enum LRAction { /// Shift current terminal, switch to given state Shift(StateIdx), /// Reduce using the given production - Reduce(NonTerminalIdx, ProductionIdx, A), + Reduce(NonTerminalIdx, ProductionIdx), /// Accept the input Accept, } -pub struct LRTable { - action: FxHashMap, LRAction>>, +pub struct LRTable { + action: FxHashMap, LRAction>>, goto: FxHashMap>, n_states: usize, } -pub struct LRTableBuilder { - table: LRTable, +pub struct LRTableBuilder { + table: LRTable, } -impl LRTableBuilder { +impl LRTableBuilder { pub fn new(n_states: usize) -> Self { Self { table: LRTable { @@ -44,13 +44,13 @@ impl LRTableBuilder { } } - pub fn build(self) -> LRTable { + pub fn build(self) -> LRTable { self.table } pub fn add_shift( &mut self, - grammar: &Grammar, + grammar: &Grammar, state: StateIdx, token: TerminalIdx, next_state: StateIdx, @@ -72,7 +72,7 @@ impl LRTableBuilder { ); } } - LRAction::Reduce(_nt_, _p_, _) => { + LRAction::Reduce(_nt_, _p_) => { // TODO: Allowing overriding reduce actions for shift for now // panic!( // "({}, {:?}): Overriding Reduce({}, {}) action with Shift({})", @@ -97,19 +97,15 @@ impl LRTableBuilder { pub fn add_reduce( &mut self, - grammar: &Grammar, + grammar: &Grammar, state: StateIdx, token: Option, non_terminal_idx: NonTerminalIdx, production_idx: ProductionIdx, - semantic_action: A, ) { let action = self.table.action.entry(state).or_default(); - let old_action = action.insert( - token, - LRAction::Reduce(non_terminal_idx, production_idx, semantic_action), - ); + let old_action = action.insert(token, LRAction::Reduce(non_terminal_idx, production_idx)); if let Some(old_action) = old_action { match old_action { @@ -123,7 +119,7 @@ impl LRTableBuilder { production_idx.0 ) } - LRAction::Reduce(nt_, p_, _) => { + LRAction::Reduce(nt_, p_) => { panic!( "({}, {:?}): Overriding Reduce({}, {}) action with Reduce({}, {})", state.0, @@ -169,13 +165,13 @@ impl LRTableBuilder { } } -impl LRTable { +impl LRTable { #[cfg(test)] pub fn get_action( &self, state: StateIdx, non_terminal: Option, - ) -> Option<&LRAction> { + ) -> Option<&LRAction> { self.action .get(&state) .and_then(|action| action.get(&non_terminal)) @@ -191,7 +187,7 @@ impl LRTable { pub fn get_action_table( &self, - ) -> &FxHashMap, LRAction>> { + ) -> &FxHashMap, LRAction>> { &self.action } @@ -205,11 +201,7 @@ impl LRTable { } #[cfg(test)] -pub fn simulate( - table: &LRTable, - grammar: &Grammar, - mut input: impl Iterator, -) { +pub fn simulate(table: &LRTable, grammar: &Grammar, mut input: impl Iterator) { let mut stack: Vec = vec![StateIdx(0)]; let mut a = input.next(); @@ -259,28 +251,28 @@ pub fn simulate( ); } -pub struct LRTableDisplay<'a, 'b, A> { - table: &'a LRTable, - grammar: &'b Grammar, +pub struct LRTableDisplay<'a, 'b> { + table: &'a LRTable, + grammar: &'b Grammar, } -impl<'a, 'b, A> LRTableDisplay<'a, 'b, A> { +impl<'a, 'b> LRTableDisplay<'a, 'b> { #[cfg(test)] - pub fn new(table: &'a LRTable, grammar: &'b Grammar) -> Self { + pub fn new(table: &'a LRTable, grammar: &'b Grammar) -> Self { Self { table, grammar } } } -pub struct LRActionDisplay<'a, 'b, A> { - action: &'a LRAction, - grammar: &'b Grammar, +pub struct LRActionDisplay<'a, 'b> { + action: &'a LRAction, + grammar: &'b Grammar, } use crate::grammar::ProductionDisplay; use std::fmt; -impl<'a, 'b, A> fmt::Display for LRTableDisplay<'a, 'b, A> { +impl<'a, 'b> fmt::Display for LRTableDisplay<'a, 'b> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { for state_idx in 0..self.table.n_states() { writeln!(f, "{}: {{", state_idx)?; @@ -321,11 +313,11 @@ impl<'a, 'b, A> fmt::Display for LRTableDisplay<'a, 'b, A> { } } -impl<'a, 'b, A> fmt::Display for LRActionDisplay<'a, 'b, A> { +impl<'a, 'b> fmt::Display for LRActionDisplay<'a, 'b> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match &self.action { LRAction::Shift(next) => write!(f, "Shift {}", next.0), - LRAction::Reduce(nt, p, _) => { + LRAction::Reduce(nt, p) => { let p_ = self.grammar.get_production(*nt, *p); let nt_ = self.grammar.get_non_terminal(*nt); write!( From 2382b96d0cbb14ecb45b1dad3ddd2ab0ae552364 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20Sinan=20A=C4=9Facan?= Date: Thu, 17 Feb 2022 11:35:12 +0300 Subject: [PATCH 2/4] Fix parser --- src/ast.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ast.rs b/src/ast.rs index 1428465..de9b8c7 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -318,7 +318,7 @@ impl Parse for NonTerminal { impl Parse for Production { fn parse(input: &ParseBuffer) -> syn::Result { let mut symbols: Vec = vec![]; - while !input.peek(syn::token::FatArrow) { + while !input.peek(syn::token::Comma) { symbols.push(input.parse::()?); } Ok(Production { symbols }) From f9a4f62a96b965a4a13c54e7ef5d77a04e391c55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20Sinan=20A=C4=9Facan?= Date: Thu, 17 Feb 2022 12:13:35 +0300 Subject: [PATCH 3/4] WIP --- src/lr_codegen.rs | 99 ++++++++++++++++------------------------------- 1 file changed, 34 insertions(+), 65 deletions(-) diff --git a/src/lr_codegen.rs b/src/lr_codegen.rs index c7c939a..1a79491 100644 --- a/src/lr_codegen.rs +++ b/src/lr_codegen.rs @@ -1,8 +1,5 @@ use crate::ast::TokenEnum; -use crate::codegen::{ - generate_semantic_action_table, generate_token_kind_type, semantic_action_result_type, - token_value_fn, -}; +use crate::codegen::generate_token_kind_type; use crate::first::generate_first_table; use crate::grammar::{Grammar, NonTerminalIdx, TerminalIdx}; use crate::lr1::{build_lr1_table, generate_lr1_automaton}; @@ -23,16 +20,6 @@ pub fn generate_lr1_parser(grammar: Grammar, tokens: &TokenEnum) -> TokenStream let token_lifetimes = &tokens.type_lifetimes; - let ( - semantic_action_result_type_name, - semantic_action_result_type_decl, - non_terminal_action_variant_name, - ) = semantic_action_result_type(&grammar, &tokens.conversions, token_lifetimes); - - // Generate semantic action table, replace semantic actions in the grammar with their indices - // in the table - let (semantic_action_table, grammar) = generate_semantic_action_table(grammar); - // println!( // "{}", // crate::lr1::LR1AutomatonDisplay { @@ -65,13 +52,6 @@ pub fn generate_lr1_parser(grammar: Grammar, tokens: &TokenEnum) -> TokenStream let (token_kind_fn_name, token_kind_fn_decl) = crate::codegen::token_kind_fn(&token_kind_type_name, tokens); - let (token_value_fn_name, token_value_fn_decl) = token_value_fn( - &tokens.conversions, - &tokens.type_name, - &tokens.type_lifetimes, - &semantic_action_result_type_name, - ); - // struct NonTerminal; // impl NonTerminal { fn parse() { ... } } let parser_structs: Vec = nt_state_indices @@ -82,26 +62,17 @@ pub fn generate_lr1_parser(grammar: Grammar, tokens: &TokenEnum) -> TokenStream let non_terminal_idx = non_terminal_idx.as_usize(); let non_terminal_name_id = syn::Ident::new(&non_terminal.non_terminal, Span::call_site()); - let non_terminal_return_type = &non_terminal.return_ty; - let extract_method_id = syn::Ident::new( - &format!( - "non_terminal_{}", - non_terminal_action_variant_name[non_terminal_idx] - ), - Span::call_site(), - ); quote!( pub struct #non_terminal_name_id; impl #non_terminal_name_id { - pub fn parse<#(#token_lifetimes,)* E: Clone>( + pub fn parse<#(#token_lifetimes,)* E: ::std::fmt::Debug + Clone>( mut input: impl Iterator, E>> - ) -> Result<#non_terminal_return_type, ParseError_> + ) -> Result> { parse_generic( input, - SemanticActionResult::#extract_method_id, #parser_state, #non_terminal_idx, ) @@ -126,14 +97,14 @@ pub fn generate_lr1_parser(grammar: Grammar, tokens: &TokenEnum) -> TokenStream } #[derive(Debug)] - enum Kind { + enum Kind<#(#token_lifetimes,)*> { NonTerminal(usize), - Terminal(usize), + Terminal(#token_type<#(#token_lifetimes,)*>), } #[derive(Debug)] - struct Node { - kind: Kind, + pub struct Node<#(#token_lifetimes,)*> { + kind: Kind<#(#token_lifetimes,)*>, span: (usize, usize), children: Vec, } @@ -155,27 +126,17 @@ pub fn generate_lr1_parser(grammar: Grammar, tokens: &TokenEnum) -> TokenStream // fn token_kind(token: &Token) -> TokenKind { ... } #token_kind_fn_decl - // fn token_value(token: &Token) -> SemanticActionResult { ... } - #token_value_fn_decl - - // enum SemanticActionResult { ... } + an impl for extracting fields - #semantic_action_result_type_decl - - // static SEMANTIC_ACTIONS: [fn(&mut Vec( - mut input: impl Iterator, E>>, - extract_value: fn(SemanticActionResult<#(#token_lifetimes),*>) -> R, + fn parse_generic<#(#token_lifetimes,)* E: ::std::fmt::Debug + Clone>( + mut input: impl Iterator, E>>, init_state: u32, non_terminal_idx: usize, - ) -> Result> + ) -> Result> { let mut state_stack: Vec = vec![init_state]; - let mut value_stack: Vec> = vec![]; + let mut value_stack: Vec = vec![]; - let mut token = input.next(); + let mut token: Option, E>> = + input.next(); loop { let state = *state_stack.last().unwrap() as usize; @@ -188,16 +149,28 @@ pub fn generate_lr1_parser(grammar: Grammar, tokens: &TokenEnum) -> TokenStream None => panic!("Stuck! (1) state={}, terminal={}", state, terminal_idx), Some(LRAction::Shift { next_state }) => { state_stack.push(next_state); - if let Some(Ok(token)) = &token { - value_stack.push(#token_value_fn_name(token)); - } + let token_ = token.unwrap().unwrap(); + value_stack.push(Node { + kind: Kind::Terminal(token_), + span: (0, 0), + children: Vec::new(), + }); token = input.next(); } - Some(LRAction::Reduce { non_terminal_idx, n_symbols, semantic_action_idx }) => { - (SEMANTIC_ACTIONS[semantic_action_idx as usize])(&mut value_stack); - for _ in 0 .. n_symbols { - state_stack.pop().unwrap(); - } + Some(LRAction::Reduce { + non_terminal_idx, + production_idx, + n_symbols, + }) => { + let children: Vec = + value_stack.drain(value_stack.len() - (n_symbols as usize)..).collect(); + + value_stack.push(Node { + kind: Kind::NonTerminal(non_terminal_idx as usize), + span: (0, 0), + children, + }); + let state = *state_stack.last().unwrap() as usize; match GOTO[state][non_terminal_idx as usize] { None => panic!("Stuck! (2)"), @@ -208,11 +181,7 @@ pub fn generate_lr1_parser(grammar: Grammar, tokens: &TokenEnum) -> TokenStream } } - // TODO: We could call the function directly here, instead of going through the - // table - SEMANTIC_ACTIONS[action_idx](&mut value_stack); - - Ok(extract_value(value_stack.pop().unwrap())) + Ok(value_stack.pop().unwrap()) } #(#parser_structs)* From 7c513602a88e6db5add322671deb6609c85823ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20Sinan=20A=C4=9Facan?= Date: Thu, 17 Feb 2022 12:29:41 +0300 Subject: [PATCH 4/4] Remove unused stuff --- src/codegen.rs | 314 +--------------------------------------------- src/lr_codegen.rs | 11 +- src/lr_common.rs | 1 - 3 files changed, 5 insertions(+), 321 deletions(-) diff --git a/src/codegen.rs b/src/codegen.rs index 9ecd792..19babbd 100644 --- a/src/codegen.rs +++ b/src/codegen.rs @@ -1,8 +1,6 @@ use crate::ast::{Conversion, FieldPattern, Pattern, TokenEnum}; -use crate::grammar::{Grammar, NonTerminal, Production}; -use fxhash::FxHashMap; -use proc_macro2::{Span, TokenStream}; +use proc_macro2::TokenStream; use quote::quote; /// Generates an `enum #{token}Kind { T0, T1, ... }` type with a variant for each token described @@ -43,89 +41,6 @@ pub fn generate_token_kind_type(tokens: &TokenEnum) -> (syn::Ident, TokenStream) #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct SemanticActionIdx(u16); -/// Generates semantic action functions, semantic action table (array of semantic action functions) -/// and replaces semantic actions in the grammar with their indices in the array. -pub fn generate_semantic_action_table(grammar: Grammar) -> (Vec, Grammar) { - let Grammar { - non_terminals, - terminals, - } = grammar; - - // Action function declarations and the array - let mut decls: Vec = vec![]; - let mut fn_names: Vec = vec![]; - - let non_terminals: Vec = non_terminals - .into_iter() - .enumerate() - .map( - |( - nt_i, - NonTerminal { - non_terminal, - productions, - return_ty, - public, - }, - )| { - let productions: Vec = productions - .into_iter() - .enumerate() - .map(|(p_i, Production { symbols })| { - // Statements to pop the values off the value stack and bind them, for the - // pruduction's RHS - let n_pops = symbols.len(); - - let fn_name = syn::Ident::new( - &format!("nt{}p{}_action", nt_i, p_i), - Span::call_site(), - ); - - decls.push(quote!( - fn #fn_name(value_stack: &mut Vec) { - let children: Vec = - value_stack.drain(value_stack.len() - #n_pops..).collect(); - - value_stack.push(Node { - kind: Kind::NonTerminal(#nt_i), - span: (0, 0), - children, - }); - } - )); - - fn_names.push(fn_name); - - Production { symbols } - }) - .collect(); - - NonTerminal { - non_terminal, - productions, - return_ty, - public, - } - }, - ) - .collect(); - - let n_fns = fn_names.len(); - decls.push(quote!( - static SEMANTIC_ACTIONS: [fn(&mut Vec); #n_fns] = [ - #(#fn_names),* - ]; - )); - - ( - decls, - Grammar { - non_terminals, - terminals, - }, - ) -} - /// Generates a `fn token_kind(& #token_type) -> #token_kind_type` that returns kind of a token. pub fn token_kind_fn( token_kind_type_name: &syn::Ident, @@ -167,42 +82,6 @@ pub fn token_kind_fn( (fn_name, code) } -/// Generates a `fn token_value(& #token_type) -> ActionResult` function that returns the value of -/// a matched token. -pub fn token_value_fn( - tokens: &[Conversion], - user_token_type_name: &syn::Ident, - user_token_type_lifetimes: &[syn::Lifetime], - action_result_type_name: &syn::Ident, -) -> (syn::Ident, TokenStream) { - let fn_name = syn::Ident::new("token_value", Span::call_site()); - - let mut variants: Vec = vec![]; - for (i, Conversion { to, .. }) in tokens.iter().enumerate() { - let (pattern_code, pattern_idents) = generate_pattern_syn_with_idents(to); - let variant_id = syn::Ident::new(&format!("Token{}", i), Span::call_site()); - variants.push(quote!( - #pattern_code => { - // TODO: This clone needs to go - #action_result_type_name::#variant_id(#(#pattern_idents.clone()),*) - } - )); - } - - let code = quote!( - fn #fn_name<#(#user_token_type_lifetimes),*>( - token: &#user_token_type_name<#(#user_token_type_lifetimes),*> - ) -> #action_result_type_name<#(#user_token_type_lifetimes),*> - { - match token { - #(#variants)* - } - } - ); - - (fn_name, code) -} - /// Given a `Pattern`, generate the pattern syntax for it, using `_` for the "choose" argument. fn pattern_ignore(pattern: &Pattern) -> TokenStream { match pattern { @@ -248,194 +127,3 @@ fn pattern_ignore(pattern: &Pattern) -> TokenStream { Pattern::Choose(_) => quote!(_), } } - -/// Declares an `ActionResult` enum type with a variant for each non-terminal in the grammar and -/// each token with value. Used in the value stack for terminals and non-terminals. -/// -/// The `Vec` maps `NonTerminalIdx`s to their value extraction method names. -pub fn semantic_action_result_type( - grammar: &Grammar, - tokens: &[Conversion], - token_lifetimes: &[syn::Lifetime], -) -> (syn::Ident, TokenStream, Vec) { - let semantic_action_result_type_name = - syn::Ident::new("SemanticActionResult", Span::call_site()); - - let mut variants: Vec = vec![]; - - // Inherent methods for extracting fields of variants - let mut extraction_fns: Vec = vec![]; - - // Generate variants for tokens - // TODO: Do we need variants for tokens without values? If not, remove those. If yes, then use - // a single variant for all value-less tokens. - for (i, Conversion { to, .. }) in tokens.iter().enumerate() { - let variant_id = syn::Ident::new(&format!("Token{}", i), Span::call_site()); - let pattern_types = pattern_types(to); - variants.push(quote!(#variant_id(#(#pattern_types,)*))); - - let method_id = syn::Ident::new(&format!("token_{}", i), Span::call_site()); - let field_ids: Vec = pattern_types - .iter() - .enumerate() - .map(|(i, _)| syn::Ident::new(&format!("f{}", i), Span::call_site())) - .collect(); - - extraction_fns.push(quote!( - fn #method_id(self) -> (#(#pattern_types),*) { - match self { - Self::#variant_id(#(#field_ids),*) => (#(#field_ids),*), - _ => unreachable!(), - } - } - )); - } - - // Generate variants for non-terminals. Non-terminals with same type use the same enum variant. - let mut non_terminal_action_variant_names: Vec = vec![]; - - let mut non_terminal_ty_indices: FxHashMap = Default::default(); - let mut i: usize = 0; - - for non_terminal in grammar.non_terminals().iter() { - match non_terminal_ty_indices - .get(&non_terminal.return_ty) - .copied() - { - Some(i) => { - non_terminal_action_variant_names.push(i); - } - None => { - let variant_id = syn::Ident::new(&format!("NonTerminal{}", i), Span::call_site()); - - let pattern_ty = &non_terminal.return_ty; - variants.push(quote!(#variant_id(#pattern_ty))); - - let method_id = syn::Ident::new(&format!("non_terminal_{}", i), Span::call_site()); - extraction_fns.push(quote!( - fn #method_id(self) -> #pattern_ty { - match self { - Self::#variant_id(f) => f, - _ => unreachable!("{:?}", self), - } - } - )); - - non_terminal_action_variant_names.push(i); - - non_terminal_ty_indices.insert(non_terminal.return_ty.clone(), i); - i += 1; - } - } - } - - let code = quote!( - #[derive(Debug)] - enum #semantic_action_result_type_name<#(#token_lifetimes),*> { - #(#variants,)* - } - - impl<#(#token_lifetimes),*> #semantic_action_result_type_name<#(#token_lifetimes),*> { - #(#extraction_fns)* - } - ); - - ( - semantic_action_result_type_name, - code, - non_terminal_action_variant_names, - ) -} - -fn pattern_types(pat: &Pattern) -> Vec<&syn::Type> { - let mut ret = vec![]; - pattern_types_(pat, &mut ret); - ret -} - -fn pattern_types_<'a, 'b>(pat: &'a Pattern, acc: &'b mut Vec<&'a syn::Type>) { - match pat { - Pattern::Choose(ty) => { - acc.push(ty); - } - - Pattern::Enum(_, pats) | Pattern::Tuple(pats) => { - for pat in pats { - pattern_types_(pat, acc); - } - } - - Pattern::Struct(_, pats, _) => { - for FieldPattern { pattern, .. } in pats { - pattern_types_(pattern, acc); - } - } - - Pattern::Path(_) | Pattern::Underscore | Pattern::DotDot | Pattern::Lit(_) => {} - } -} - -/// Given a pattern, generate the code for it and return variables generated for the `Choose` nodes -fn generate_pattern_syn_with_idents(pat: &Pattern) -> (TokenStream, Vec) { - let mut idents = vec![]; - let code = generate_pattern_syn_with_idents_(pat, &mut idents); - (code, idents) -} - -fn generate_pattern_syn_with_idents_(pat: &Pattern, idents: &mut Vec) -> TokenStream { - match pat { - Pattern::Choose(_) => { - let ident = syn::Ident::new(&format!("f{}", idents.len()), Span::call_site()); - let code = quote!(#ident); - idents.push(ident); - code - } - - Pattern::Enum(path, pats) => { - let pats: Vec = pats - .iter() - .map(|pat| generate_pattern_syn_with_idents_(pat, idents)) - .collect(); - - quote!(#path(#(#pats),*)) - } - - Pattern::Struct(path, fields, dots) => { - let mut pats: Vec = fields - .iter() - .map( - |FieldPattern { - field_name, - pattern, - }| { - let pattern = generate_pattern_syn_with_idents_(pattern, idents); - quote!(#field_name: #pattern,) - }, - ) - .collect(); - - if *dots { - pats.push(quote!(..)); - } - - quote!(#path { #(#pats)* }) - } - - Pattern::Tuple(pats) => { - let pats: Vec = pats - .iter() - .map(|pat| generate_pattern_syn_with_idents_(pat, idents)) - .collect(); - - quote!((#(#pats),*)) - } - - Pattern::Path(path) => quote!(#path), - - Pattern::Underscore => quote!(_), - - Pattern::DotDot => quote!(..), - - Pattern::Lit(lit) => quote!(#lit), - } -} diff --git a/src/lr_codegen.rs b/src/lr_codegen.rs index 1a79491..a7f48b0 100644 --- a/src/lr_codegen.rs +++ b/src/lr_codegen.rs @@ -30,10 +30,10 @@ pub fn generate_lr1_parser(grammar: Grammar, tokens: &TokenEnum) -> TokenStream let lr1_table = build_lr1_table(&grammar, &lr1_automaton); - // println!( - // "{}", - // crate::lr_common::LRTableDisplay::new(&lr1_table, &grammar) - // ); + println!( + "{}", + crate::lr_common::LRTableDisplay::new(&lr1_table, &grammar) + ); let action_vec = action_table_vec(&grammar, lr1_table.get_action_table(), lr1_table.n_states()); @@ -59,7 +59,6 @@ pub fn generate_lr1_parser(grammar: Grammar, tokens: &TokenEnum) -> TokenStream .map(|(non_terminal_idx, parser_state)| { let parser_state = u32::try_from(parser_state.as_usize()).unwrap(); let non_terminal = grammar.get_non_terminal(non_terminal_idx); - let non_terminal_idx = non_terminal_idx.as_usize(); let non_terminal_name_id = syn::Ident::new(&non_terminal.non_terminal, Span::call_site()); @@ -74,7 +73,6 @@ pub fn generate_lr1_parser(grammar: Grammar, tokens: &TokenEnum) -> TokenStream parse_generic( input, #parser_state, - #non_terminal_idx, ) } } @@ -129,7 +127,6 @@ pub fn generate_lr1_parser(grammar: Grammar, tokens: &TokenEnum) -> TokenStream fn parse_generic<#(#token_lifetimes,)* E: ::std::fmt::Debug + Clone>( mut input: impl Iterator, E>>, init_state: u32, - non_terminal_idx: usize, ) -> Result> { let mut state_stack: Vec = vec![init_state]; diff --git a/src/lr_common.rs b/src/lr_common.rs index 5f12337..f6aa6bf 100644 --- a/src/lr_common.rs +++ b/src/lr_common.rs @@ -257,7 +257,6 @@ pub struct LRTableDisplay<'a, 'b> { } impl<'a, 'b> LRTableDisplay<'a, 'b> { - #[cfg(test)] pub fn new(table: &'a LRTable, grammar: &'b Grammar) -> Self { Self { table, grammar } }