Skip to content

Commit 77f486e

Browse files
authored
Add goto support in libcc2rs-macros (#161)
This PR adds support for writing goto and goto_block in the following form: ```rs let mut ret: i32 = 0; goto_block!({ '__entry: { if n < 0 { ret = -1; goto!('out); } ret = 100; } 'out: { return ret; } }); ``` This is equivalent to the following C code: ```c int f() { int ret; if (n < 0) { ret = -1; goto out; } ret = 100; out: return ret; } ``` goto_block is already used by the switch-with-fallthrough macro internally. I modified its syntax from a list of `label => { ... }` to `label: { ... }` so that rustfmt can format it properly. Then, I added GotoRewriter to rewrite each `goto!('label)` into `{ __s = <target index>; continue '__sm; }`. Whenever GotoStateMachine hits a goto macro, it calls GotoRewriter to generate the following state machine: ```rs { let mut __s: u32 = 0; '__sm: loop { match __s { 0u32 => { if n < 0 { ret = -1; __s = 1; continue '__sm; // <- was goto!('out) } ret = 100; __s = 1; continue '__sm; // fall-through to next arm } 1u32 => { return ret; break '__sm; // last arm: exit } _ => break '__sm, // match exhaustiveness } } } ``` Finally, to support nested goto_blocks, for example a switch inside a goto_block, I added StateMachineNames which gives a unique name to each state machine so that each goto targets the correct state machine. After this PR is merged, I will add the codegen counterpart in the `cpp2rust/` dir. A few limitations about how goto will work: * local variables of a function that contains goto will be hoisted outside the goto_block so that all arms of the goto_block macro can see them. c2rust also does this. All hoisted variables will be default initialized. * the rust compiler cannot prove that all paths of the code inside the loop of the state machine return. In reality the loop always returns because the C behavior is preserved. To suppress the rustc error, I added a panic at the end of the function: `panic!("ub: non-void function does not return a value");`
1 parent 4a32f50 commit 77f486e

5 files changed

Lines changed: 320 additions & 46 deletions

File tree

libcc2rs-macros/src/goto.rs

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,16 @@
22
// Distributed under the MIT license that can be found in the LICENSE file.
33

44
use proc_macro::TokenStream;
5+
use proc_macro2::Span;
56
use syn::parse::{Parse, ParseStream};
6-
use syn::{Expr, Lifetime, Token, parse_macro_input};
7+
use syn::{Block, Expr, ExprBlock, Lifetime, Stmt, parse_macro_input};
78

8-
use crate::state_machine::{Arm, GotoStateMachine, StateMachine};
9+
use crate::state_machine::{Arm, GotoStateMachine, StateMachine, StateMachineNames};
910

1011
pub fn expand(input: TokenStream) -> TokenStream {
1112
let GotoBlockInput { arms } = parse_macro_input!(input as GotoBlockInput);
1213
GotoStateMachine {
14+
names: StateMachineNames::fresh(),
1315
arms: arms
1416
.into_iter()
1517
.map(|a| Arm {
@@ -33,15 +35,29 @@ struct GotoArm {
3335

3436
impl Parse for GotoBlockInput {
3537
fn parse(input: ParseStream) -> syn::Result<Self> {
38+
let block: Block = input.parse()?;
3639
let mut arms = Vec::new();
37-
while !input.is_empty() {
38-
let label: Lifetime = input.parse()?;
39-
input.parse::<Token![=>]>()?;
40-
let body: Expr = input.parse()?;
41-
arms.push(GotoArm { label, body });
42-
if input.peek(Token![,]) {
43-
input.parse::<Token![,]>()?;
44-
}
40+
for stmt in block.stmts {
41+
let Stmt::Expr(Expr::Block(eb), _) = stmt else {
42+
return Err(syn::Error::new(
43+
Span::call_site(),
44+
"goto_block! body must be a sequence of labeled blocks",
45+
));
46+
};
47+
let Some(label) = eb.label else {
48+
return Err(syn::Error::new(
49+
Span::call_site(),
50+
"goto_block! arm must be a labeled block",
51+
));
52+
};
53+
arms.push(GotoArm {
54+
label: label.name,
55+
body: Expr::Block(ExprBlock {
56+
attrs: eb.attrs,
57+
label: None,
58+
block: eb.block,
59+
}),
60+
});
4561
}
4662
Ok(Self { arms })
4763
}

libcc2rs-macros/src/lib.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,10 @@ pub fn switch(input: TokenStream) -> TokenStream {
3636
switch::expand(input)
3737
}
3838

39-
// goto_block! {
40-
// '<label> => { /* body; may contain `break` or `continue` */ },
39+
// goto_block!({
40+
// '<label>: { /* body; may contain `break`, `continue`, or goto!('other) */ }
4141
// ...
42-
// };
42+
// });
4343
//
4444
// Expands to
4545
//

libcc2rs-macros/src/state_machine.rs

Lines changed: 96 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
// Copyright (c) 2022-present INESC-ID.
22
// Distributed under the MIT license that can be found in the LICENSE file.
33

4+
use std::collections::HashMap;
5+
use std::sync::atomic::{AtomicU64, Ordering};
6+
47
use proc_macro2::{Ident, Span, TokenStream as TokenStream2};
58
use quote::{format_ident, quote};
69
use syn::visit_mut::{self, VisitMut};
7-
use syn::{Expr, ExprBreak, ExprContinue, Lifetime, Pat};
10+
use syn::{Expr, ExprBreak, ExprContinue, Lifetime, Pat, Stmt, parse_quote};
811

912
pub struct Arm {
1013
pub label: String,
@@ -21,12 +24,29 @@ pub trait StateMachine {
2124
fn emit(self) -> TokenStream2;
2225
}
2326

24-
fn sm_label() -> Lifetime {
25-
Lifetime::new("'__sm", Span::call_site())
27+
pub(crate) struct StateMachineNames {
28+
pub label: Lifetime,
29+
pub state: Ident,
30+
pub break_flag: Ident,
31+
pub cont_flag: Ident,
32+
}
33+
34+
impl StateMachineNames {
35+
pub fn fresh() -> Self {
36+
static COUNTER: AtomicU64 = AtomicU64::new(0);
37+
let id = COUNTER.fetch_add(1, Ordering::Relaxed);
38+
Self {
39+
label: Lifetime::new(&format!("'__sm{id}"), Span::call_site()),
40+
state: format_ident!("__s{}", id),
41+
break_flag: format_ident!("__user_break{}", id),
42+
cont_flag: format_ident!("__user_continue{}", id),
43+
}
44+
}
2645
}
2746

2847
// Collection of labeled arms that fall-through by default
2948
pub struct GotoStateMachine {
49+
pub names: StateMachineNames,
3050
pub arms: Vec<Arm>,
3151
}
3252

@@ -87,10 +107,12 @@ impl GotoStateMachine {
87107

88108
impl StateMachine for GotoStateMachine {
89109
fn emit(self) -> TokenStream2 {
90-
let lbl = sm_label();
91-
let s = format_ident!("__s");
92-
let break_flag = format_ident!("__user_break");
93-
let cont_flag = format_ident!("__user_continue");
110+
let StateMachineNames {
111+
label: lbl,
112+
state: s,
113+
break_flag,
114+
cont_flag,
115+
} = self.names;
94116

95117
let n = self.arms.len();
96118
let mut arms_have_break = false;
@@ -101,6 +123,17 @@ impl StateMachine for GotoStateMachine {
101123
.enumerate()
102124
.map(|(i, arm)| {
103125
let mut body = arm.body.clone();
126+
GotoRewriter {
127+
map: &self
128+
.arms
129+
.iter()
130+
.enumerate()
131+
.map(|(i, a)| (a.label.clone(), i as u32))
132+
.collect(),
133+
state: s.clone(),
134+
sm_label: lbl.clone(),
135+
}
136+
.visit_expr_mut(&mut body);
104137
let (had_br, had_cn) =
105138
Self::propagate_rewrite(&mut body, &lbl, &break_flag, &cont_flag);
106139
arms_have_break |= had_br;
@@ -131,6 +164,58 @@ impl StateMachine for GotoStateMachine {
131164
}
132165
}
133166

167+
// Rewrites `goto!('label)` into `{ __s = <target index>; continue '__sm; }`.
168+
struct GotoRewriter<'a> {
169+
// Map with labels and their indices inside the current state machine. Used to check if the
170+
// label the goto jumps to is part of the current state machine. If it is, emit
171+
// `__s = map[label]`
172+
map: &'a HashMap<String, u32>,
173+
state: Ident,
174+
sm_label: Lifetime,
175+
}
176+
177+
impl GotoRewriter<'_> {
178+
fn expand_goto_into_state_machine_jump(&self, tokens: &TokenStream2) -> Option<Expr> {
179+
let idx = *self.map.get(
180+
&syn::parse2::<Lifetime>(tokens.clone())
181+
.expect("goto! expects a lifetime label")
182+
.ident
183+
.to_string(),
184+
)?;
185+
let state = &self.state;
186+
let sm_label = &self.sm_label;
187+
Some(parse_quote!({ #state = #idx; continue #sm_label; }))
188+
}
189+
190+
fn recurse_into_inner_goto_block(&mut self, mac: &mut syn::Macro) -> bool {
191+
if mac.path.is_ident("switch") || mac.path.is_ident("goto_block") {
192+
if let Ok(mut inner) = syn::parse2::<Expr>(mac.tokens.clone()) {
193+
self.visit_expr_mut(&mut inner);
194+
mac.tokens = quote!(#inner);
195+
}
196+
return true;
197+
}
198+
false
199+
}
200+
}
201+
202+
impl VisitMut for GotoRewriter<'_> {
203+
fn visit_stmt_mut(&mut self, stmt: &mut Stmt) {
204+
if let Stmt::Macro(sm) = stmt {
205+
if sm.mac.path.is_ident("goto") {
206+
if let Some(jump) = self.expand_goto_into_state_machine_jump(&sm.mac.tokens) {
207+
*stmt = Stmt::Expr(jump, Some(Default::default()));
208+
}
209+
return;
210+
}
211+
if self.recurse_into_inner_goto_block(&mut sm.mac) {
212+
return;
213+
}
214+
}
215+
visit_mut::visit_stmt_mut(self, stmt);
216+
}
217+
}
218+
134219
// GotoStateMachine(dispatch arm + cases)
135220
pub struct SwitchStateMachine {
136221
pub goto: GotoStateMachine,
@@ -186,17 +271,16 @@ impl SwitchStateMachine {
186271

187272
impl StateMachine for SwitchStateMachine {
188273
fn emit(self) -> TokenStream2 {
189-
let lbl = sm_label();
190-
let s = format_ident!("__s");
274+
let names = StateMachineNames::fresh();
191275

192-
let user_arms = Self::convert_break_to_switch_exit(&self.goto.arms, &lbl);
193-
let dispatch = self.build_dispatch_arm(&user_arms, &lbl, &s);
276+
let user_arms = Self::convert_break_to_switch_exit(&self.goto.arms, &names.label);
277+
let dispatch = self.build_dispatch_arm(&user_arms, &names.label, &names.state);
194278

195279
let mut arms = Vec::new();
196280
arms.push(dispatch);
197281
arms.extend(user_arms);
198282

199-
GotoStateMachine { arms }.emit()
283+
GotoStateMachine { names, arms }.emit()
200284
}
201285
}
202286

libcc2rs-macros/src/switch.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@ use proc_macro::TokenStream;
55
use syn::parse::{Parse, ParseStream};
66
use syn::{Expr, Pat, parse_macro_input};
77

8-
use crate::state_machine::{Arm, DispatchCase, GotoStateMachine, StateMachine, SwitchStateMachine};
8+
use crate::state_machine::{
9+
Arm, DispatchCase, GotoStateMachine, StateMachine, StateMachineNames, SwitchStateMachine,
10+
};
911

1012
pub fn expand(input: TokenStream) -> TokenStream {
1113
let SwitchInput { condition, arms } = parse_macro_input!(input as SwitchInput);
@@ -24,7 +26,10 @@ pub fn expand(input: TokenStream) -> TokenStream {
2426
});
2527
}
2628
SwitchStateMachine {
27-
goto: GotoStateMachine { arms: cfg_arms },
29+
goto: GotoStateMachine {
30+
names: StateMachineNames::fresh(),
31+
arms: cfg_arms,
32+
},
2833
condition,
2934
cases,
3035
}

0 commit comments

Comments
 (0)