
Commit 49ba5e3

Export an array of all tokens from ct_token_map
This helps with writing structured input adapters for fuzzing. When fuzzing the parser specifically (as opposed to fuzzing the lexer and parser at the same time), we want to supply the parser with an array of valid lexemes. This export lets us build such an array without manually listing every token in the fuzzing entry point. Note that I didn't implement this functionality for generated lexers because there is already a way to get all tokens via `mod_l::lexerdef().iter_rules()`.
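A minimal sketch of such a fuzzing entry point, assuming a libfuzzer-sys (cargo-fuzz) harness, that `ct_token_map` was called with the module name `token_map`, that `StorageT` is `u32`, and that `parse_toks` is a hypothetical wrapper around the parser under test:

```rust
// Hypothetical fuzz target. The module name `token_map`, the storage
// type `u32`, and the `parse_toks` entry point are all assumptions.
use libfuzzer_sys::fuzz_target;

fuzz_target!(|data: &[u8]| {
    if token_map::TOK_IDS.is_empty() {
        return; // avoid a modulo-by-zero on a degenerate grammar
    }
    // Map each fuzzer-supplied byte onto a valid token ID so the parser
    // is exercised with well-formed lexemes rather than raw bytes.
    let toks: Vec<u32> = data
        .iter()
        .map(|b| token_map::TOK_IDS[usize::from(*b) % token_map::TOK_IDS.len()])
        .collect();
    let _ = parse_toks(&toks);
});
```

Because `TOK_IDS` is generated at build time, a harness like this keeps working unchanged when tokens are added to or removed from the grammar.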
1 parent d307a07 commit 49ba5e3

File tree

1 file changed (+15, −10)


lrlex/src/lib/ctbuilder.rs

Lines changed: 15 additions & 10 deletions
```diff
@@ -1215,31 +1215,36 @@ pub fn ct_token_map<StorageT: Display + ToTokens>(
     let timestamp = env!("VERGEN_BUILD_TIMESTAMP");
     let mod_ident = format_ident!("{}", mod_name);
     write!(outs, "// lrlex build time: {}\n\n", quote!(#timestamp),).ok();
+    let storaget = str::parse::<TokenStream>(type_name::<StorageT>()).unwrap();
     // Sort the tokens so that they're always in the same order.
     // This will prevent unneeded rebuilds.
     let mut token_map_sorted = Vec::from_iter(token_map.borrow().iter());
     token_map_sorted.sort_by_key(|(k, _)| *k);
-    let tokens = &token_map_sorted
-        .into_iter()
+    let (token_array, tokens): (TokenStream, TokenStream) = token_map_sorted
+        .iter()
         .map(|(k, id)| {
             let name = match rename_map {
                 Some(rmap) => *rmap.get(k.as_str()).unwrap_or(&k.as_str()),
-                _ => k,
+                _ => &k,
             };
             let tok_ident = format_ident!("T_{}", name.to_ascii_uppercase());
-            let storaget = str::parse::<TokenStream>(type_name::<StorageT>()).unwrap();
-            // Code gen for the constant token values.
-            quote! {
-                pub const #tok_ident: #storaget = #id;
-            }
+            (
+                quote! {
+                    #tok_ident,
+                },
+                quote! {
+                    pub const #tok_ident: #storaget = #id;
+                },
+            )
         })
-        .collect::<Vec<_>>();
+        .unzip();
     // Since the formatter doesn't preserve comments and we don't want to lose build time,
     // just format the module contents.
     let unformatted = quote! {
         mod #mod_ident {
             #![allow(dead_code)]
-            #(#tokens)*
+            #tokens
+            pub const TOK_IDS: &[#storaget] = &[#token_array];
         }
     }
     .to_string();
```
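For illustration, given a grammar with tokens `INT` and `PLUS` and `StorageT = u32` (the token names and storage type are assumptions), the module emitted by the code above would expand to roughly:

```rust
mod token_map {
    #![allow(dead_code)]
    pub const T_INT: u32 = 0;
    pub const T_PLUS: u32 = 1;
    // New in this commit: every token ID collected into one array.
    pub const TOK_IDS: &[u32] = &[T_INT, T_PLUS];
}
```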
