Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Handles #59

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
362 changes: 362 additions & 0 deletions Cargo.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@ members = [
"tool",
"example",
"common",
"bench",
]
29 changes: 29 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -235,3 +235,32 @@ pub struct CommaSeparatedExprs {

### `Box<T>`
Boxes are automatically constructed around the inner type when parsing, but Rust Sitter doesn't do anything extra beyond that.


### `rust_sitter::Handle<T>`
Wherever you might use a `Box<T>`, you can instead use a `Handle<T>` to switch to an Arena pattern. If you use one or more `Handle`s in your grammar declaration, you need to also declare an Arena type for the objects to be stored in. This is done by adding a `#[rust_sitter::arena]` attribute to an empty struct declaration. For example:

```rust
#[rust_sitter::grammar("arithmetic")]
pub mod grammar {
use rust_sitter::Handle;

#[rust_sitter::arena]
#[derive(Default, Debug)]
pub struct MyArena;

#[rust_sitter::language]
#[derive(PartialEq, Eq, Debug)]
pub enum Expression {
Number(#[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())] i32),
#[rust_sitter::prec_left(1)]
Add(
Handle<Expression>,
#[rust_sitter::leaf(text = "+")] (),
Handle<Expression>,
),
}
}
```

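The `grammar::parse` function then returns the arena alongside the root node: on success it yields an `(Expression, MyArena)` tuple, and the arena can be indexed with `Handle<Expression>` values (for example, `arena[handle]`) to reach the nodes they refer to.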
23 changes: 23 additions & 0 deletions bench/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
[package]
name = "rust-sitter-benchmarking"
version = "0.4.4"
authors = ["Shadaj Laddad <[email protected]>"]
edition = "2021"
publish = false

[features]
default = ["tree-sitter-c2rust"]
tree-sitter-c2rust = ["rust-sitter/tree-sitter-c2rust"]
tree-sitter-standard = ["rust-sitter/tree-sitter-standard"]

[dependencies]
rust-sitter = { path = "../runtime", default-features = false }
rand = "0.9"
criterion = { version = "0.5", features = ["html_reports"] }

[build-dependencies]
rust-sitter-tool = { path = "../tool" }

[[bench]]
name = "arena_benchmark"
harness = false
60 changes: 60 additions & 0 deletions bench/benches/arena_benchmark.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
use criterion::{criterion_group, criterion_main, Criterion};
use rand::{RngCore, SeedableRng};
use std::{fmt::Write, hint::black_box};

/// Builds a pseudo-random arithmetic expression: `len` integer operands joined
/// by binary operators, for example `12 * 7 + 3`.
///
/// The result depends only on `seed` and `len`; `len == 0` yields an empty string.
fn gen_big_arithmetic_expr(seed: u32, len: usize) -> String {
    // Seed layout: the little-endian bytes of `seed` first, zero padding after.
    let mut seed_bytes = [0u8; 32];
    seed_bytes[..4].copy_from_slice(&seed.to_le_bytes());
    let mut rng = rand::rngs::SmallRng::from_seed(seed_bytes);

    let mut expr = String::new();
    for position in 0..len {
        // Every operand except the first is preceded by an operator.
        if position > 0 {
            // Of the 16 buckets: one picks `-`, two pick `+`, the other 13 pick `*`.
            // NOTE(review): `/` is never produced even though the benchmarked
            // grammars define a `Div` rule — confirm whether that is intended.
            let operator = match rng.next_u32() % 16 {
                0 => " - ",
                1 | 2 => " + ",
                _ => " * ",
            };
            expr.push_str(operator);
        }
        // Operands are kept strictly below `i32::MAX`.
        let operand = rng.next_u32() % (i32::MAX as u32);
        write!(&mut expr, "{}", operand).unwrap();
    }
    expr
}

/// Runs only the tree-sitter parse of `src` with the box grammar's language and
/// returns the raw syntax tree; no typed AST is built.
///
/// Panics if the language cannot be set on the parser or if parsing yields no tree.
fn parse(src: &str) -> rust_sitter::tree_sitter::Tree {
    use rust_sitter::tree_sitter::Parser;

    let mut ts_parser = Parser::new();
    let language = rust_sitter_benchmarking::box_grammar::language();
    ts_parser.set_language(&language).unwrap();
    ts_parser.parse(src, None).unwrap()
}

/// Traverse and place AST into boxes.
fn boxes(src: &str) -> rust_sitter_benchmarking::box_grammar::Expression {
rust_sitter_benchmarking::box_grammar::parse(src).unwrap()
}

/// Traverse and place AST into handles in an arena.
fn handles(
src: &str,
) -> (
rust_sitter_benchmarking::handle_grammar::Expression,
rust_sitter_benchmarking::handle_grammar::Arena,
) {
rust_sitter_benchmarking::handle_grammar::parse(src).unwrap()
}

/// Registers the three benchmarks — `parse`, `boxes` and `handles` — which all
/// run over the same generated 1000-operand expression.
fn criterion_benchmark(c: &mut Criterion) {
    const SEED: u32 = 0;
    const LEN: usize = 1000;

    // Generate the input once so every benchmark measures identical text.
    let expression = gen_big_arithmetic_expr(SEED, LEN);

    c.bench_function("parse", |bencher| {
        bencher.iter(|| parse(black_box(&expression)))
    });
    c.bench_function("boxes", |bencher| {
        bencher.iter(|| boxes(black_box(&expression)))
    });
    c.bench_function("handles", |bencher| {
        bencher.iter(|| handles(black_box(&expression)))
    });
}

// Bundle `criterion_benchmark` into a group named `benches`, then let Criterion
// generate the benchmark binary's entry point from that group (the bench
// target is declared with `harness = false` in bench/Cargo.toml).
criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);
6 changes: 6 additions & 0 deletions bench/build.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
use std::path::PathBuf;

/// Build script: asks Cargo to rerun when anything under `src` changes, then
/// hands `src/lib.rs` to the rust-sitter tool to build the parsers.
fn main() {
    let grammar_root = PathBuf::from("src/lib.rs");

    println!("cargo:rerun-if-changed=src");
    rust_sitter_tool::build_parsers(&grammar_root);
}
83 changes: 83 additions & 0 deletions bench/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
// Arithmetic grammar (registered under the name "arithmetic") whose recursive
// operands are stored in `Box`es. The benchmark uses it as the `Box` variant
// to compare against the `Handle`/arena variant below.
#[rust_sitter::grammar("arithmetic")]
pub mod box_grammar {
// Type marked with `rust_sitter::language`: the expression tree returned by
// this module's `parse`.
#[rust_sitter::language]
#[derive(PartialEq, Eq, Debug)]
pub enum Expression {
// Integer literal: one or more digits, converted to `i32`. The `unwrap`
// panics if the matched text does not parse as an `i32`.
Number(#[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())] i32),
// `lhs - rhs`, declared with `prec_left` level 0.
#[rust_sitter::prec_left(0)]
Sub(
Box<Expression>,
#[rust_sitter::leaf(text = "-")] (),
Box<Expression>,
),
// `lhs + rhs`, declared with `prec_left` level 1.
#[rust_sitter::prec_left(1)]
Add(
Box<Expression>,
#[rust_sitter::leaf(text = "+")] (),
Box<Expression>,
),
// `lhs * rhs`, declared with `prec_left` level 2.
#[rust_sitter::prec_left(2)]
Mul(
Box<Expression>,
#[rust_sitter::leaf(text = "*")] (),
Box<Expression>,
),
// `lhs / rhs`, declared with `prec_left` level 3.
#[rust_sitter::prec_left(3)]
Div(
Box<Expression>,
#[rust_sitter::leaf(text = "/")] (),
Box<Expression>,
),
}

// A single whitespace character, declared as a grammar `extra`.
// NOTE(review): presumably this lets whitespace appear between any tokens,
// as tree-sitter extras do — confirm against the rust-sitter docs.
#[rust_sitter::extra]
struct Whitespace {
#[rust_sitter::leaf(pattern = r"\s")]
_whitespace: (),
}
}

// Same arithmetic grammar as `box_grammar`, registered under the name
// "arithmetic_handles", but with every recursive operand stored as a
// `Handle<Expression>` instead of a `Box<Expression>`.
#[rust_sitter::grammar("arithmetic_handles")]
pub mod handle_grammar {
use rust_sitter::Handle;

// Empty struct marked as the arena for this grammar. This module's `parse`
// returns an `Arena` alongside the root `Expression`.
#[rust_sitter::arena]
#[derive(Default, Debug)]
pub struct Arena;

// Type marked with `rust_sitter::language`: the root expression returned by
// this module's `parse`.
#[rust_sitter::language]
#[derive(PartialEq, Eq, Debug)]
pub enum Expression {
// Integer literal: one or more digits, converted to `i32`. The `unwrap`
// panics if the matched text does not parse as an `i32`.
Number(#[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())] i32),
// `lhs - rhs`, declared with `prec_left` level 0.
#[rust_sitter::prec_left(0)]
Sub(
Handle<Expression>,
#[rust_sitter::leaf(text = "-")] (),
Handle<Expression>,
),
// `lhs + rhs`, declared with `prec_left` level 1.
#[rust_sitter::prec_left(1)]
Add(
Handle<Expression>,
#[rust_sitter::leaf(text = "+")] (),
Handle<Expression>,
),
// `lhs * rhs`, declared with `prec_left` level 2.
#[rust_sitter::prec_left(2)]
Mul(
Handle<Expression>,
#[rust_sitter::leaf(text = "*")] (),
Handle<Expression>,
),
// `lhs / rhs`, declared with `prec_left` level 3.
#[rust_sitter::prec_left(3)]
Div(
Handle<Expression>,
#[rust_sitter::leaf(text = "/")] (),
Handle<Expression>,
),
}

// A single whitespace character, declared as a grammar `extra`.
// NOTE(review): presumably this lets whitespace appear between any tokens,
// as tree-sitter extras do — confirm against the rust-sitter docs.
#[rust_sitter::extra]
struct Whitespace {
#[rust_sitter::leaf(pattern = r"\s")]
_whitespace: (),
}
}
2 changes: 1 addition & 1 deletion example/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@ rust-sitter-tool = { path = "../tool" }

[dev-dependencies]
insta = "1.39"
wasm-bindgen-test = "0.3.0"
wasm-bindgen-test = "0.3.0"
89 changes: 89 additions & 0 deletions example/src/handles.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
// Example grammar (registered under the name "handles") in which every nested
// node is referenced through a `Handle` into an arena rather than a `Box`.
// `Handle` is written both via the `use` below and fully qualified as
// `rust_sitter::Handle`.
// NOTE(review): the mixed spelling presumably exists to exercise both path
// forms in the macro — confirm.
#[rust_sitter::grammar("handles")]
pub mod grammar {
use rust_sitter::Handle;

// Empty struct marked as the arena for this grammar; `parse` returns it
// together with the root node, and it is indexed with handles (`arena[h]`).
#[rust_sitter::arena]
#[derive(Default, Debug)]
pub struct Arena;

// Type marked with `rust_sitter::language`: the root expression returned by
// `parse`.
#[rust_sitter::language]
#[derive(PartialEq, Eq, Debug)]
pub enum Expression {
// A literal value, itself stored behind a handle.
Literal(rust_sitter::Handle<Literal>),
// `lhs - rhs`, declared with `prec_left` level 1.
#[rust_sitter::prec_left(1)]
Sub(
rust_sitter::Handle<Expression>,
#[rust_sitter::leaf(text = "-")] (),
Handle<Expression>,
),
// `lhs * rhs`, declared with `prec_left` level 2.
#[rust_sitter::prec_left(2)]
Mul(
Handle<Expression>,
#[rust_sitter::leaf(text = "*")] (),
rust_sitter::Handle<Expression>,
),
// `[` followed by a comma-delimited repetition of expressions, then `]`;
// the elements are collected as a `Vec` of handles.
Array(
#[rust_sitter::leaf(text = "[")] (),
#[rust_sitter::repeat]
#[rust_sitter::delimited(
#[rust_sitter::leaf(text = ",")]
()
)]
Vec<Handle<Expression>>,
#[rust_sitter::leaf(text = "]")] (),
),
}

// Literal values. Derives only `PartialEq` (not `Eq`) because `f32` does not
// implement `Eq`.
#[derive(PartialEq, Debug)]
pub enum Literal {
// One or more digits, converted to `i32`; the `unwrap` panics if the text
// does not parse as an `i32`.
Number(#[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())] i32),
// Digits, a dot, then zero or more digits, converted to `f32`.
Float(#[rust_sitter::leaf(pattern = r"\d+\.\d*", transform = |v| v.parse().unwrap())] f32),
}

// A single whitespace character, declared as a grammar `extra`.
// NOTE(review): presumably this lets whitespace appear between any tokens,
// as tree-sitter extras do — confirm against the rust-sitter docs.
#[rust_sitter::extra]
struct Whitespace {
#[rust_sitter::leaf(pattern = r"\s")]
_whitespace: (),
}
}

// Tests for the handle-based example grammar.
#[cfg(test)]
mod tests {
use super::*;
use grammar::*;

// Checks that valid inputs parse and that following each handle through the
// arena (`arena[handle]`) reaches the expected node.
#[wasm_bindgen_test::wasm_bindgen_test]
#[test]
fn successful_parses() {
// A bare integer becomes a `Literal::Number` behind a handle.
let (node, arena) = grammar::parse("1").unwrap();
assert!(
matches!(node, Expression::Literal(lit) if matches!(arena[lit], Literal::Number(1)))
);

// Leading whitespace is accepted; a dotted number becomes a `Literal::Float`.
let (node, arena) = grammar::parse(" 1.1").unwrap();
assert!(
matches!(node, Expression::Literal(lit) if matches!(arena[lit], Literal::Float(1.1)))
);

// Binary subtraction: both operand handles resolve to number literals.
let (node, arena) = grammar::parse("1 - 2").unwrap();
assert!(matches!(node, Expression::Sub(lhs, _, rhs)
if matches!(arena[lhs], Expression::Literal(lit) if matches!(arena[lit], Literal::Number(1)))
&& matches!(arena[rhs], Expression::Literal(lit) if matches!(arena[lit], Literal::Number(2)))
));

// Array literal: each element handle resolves, in order, to 1, 2 and 3.
let (node, arena) = grammar::parse("[1, 2, 3]").unwrap();
assert!(matches!(node, Expression::Array(_, elements, _)
if matches!(arena[elements[0]], Expression::Literal(lit) if matches!(arena[lit], Literal::Number(1)))
&& matches!(arena[elements[1]], Expression::Literal(lit) if matches!(arena[lit], Literal::Number(2)))
&& matches!(arena[elements[2]], Expression::Literal(lit) if matches!(arena[lit], Literal::Number(3)))
));
}

// Snapshots the `Debug` output of `parse` for inputs expected to be rejected:
// an operator the grammar lacks (`+`), a dangling operator, and stray letters.
// NOTE(review): unlike `successful_parses`, this test has no
// `wasm_bindgen_test` attribute — presumably because of the insta snapshots;
// confirm.
#[test]
fn failed_parses() {
insta::assert_debug_snapshot!(grammar::parse("1 + 2"));
insta::assert_debug_snapshot!(grammar::parse("1 - 2 -"));
insta::assert_debug_snapshot!(grammar::parse("a1"));
insta::assert_debug_snapshot!(grammar::parse("1a"));
}
}
2 changes: 2 additions & 0 deletions example/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ use codemap_diagnostic::{ColorConfig, Diagnostic, Emitter, Level, SpanLabel, Spa
use rust_sitter::errors::{ParseError, ParseErrorReason};

mod arithmetic;
mod handles;
mod optionals;
mod partial_handles;
mod repetitions;
mod words;

Expand Down
Loading