diff --git a/Cargo.lock b/Cargo.lock
index 71ca3a8..a4333ec 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -210,12 +210,6 @@ dependencies = [
  "autocfg",
 ]
 
-[[package]]
-name = "minimal-lexical"
-version = "0.2.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
-
 [[package]]
 name = "monkey-rust"
 version = "0.5.0"
@@ -250,12 +244,11 @@ dependencies = [
 
 [[package]]
 name = "nom"
-version = "7.1.1"
+version = "8.0.0-alpha2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a8903e5a29a317527874d0402f867152a3d21c908bb0b933e416c65e301d4c36"
+checksum = "c2785486691910e746b3d84220af18362bd15ba71f9b7bc0a47ac549550b7fe2"
 dependencies = [
  "memchr",
- "minimal-lexical",
 ]
 
 [[package]]
diff --git a/Cargo.toml b/Cargo.toml
index b63cfa0..506f67e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -17,7 +17,7 @@ name = "monkey_exe"
 path = "src/main.rs"
 
 [dependencies]
-nom = "^7.1.1"
+nom = "8.0.0-alpha2"
 clap = "~2.31.2"
 rustyline = "9.1.2"
 rustyline-derive = "0.6.0"
diff --git a/lib/evaluator/builtins.rs b/lib/evaluator/builtins.rs
index b50f773..3b9459b 100644
--- a/lib/evaluator/builtins.rs
+++ b/lib/evaluator/builtins.rs
@@ -31,7 +31,7 @@ fn add_builtin(name: &str, param_num: usize, func: BuiltinFunction) -> (Ident, O
 }
 
 fn bprint_fn(args: Vec<Object>) -> Result<Object, String> {
-    match args.get(0) {
+    match args.first() {
         Some(Object::String(t)) => {
             println!("{}", t);
             Ok(Object::Null)
@@ -45,7 +45,7 @@ fn bprint_fn(args: Vec<Object>) -> Result<Object, String> {
 }
 
 fn blen_fn(args: Vec<Object>) -> Result<Object, String> {
-    match args.get(0) {
+    match args.first() {
         Some(Object::String(s)) => Ok(Object::Integer(s.len() as i64)),
         Some(Object::Array(arr)) => Ok(Object::Integer(arr.len() as i64)),
         _ => Err(String::from("invalid arguments for len")),
diff --git a/lib/evaluator/mod.rs b/lib/evaluator/mod.rs
index 838cdbd..38eb4be 100644
--- a/lib/evaluator/mod.rs
+++ b/lib/evaluator/mod.rs
@@ -250,9 +250,9 @@ impl Evaluator {
         let old_env = Rc::clone(&self.env);
         let mut new_env = Environment::new_with_outer(Rc::clone(f_env));
         let zipped = params.into_iter().zip(args);
-        for (_, (Ident(name), o)) in zipped.enumerate() {
+        zipped.for_each(|(Ident(name), o)| {
             new_env.set(&name, o);
-        }
+        });
         self.env = Rc::new(RefCell::new(new_env));
         let object = self.eval_blockstmt(body);
         self.env = old_env;
@@ -379,7 +379,7 @@ mod tests {
     fn compare(input: &[u8], object: Object) {
         let (_, r) = Lexer::lex_tokens(input).unwrap();
         let tokens = Tokens::new(&r);
-        let (_, result_parse) = Parser::parse_tokens(tokens).unwrap();
+        let (_, result_parse) = MyParser::parse_tokens(tokens).unwrap();
         let mut evaluator = Evaluator::new();
         let eval = evaluator.eval_program(result_parse);
         assert_eq!(eval, object);
@@ -723,10 +723,7 @@ mod tests {
             (input_beg.clone() + "let s = \"two\"; h[s]").as_bytes(),
             Object::Integer(2),
         );
-        compare(
-            (input_beg.clone() + "h[3]").as_bytes(),
-            Object::Integer(3),
-        );
+        compare((input_beg.clone() + "h[3]").as_bytes(), Object::Integer(3));
         compare(
             (input_beg.clone() + "h[2 + 2]").as_bytes(),
             Object::Integer(4),
@@ -739,10 +736,7 @@ mod tests {
             (input_beg.clone() + "h[5 < 1]").as_bytes(),
             Object::Boolean(false),
         );
-        compare(
-            (input_beg.clone() + "h[100]").as_bytes(),
-            Object::Null,
-        );
+        compare((input_beg.clone() + "h[100]").as_bytes(), Object::Null);
         compare(
             (input_beg.clone() + "h[[]]").as_bytes(),
             Object::Error("[] is not hashable".to_string()),
diff --git a/lib/lexer/mod.rs b/lib/lexer/mod.rs
index 999dce7..1e739bb 100644
--- a/lib/lexer/mod.rs
+++ b/lib/lexer/mod.rs
@@ -3,7 +3,7 @@ use nom::bytes::complete::{tag, take};
 use nom::character::complete::{alpha1, alphanumeric1, digit1, multispace0};
 use nom::combinator::{map, map_res, recognize};
 use nom::multi::many0;
-use nom::sequence::{delimited, pair};
+use nom::sequence::delimited;
 use nom::*;
 use std::str;
 
@@ -15,8 +15,8 @@ use crate::lexer::token::*;
 
 macro_rules! syntax {
     ($func_name: ident, $tag_string: literal, $output_token: expr) => {
-        fn $func_name<'a>(s: &'a [u8]) -> IResult<&[u8], Token> {
-            map(tag($tag_string), |_| $output_token)(s)
+        fn $func_name(s: &[u8]) -> IResult<&[u8], Token> {
+            map(tag($tag_string), |_| $output_token).parse(s)
         }
     };
 }
@@ -49,7 +49,8 @@ pub fn lex_operator(input: &[u8]) -> IResult<&[u8], Token> {
         lesser_operator_equal,
         greater_operator,
         lesser_operator,
-    ))(input)
+    ))
+    .parse(input)
 }
 
 // punctuations
@@ -74,7 +75,8 @@ pub fn lex_punctuations(input: &[u8]) -> IResult<&[u8], Token> {
         rbrace_punctuation,
         lbracket_punctuation,
         rbracket_punctuation,
-    ))(input)
+    ))
+    .parse(input)
 }
 
 // Strings
@@ -106,17 +108,17 @@ fn complete_byte_slice_str_from_utf8(c: &[u8]) -> Result<&str, Utf8Error> {
     str::from_utf8(c)
 }
 
 fn string(input: &[u8]) -> IResult<&[u8], String> {
-    delimited(tag("\""), map_res(pis, convert_vec_utf8), tag("\""))(input)
+    delimited(tag("\""), map_res(pis, convert_vec_utf8), tag("\"")).parse(input)
 }
 
 fn lex_string(input: &[u8]) -> IResult<&[u8], Token> {
-    map(string, Token::StringLiteral)(input)
+    map(string, Token::StringLiteral).parse(input)
 }
 
 // Reserved or ident
 fn lex_reserved_ident(input: &[u8]) -> IResult<&[u8], Token> {
     map_res(
-        recognize(pair(
+        recognize((
             alt((alpha1, tag("_"))),
             many0(alt((alphanumeric1, tag("_")))),
         )),
@@ -133,7 +135,8 @@ fn lex_reserved_ident(input: &[u8]) -> IResult<&[u8], Token> {
                 _ => Token::Ident(syntax.to_string()),
             })
         },
-    )(input)
+    )
+    .parse(input)
 }
 
 fn complete_str_from_str<F: FromStr>(c: &str) -> Result<F, F::Err> {
@@ -148,12 +151,13 @@ fn lex_integer(input: &[u8]) -> IResult<&[u8], Token> {
             complete_str_from_str,
         ),
         Token::IntLiteral,
-    )(input)
+    )
+    .parse(input)
 }
 
 // Illegal tokens
 fn lex_illegal(input: &[u8]) -> IResult<&[u8], Token> {
-    map(take(1usize), |_| Token::Illegal)(input)
+    map(take(1usize), |_| Token::Illegal).parse(input)
 }
 
 fn lex_token(input: &[u8]) -> IResult<&[u8], Token> {
@@ -164,11 +168,12 @@ fn lex_token(input: &[u8]) -> IResult<&[u8], Token> {
         lex_reserved_ident,
         lex_integer,
         lex_illegal,
-    ))(input)
+    ))
+    .parse(input)
 }
 
 fn lex_tokens(input: &[u8]) -> IResult<&[u8], Vec<Token>> {
-    many0(delimited(multispace0, lex_token, multispace0))(input)
+    many0(delimited(multispace0, lex_token, multispace0)).parse(input)
 }
 
 pub struct Lexer;
diff --git a/lib/lexer/token.rs b/lib/lexer/token.rs
index 2980fcc..5a18a88 100644
--- a/lib/lexer/token.rs
+++ b/lib/lexer/token.rs
@@ -1,6 +1,5 @@
 use nom::*;
 use std::iter::Enumerate;
-use std::ops::{Range, RangeFrom, RangeFull, RangeTo};
 
 #[derive(PartialEq, Debug, Clone)]
 pub enum Token {
@@ -61,14 +60,11 @@ impl<'a> Tokens<'a> {
     }
 }
 
-impl<'a> InputLength for Tokens<'a> {
+impl<'a> Input for Tokens<'a> {
     #[inline]
     fn input_len(&self) -> usize {
         self.tok.len()
     }
-}
-
-impl<'a> InputTake for Tokens<'a> {
+
     #[inline]
     fn take(&self, count: usize) -> Self {
         Tokens {
@@ -93,55 +89,10 @@ impl<'a> InputTake for Tokens<'a> {
         };
         (second, first)
     }
-}
 
-impl InputLength for Token {
-    #[inline]
-    fn input_len(&self) -> usize {
-        1
-    }
-}
-
-impl<'a> Slice<Range<usize>> for Tokens<'a> {
-    #[inline]
-    fn slice(&self, range: Range<usize>) -> Self {
-        Tokens {
-            tok: self.tok.slice(range.clone()),
-            start: self.start + range.start,
-            end: self.start + range.end,
-        }
-    }
-}
-
-impl<'a> Slice<RangeTo<usize>> for Tokens<'a> {
-    #[inline]
-    fn slice(&self, range: RangeTo<usize>) -> Self {
-        self.slice(0..range.end)
-    }
-}
-
-impl<'a> Slice<RangeFrom<usize>> for Tokens<'a> {
-    #[inline]
-    fn slice(&self, range: RangeFrom<usize>) -> Self {
-        self.slice(range.start..self.end - self.start)
-    }
-}
-
-impl<'a> Slice<RangeFull> for Tokens<'a> {
-    #[inline]
-    fn slice(&self, _: RangeFull) -> Self {
-        Tokens {
-            tok: self.tok,
-            start: self.start,
-            end: self.end,
-        }
-    }
-}
-
-impl<'a> InputIter for Tokens<'a> {
     type Item = &'a Token;
-    type Iter = Enumerate<::std::slice::Iter<'a, Token>>;
-    type IterElem = ::std::slice::Iter<'a, Token>;
+    type IterIndices = Enumerate<::std::slice::Iter<'a, Token>>;
+    type Iter = ::std::slice::Iter<'a, Token>;
 
     #[inline]
     fn iter_indices(&self) -> Enumerate<::std::slice::Iter<'a, Token>> {
@@ -166,4 +117,12 @@ impl<'a> InputIter for Tokens<'a> {
             Err(Needed::Unknown)
         }
     }
+
+    fn take_from(&self, index: usize) -> Self {
+        Self {
+            tok: self.tok.split_at(index).1,
+            start: index,
+            end: self.end,
+        }
+    }
 }
diff --git a/lib/parser/mod.rs b/lib/parser/mod.rs
index 891802c..efb760f 100644
--- a/lib/parser/mod.rs
+++ b/lib/parser/mod.rs
@@ -1,4 +1,4 @@
-use nom::*;
+use nom::{Parser, *};
 
 pub mod ast;
 use crate::lexer::token::*;
@@ -15,32 +15,24 @@ use std::result::Result::*;
 macro_rules! tag_token (
     ($func_name:ident, $tag: expr) => (
         fn $func_name(tokens: Tokens) -> IResult<Tokens, Tokens> {
-            verify(take(1usize), |t: &Tokens| t.tok[0] == $tag)(tokens)
+            verify(take(1usize), |t: &Tokens| t.tok[0] == $tag).parse(tokens)
         }
     )
 );
 
 fn parse_literal(input: Tokens) -> IResult<Tokens, Literal> {
     let (i1, t1) = take(1usize)(input)?;
-    if t1.tok.is_empty() {
-        Err(Err::Error(Error::new(input, ErrorKind::Tag)))
-    } else {
-        match t1.tok[0].clone() {
-            Token::IntLiteral(name) => Ok((i1, Literal::IntLiteral(name))),
-            Token::StringLiteral(s) => Ok((i1, Literal::StringLiteral(s))),
-            Token::BoolLiteral(b) => Ok((i1, Literal::BoolLiteral(b))),
-            _ => Err(Err::Error(Error::new(input, ErrorKind::Tag))),
-        }
+    match t1.tok[0].clone() {
+        Token::IntLiteral(name) => Ok((i1, Literal::IntLiteral(name))),
+        Token::StringLiteral(s) => Ok((i1, Literal::StringLiteral(s))),
+        Token::BoolLiteral(b) => Ok((i1, Literal::BoolLiteral(b))),
+        _ => Err(Err::Error(Error::new(input, ErrorKind::Tag))),
     }
 }
 
 fn parse_ident(input: Tokens) -> IResult<Tokens, Ident> {
     let (i1, t1) = take(1usize)(input)?;
-    if t1.tok.is_empty() {
-        Err(Err::Error(Error::new(input, ErrorKind::Tag)))
-    } else {
-        match t1.tok[0].clone() {
-            Token::Ident(name) => Ok((i1, Ident(name))),
-            _ => Err(Err::Error(Error::new(input, ErrorKind::Tag))),
-        }
+    match t1.tok[0].clone() {
+        Token::Ident(name) => Ok((i1, Ident(name))),
+        _ => Err(Err::Error(Error::new(input, ErrorKind::Tag))),
     }
 }
 
 tag_token!(let_tag, Token::Let);
@@ -82,7 +74,7 @@ fn infix_op(t: &Token) -> (Precedence, Option<Infix>) {
 }
 
 fn parse_program(input: Tokens) -> IResult<Tokens, Program> {
-    terminated(many0(parse_stmt), eof_tag)(input)
+    terminated(many0(parse_stmt), eof_tag).parse(input)
 }
 
 fn parse_expr(input: Tokens) -> IResult<Tokens, Expr> {
@@ -90,37 +82,40 @@ fn parse_stmt(input: Tokens) -> IResult<Tokens, Stmt> {
-    alt((parse_let_stmt, parse_return_stmt, parse_expr_stmt))(input)
+    alt((parse_let_stmt, parse_return_stmt, parse_expr_stmt)).parse(input)
 }
 
 fn parse_let_stmt(input: Tokens) -> IResult<Tokens, Stmt> {
     map(
-        tuple((
+        (
            let_tag,
            parse_ident,
            assign_tag,
            parse_expr,
            opt(semicolon_tag),
-        )),
+        ),
         |(_, ident, _, expr, _)| Stmt::LetStmt(ident, expr),
-    )(input)
+    )
+    .parse(input)
 }
 
 fn parse_return_stmt(input: Tokens) -> IResult<Tokens, Stmt> {
     map(
         delimited(return_tag, parse_expr, opt(semicolon_tag)),
         Stmt::ReturnStmt,
-    )(input)
+    )
+    .parse(input)
 }
 
 fn parse_expr_stmt(input: Tokens) -> IResult<Tokens, Stmt> {
     map(terminated(parse_expr, opt(semicolon_tag)), |expr| {
         Stmt::ExprStmt(expr)
-    })(input)
+    })
+    .parse(input)
 }
 
 fn parse_block_stmt(input: Tokens) -> IResult<Tokens, BlockStmt> {
-    delimited(lbrace_tag, many0(parse_stmt), rbrace_tag)(input)
+    delimited(lbrace_tag, many0(parse_stmt), rbrace_tag).parse(input)
 }
 
 fn parse_atom_expr(input: Tokens) -> IResult<Tokens, Expr> {
@@ -133,27 +128,28 @@ fn parse_atom_expr(input: Tokens) -> IResult<Tokens, Expr> {
         parse_hash_expr,
         parse_if_expr,
         parse_fn_expr,
-    ))(input)
+    ))
+    .parse(input)
 }
 
 fn parse_paren_expr(input: Tokens) -> IResult<Tokens, Expr> {
-    delimited(lparen_tag, parse_expr, rparen_tag)(input)
+    delimited(lparen_tag, parse_expr, rparen_tag).parse(input)
 }
 
 fn parse_lit_expr(input: Tokens) -> IResult<Tokens, Expr> {
-    map(parse_literal, Expr::LitExpr)(input)
+    map(parse_literal, Expr::LitExpr).parse(input)
 }
 
 fn parse_ident_expr(input: Tokens) -> IResult<Tokens, Expr> {
-    map(parse_ident, Expr::IdentExpr)(input)
+    map(parse_ident, Expr::IdentExpr).parse(input)
 }
 
 fn parse_comma_exprs(input: Tokens) -> IResult<Tokens, Expr> {
-    preceded(comma_tag, parse_expr)(input)
+    preceded(comma_tag, parse_expr).parse(input)
 }
 
 fn parse_exprs(input: Tokens) -> IResult<Tokens, Vec<Expr>> {
-    map(
-        pair(parse_expr, many0(parse_comma_exprs)),
-        |(first, second)| [&vec![first][..], &second[..]].concat(),
-    )(input)
+    map((parse_expr, many0(parse_comma_exprs)), |(first, second)| {
+        [&vec![first][..], &second[..]].concat()
+    })
+    .parse(input)
 }
 
 fn empty_boxed_vec(input: Tokens) -> IResult<Tokens, Vec<Expr>> {
     Ok((input, vec![]))
 }
@@ -166,20 +162,22 @@ fn parse_array_expr(input: Tokens) -> IResult<Tokens, Expr> {
             rbracket_tag,
         ),
         Expr::ArrayExpr,
-    )(input)
+    )
+    .parse(input)
 }
 
 fn parse_hash_pair(input: Tokens) -> IResult<Tokens, (Literal, Expr)> {
-    separated_pair(parse_literal, colon_tag, parse_expr)(input)
+    separated_pair(parse_literal, colon_tag, parse_expr).parse(input)
 }
 
 fn parse_hash_comma_expr(input: Tokens) -> IResult<Tokens, (Literal, Expr)> {
-    preceded(comma_tag, parse_hash_pair)(input)
+    preceded(comma_tag, parse_hash_pair).parse(input)
 }
 
 fn parse_hash_pairs(input: Tokens) -> IResult<Tokens, Vec<(Literal, Expr)>> {
     map(
         pair(parse_hash_pair, many0(parse_hash_comma_expr)),
         |(first, second)| [&vec![first][..], &second[..]].concat(),
-    )(input)
+    )
+    .parse(input)
 }
 
 fn empty_pairs(input: Tokens) -> IResult<Tokens, Vec<(Literal, Expr)>> {
     Ok((input, vec![]))
 }
@@ -188,21 +186,18 @@ fn parse_hash_expr(input: Tokens) -> IResult<Tokens, Expr> {
     map(
         delimited(lbrace_tag, alt((parse_hash_pairs, empty_pairs)), rbrace_tag),
         Expr::HashExpr,
-    )(input)
+    )
+    .parse(input)
 }
 
 fn parse_prefix_expr(input: Tokens) -> IResult<Tokens, Expr> {
-    let (i1, t1) = alt((plus_tag, minus_tag, not_tag))(input)?;
-    if t1.tok.is_empty() {
-        Err(Err::Error(error_position!(input, ErrorKind::Tag)))
-    } else {
-        let (i2, e) = parse_atom_expr(i1)?;
-        match t1.tok[0].clone() {
-            Token::Plus => Ok((i2, Expr::PrefixExpr(Prefix::PrefixPlus, Box::new(e)))),
-            Token::Minus => Ok((i2, Expr::PrefixExpr(Prefix::PrefixMinus, Box::new(e)))),
-            Token::Not => Ok((i2, Expr::PrefixExpr(Prefix::Not, Box::new(e)))),
-            _ => Err(Err::Error(error_position!(input, ErrorKind::Tag))),
-        }
+    let (i1, t1) = alt((plus_tag, minus_tag, not_tag)).parse(input)?;
+    let (i2, e) = parse_atom_expr(i1)?;
+    match t1.tok[0].clone() {
+        Token::Plus => Ok((i2, Expr::PrefixExpr(Prefix::PrefixPlus, Box::new(e)))),
+        Token::Minus => Ok((i2, Expr::PrefixExpr(Prefix::PrefixMinus, Box::new(e)))),
+        Token::Not => Ok((i2, Expr::PrefixExpr(Prefix::Not, Box::new(e)))),
+        _ => Err(Err::Error(error_position!(input, ErrorKind::Tag))),
     }
 }
@@ -212,28 +207,24 @@ fn parse_pratt_expr(input: Tokens, precedence: Precedence) -> IResult<Tokens, Expr> {
 fn go_parse_pratt_expr(input: Tokens, precedence: Precedence, left: Expr) -> IResult<Tokens, Expr> {
-    let (i1, t1) = take(1usize)(input)?;
-
-    if t1.tok.is_empty() {
-        Ok((i1, left))
-    } else {
-        let preview = &t1.tok[0];
-        let p = infix_op(preview);
-        match p {
-            (Precedence::PCall, _) if precedence < Precedence::PCall => {
-                let (i2, left2) = parse_call_expr(input, left)?;
-                go_parse_pratt_expr(i2, precedence, left2)
-            }
-            (Precedence::PIndex, _) if precedence < Precedence::PIndex => {
-                let (i2, left2) = parse_index_expr(input, left)?;
-                go_parse_pratt_expr(i2, precedence, left2)
-            }
-            (ref peek_precedence, _) if precedence < *peek_precedence => {
-                let (i2, left2) = parse_infix_expr(input, left)?;
-                go_parse_pratt_expr(i2, precedence, left2)
-            }
-            _ => Ok((input, left)),
+    let (_i1, t1) = take(1usize)(input)?;
+
+    let preview = &t1.tok[0];
+    let p = infix_op(preview);
+    match p {
+        (Precedence::PCall, _) if precedence < Precedence::PCall => {
+            let (i2, left2) = parse_call_expr(input, left)?;
+            go_parse_pratt_expr(i2, precedence, left2)
         }
+        (Precedence::PIndex, _) if precedence < Precedence::PIndex => {
+            let (i2, left2) = parse_index_expr(input, left)?;
+            go_parse_pratt_expr(i2, precedence, left2)
+        }
+        (ref peek_precedence, _) if precedence < *peek_precedence => {
+            let (i2, left2) = parse_infix_expr(input, left)?;
+            go_parse_pratt_expr(i2, precedence, left2)
+        }
+        _ => Ok((input, left)),
     }
 }
@@ -261,7 +252,8 @@ fn parse_call_expr(input: Tokens, fn_handle: Expr) -> IResult<Tokens, Expr> {
             function: Box::new(fn_handle.clone()),
             arguments: e,
         },
-    )(input)
+    )
+    .parse(input)
 }
 
 fn parse_index_expr(input: Tokens, arr: Expr) -> IResult<Tokens, Expr> {
@@ -270,54 +262,58 @@ fn parse_index_expr(input: Tokens, arr: Expr) -> IResult<Tokens, Expr> {
             array: Box::new(arr.clone()),
             index: Box::new(idx),
         }
-    })(input)
+    })
+    .parse(input)
 }
 
 fn parse_if_expr(input: Tokens) -> IResult<Tokens, Expr> {
     map(
-        tuple((
+        (
             if_tag,
             lparen_tag,
             parse_expr,
             rparen_tag,
             parse_block_stmt,
             parse_else_expr,
-        )),
+        ),
         |(_, _, expr, _, c, a)| Expr::IfExpr {
             cond: Box::new(expr),
             consequence: c,
             alternative: a,
         },
-    )(input)
+    )
+    .parse(input)
 }
 
 fn parse_else_expr(input: Tokens) -> IResult<Tokens, Option<BlockStmt>> {
-    opt(preceded(else_tag, parse_block_stmt))(input)
+    opt(preceded(else_tag, parse_block_stmt)).parse(input)
 }
 
 fn empty_params(input: Tokens) -> IResult<Tokens, Vec<Ident>> {
     Ok((input, vec![]))
 }
 
 fn parse_fn_expr(input: Tokens) -> IResult<Tokens, Expr> {
     map(
-        tuple((
+        (
             function_tag,
             lparen_tag,
             alt((parse_params, empty_params)),
             rparen_tag,
             parse_block_stmt,
-        )),
+        ),
         |(_, _, p, _, b)| Expr::FnExpr { params: p, body: b },
-    )(input)
+    )
+    .parse(input)
 }
 
 fn parse_params(input: Tokens) -> IResult<Tokens, Vec<Ident>> {
     map(
         pair(parse_ident, many0(preceded(comma_tag, parse_ident))),
         |(p, ps)| [&vec![p][..], &ps[..]].concat(),
-    )(input)
+    )
+    .parse(input)
 }
 
-pub struct Parser;
+pub struct MyParser;
 
-impl Parser {
+impl MyParser {
     pub fn parse_tokens(tokens: Tokens) -> IResult<Tokens, Program> {
         parse_program(tokens)
     }
 }
@@ -331,18 +327,18 @@ mod tests {
     fn assert_input_with_program(input: &[u8], expected_results: Program) {
         let (_, r) = Lexer::lex_tokens(input).unwrap();
         let tokens = Tokens::new(&r);
-        let (_, result) = Parser::parse_tokens(tokens).unwrap();
+        let (_, result) = MyParser::parse_tokens(tokens).unwrap();
         assert_eq!(result, expected_results);
     }
 
     fn compare_inputs(input: &[u8], input2: &[u8]) {
         let (_, r) = Lexer::lex_tokens(input).unwrap();
         let tokens = Tokens::new(&r);
-        let (_, result) = Parser::parse_tokens(tokens).unwrap();
+        let (_, result) = MyParser::parse_tokens(tokens).unwrap();
 
         let (_, r) = Lexer::lex_tokens(input2).unwrap();
         let tokens = Tokens::new(&r);
-        let (_, expected_results) = Parser::parse_tokens(tokens).unwrap();
+        let (_, expected_results) = MyParser::parse_tokens(tokens).unwrap();
         assert_eq!(result, expected_results);
     }
diff --git a/repl/main.rs b/repl/main.rs
index 471c3f3..e130ea1 100644
--- a/repl/main.rs
+++ b/repl/main.rs
@@ -128,7 +128,7 @@ fn main() -> rustyline::Result<()> {
             match lex_tokens {
                 Ok((_, r)) => {
                     let tokens = Tokens::new(&r);
-                    let parsed = Parser::parse_tokens(tokens);
+                    let parsed = MyParser::parse_tokens(tokens);
                     match parsed {
                         Ok((_, program)) => {
                             let eval = evaluator.eval_program(program);
diff --git a/src/main.rs b/src/main.rs
index 468e123..033157c 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -34,7 +34,7 @@ fn main() {
         match lex_tokens {
             Ok((_, r)) => {
                 let tokens = Tokens::new(&r);
-                let parsed = Parser::parse_tokens(tokens);
+                let parsed = MyParser::parse_tokens(tokens);
                 match parsed {
                     Ok((_, program)) => {
                         let eval = evaluator.eval_program(program);
diff --git a/tests/monkey_test.rs b/tests/monkey_test.rs
index a5bb086..75e13fd 100644
--- a/tests/monkey_test.rs
+++ b/tests/monkey_test.rs
@@ -21,7 +21,7 @@ fn test_example_hash() {
     let mut evaluator = Evaluator::new();
     let (_, lex_tokens) = Lexer::lex_tokens(code_string.as_bytes()).unwrap();
     let tokens = Tokens::new(&lex_tokens);
-    let (_, program) = Parser::parse_tokens(tokens).unwrap();
+    let (_, program) = MyParser::parse_tokens(tokens).unwrap();
     let eval = evaluator.eval_program(program);
     assert_eq!(eval, Object::Null);
 }
@@ -32,7 +32,7 @@ fn test_reduce() {
     let mut evaluator = Evaluator::new();
     let (_, lex_tokens) = Lexer::lex_tokens(code_string.as_bytes()).unwrap();
     let tokens = Tokens::new(&lex_tokens);
-    let (_, program) = Parser::parse_tokens(tokens).unwrap();
+    let (_, program) = MyParser::parse_tokens(tokens).unwrap();
     let eval = evaluator.eval_program(program);
     assert_eq!(eval, Object::Null);
 }
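
Note for reviewers (not part of the patch): the mechanical changes above all follow two nom 8 conventions. First, the per-input traits (InputLength, InputTake, InputIter, Slice<...>) are folded into the single nom::Input trait, which is why token.rs now has one impl providing input_len, take, take_split, take_from, and the iterator associated types. Second, combinators now return values implementing the Parser trait and are driven with .parse(input) instead of being called as functions; bare tuples replace tuple((...)), and the local Parser struct is renamed MyParser to avoid colliding with the now-imported nom::Parser trait. A minimal stand-alone sketch of the calling convention, assuming nom = "8"; hex_byte and hex_color are hypothetical examples, not part of this repository:

// Illustration only: hypothetical hex-color parser showing the nom 8 style.
use nom::bytes::complete::{tag, take_while_m_n};
use nom::combinator::map_res;
use nom::{IResult, Parser};

// nom 7 would end with `...)(input)`; in nom 8 the combinator is a value
// implementing `Parser`, so it is invoked with `.parse(input)`.
fn hex_byte(input: &str) -> IResult<&str, u8> {
    map_res(take_while_m_n(2, 2, |c: char| c.is_ascii_hexdigit()), |s| {
        u8::from_str_radix(s, 16)
    })
    .parse(input)
}

// A bare tuple of parsers implements `Parser`, replacing nom 7's `tuple((...))`.
fn hex_color(input: &str) -> IResult<&str, (u8, u8, u8)> {
    let (input, _) = tag("#").parse(input)?;
    (hex_byte, hex_byte, hex_byte).parse(input)
}

fn main() {
    assert_eq!(hex_color("#2F14DF"), Ok(("", (0x2F, 0x14, 0xDF))));
}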