diff --git a/src/lazy/expanded/macro_evaluator.rs b/src/lazy/expanded/macro_evaluator.rs index 37b89771..132c8f37 100644 --- a/src/lazy/expanded/macro_evaluator.rs +++ b/src/lazy/expanded/macro_evaluator.rs @@ -943,10 +943,7 @@ impl<'top, D: Decoder> StackedMacroEvaluator<'top, D> { /// current encoding context and push the resulting `MacroExpansion` onto the stack. pub fn push(&mut self, invocation: impl Into>) -> IonResult<()> { let macro_expr = invocation.into(); - let expansion = match macro_expr.expand() { - Ok(expansion) => expansion, - Err(e) => return Err(e), - }; + let expansion = macro_expr.expand()?; self.macro_stack.push(expansion); Ok(()) } @@ -989,10 +986,7 @@ impl<'top, D: Decoder> StackedMacroEvaluator<'top, D> { Some(expansion) => expansion, }; // Ask that expansion to continue its evaluation by one step. - let step = match current_expansion.next_step() { - Ok(step) => step, - Err(e) => return Err(e), - }; + let step = current_expansion.next_step()?; current_expansion.is_complete = step.is_final(); use ValueExpr::*; let maybe_output_value = match step.value_expr() { diff --git a/src/lazy/expanded/mod.rs b/src/lazy/expanded/mod.rs index 0b8d0569..72e186ba 100644 --- a/src/lazy/expanded/mod.rs +++ b/src/lazy/expanded/mod.rs @@ -558,10 +558,7 @@ impl ExpandingReader { // It's another macro invocation, we'll add it to the evaluator so it will be evaluated // on the next call and then we'll return the e-expression itself. EExp(e_exp) => { - let resolved_e_exp = match e_exp.resolve(context_ref) { - Ok(resolved) => resolved, - Err(e) => return Err(e), - }; + let resolved_e_exp = e_exp.resolve(context_ref)?; // Get the current evaluator or make a new one let evaluator = match self.evaluator_ptr.get() { @@ -639,10 +636,7 @@ impl ExpandingReader { } // It's another macro invocation, we'll start evaluating it. 
EExp(e_exp) => { - let resolved_e_exp = match e_exp.resolve(context_ref) { - Ok(resolved) => resolved, - Err(e) => return Err(e), - }; + let resolved_e_exp = e_exp.resolve(context_ref)?; // If this e-expression invokes a template with a non-system, singleton expansion, we can use the // e-expression to back a LazyExpandedValue. It will only be evaluated if the user calls `read()`. @@ -664,11 +658,7 @@ impl ExpandingReader { }; // Try to get a value by starting to evaluate the e-expression. - let next_value = match evaluator.next() { - Ok(value) => value, - Err(e) => return Err(e), - }; - if let Some(value) = next_value { + if let Some(value) = evaluator.next()? { // If we get a value and the evaluator isn't empty yet, save its pointer // so we can try to get more out of it when `next_at_or_above_depth` is called again. if !evaluator.is_empty() { diff --git a/src/lazy/expanded/struct.rs b/src/lazy/expanded/struct.rs index 9ae10aca..0bbc802d 100644 --- a/src/lazy/expanded/struct.rs +++ b/src/lazy/expanded/struct.rs @@ -93,7 +93,7 @@ impl<'top, D: Decoder> LazyExpandedField<'top, D> { self.name } - pub fn to_field_expr(&self) -> FieldExpr<'top, D> { + pub fn to_field_expr(self) -> FieldExpr<'top, D> { FieldExpr::NameValue(self.name(), self.value()) } } diff --git a/src/lazy/text/buffer.rs b/src/lazy/text/buffer.rs index 2ba7a3e7..9090c2aa 100644 --- a/src/lazy/text/buffer.rs +++ b/src/lazy/text/buffer.rs @@ -27,24 +27,20 @@ use crate::lazy::text::matched::{ use crate::lazy::text::parse_result::{fatal_parse_error, InvalidInputError, IonParseError}; use crate::lazy::text::parse_result::{IonMatchResult, IonParseResult}; use crate::lazy::text::raw::v1_1::arg_group::{EExpArg, EExpArgExpr, TextEExpArgGroup}; -use crate::lazy::text::raw::v1_1::reader::{ - MacroIdRef, - SystemMacroAddress, TextEExpression_1_1 -}; +use crate::lazy::text::raw::v1_1::reader::{MacroIdRef, SystemMacroAddress, TextEExpression_1_1}; use crate::lazy::text::value::{ LazyRawTextValue, 
LazyRawTextValue_1_0, LazyRawTextValue_1_1, LazyRawTextVersionMarker, }; use crate::result::DecodingError; -use crate::{ - Encoding, HasRange, IonError, IonResult, IonType, RawSymbolRef, TimestampPrecision, -}; +use crate::{Encoding, HasRange, IonError, IonResult, IonType, RawSymbolRef, TimestampPrecision}; use crate::lazy::expanded::macro_table::{Macro, ION_1_1_SYSTEM_MACROS}; use crate::lazy::expanded::template::{Parameter, RestSyntaxPolicy}; use crate::lazy::text::as_utf8::AsUtf8; +use crate::lazy::text::raw::sequence::RawTextSExpIterator; +use crate::lazy::text::token_kind::{ValueTokenKind, TEXT_ION_TOKEN_KINDS}; use bumpalo::collections::Vec as BumpVec; use winnow::ascii::{digit0, digit1}; -use crate::lazy::text::raw::sequence::RawTextSExpIterator; /// Generates parser functions that map from an Ion type representation (`Decimal`, `Int`, etc) /// to an `EncodedTextValue`. @@ -402,7 +398,10 @@ impl<'top> TextBuffer<'top> { // int `3` while recognizing the input `-3` as the int `-3`. If `match_operator` runs before // `match_value`, it will consume the sign (`-`) of negative number values, treating // `-3` as an operator (`-`) and an int (`3`). Thus, we run `match_value` first. - whitespace_and_then(alt((Self::match_value::, Self::match_operator))), + whitespace_and_then(alt(( + Self::match_value::, + Self::match_operator, + ))), ) .map(|(maybe_annotations, value)| input.apply_annotations(maybe_annotations, value)) .map(RawValueExpr::ValueLiteral) @@ -446,7 +445,9 @@ impl<'top> TextBuffer<'top> { } /// Matches an optional annotation sequence and a trailing value. - pub fn match_annotated_value>(&mut self) -> IonParseResult<'top, E::Value<'top>> { + pub fn match_annotated_value>( + &mut self, + ) -> IonParseResult<'top, E::Value<'top>> { let input = *self; ( opt(Self::match_annotations), @@ -524,49 +525,34 @@ impl<'top> TextBuffer<'top> { /// Matches a single Ion 1.0 value. 
pub fn match_value>(&mut self) -> IonParseResult<'top, E::Value<'top>> { + use ValueTokenKind::*; dispatch! { - |input: &mut TextBuffer<'top>| input.peek_byte(); - byte if byte.is_ascii_digit() || byte == b'-' => { - alt(( - Self::match_int_value, - Self::match_float_value, - Self::match_decimal_value, - Self::match_timestamp_value, - )) - }, - byte if byte.is_ascii_alphabetic() => { - alt(( - Self::match_null_value, - Self::match_bool_value, - Self::match_identifier_value, - Self::match_float_special_value, // nan - )) - }, - b'$' | b'_' => { - Self::match_symbol_value // identifiers and symbol IDs - }, - b'"' | b'\'' => { - alt(( - Self::match_string_value, - Self::match_symbol_value, - )) - }, - b'[' => E::list_matcher(), - b'(' => E::sexp_matcher(), - b'{' => { - alt(( - Self::match_blob_value, - Self::match_clob_value, - E::struct_matcher(), - )) - }, - b'+' => Self::match_float_special_value, // +inf - _other => { - // `other` is not a legal start-of-value byte. - |input: &mut TextBuffer<'top>| { - let error = InvalidInputError::new(*input); - Err(ErrMode::Backtrack(IonParseError::Invalid(error))) - } + |input: &mut TextBuffer<'top>| Ok(TEXT_ION_TOKEN_KINDS[input.peek_byte()? 
as usize]); + NumberOrTimestamp => alt(( + Self::match_int_value, + Self::match_float_value, + Self::match_decimal_value, + Self::match_timestamp_value, + )), + Letter => alt(( + Self::match_null_value, + Self::match_bool_value, + Self::match_identifier_value, + Self::match_float_special_value, // nan + )), + Symbol => Self::match_symbol_value, + QuotedText => alt((Self::match_string_value, Self::match_symbol_value)), + List => E::list_matcher(), + SExp => E::sexp_matcher(), + LobOrStruct => alt(( + Self::match_blob_value, + Self::match_clob_value, + E::struct_matcher(), + )), + Invalid(byte) => |input: &mut TextBuffer<'top>| { + let error = InvalidInputError::new(*input) + .with_label(format!("a value cannot begin with '{}'", char::from(byte))); + Err(ErrMode::Backtrack(IonParseError::Invalid(error))) }, } .with_taken() @@ -598,16 +584,15 @@ impl<'top> TextBuffer<'top> { &mut self, parameter: &'top Parameter, ) -> IonParseResult<'top, TextEExpArgGroup<'top>> { - TextEncoding_1_1::container_matcher( "an explicit argument group", "(::", RawTextSExpIterator::::new, - whitespace_and_then(")") + whitespace_and_then(")"), ) - .with_taken() - .map(|(expr_cache, input)| TextEExpArgGroup::new(parameter, input, expr_cache)) - .parse_next(self) + .with_taken() + .map(|(expr_cache, input)| TextEExpArgGroup::new(parameter, input, expr_cache)) + .parse_next(self) } pub fn match_e_expression_name(&mut self) -> IonParseResult<'top, MacroIdRef<'top>> { @@ -819,8 +804,6 @@ impl<'top> TextBuffer<'top> { } } - - pub fn match_empty_arg_group( &mut self, parameter: &'top Parameter, @@ -1127,10 +1110,7 @@ impl<'top> TextBuffer<'top> { /// Matches an Ion float of any syntax fn match_float(&mut self) -> IonParseResult<'top, MatchedFloat> { terminated( - alt(( - Self::match_float_special, - Self::match_float_numeric_value, - )), + alt((Self::match_float_special, Self::match_float_numeric_value)), Self::peek_stop_character, ) .parse_next(self) diff --git a/src/lazy/text/mod.rs 
b/src/lazy/text/mod.rs
index a9a2cea2..6547ab7d 100644
--- a/src/lazy/text/mod.rs
+++ b/src/lazy/text/mod.rs
@@ -4,4 +4,5 @@ pub mod encoded_value;
 pub mod matched;
 pub mod parse_result;
 pub mod raw;
+mod token_kind;
 pub mod value;
diff --git a/src/lazy/text/token_kind.rs b/src/lazy/text/token_kind.rs
new file mode 100644
index 00000000..eccc3549
--- /dev/null
+++ b/src/lazy/text/token_kind.rs
@@ -0,0 +1,54 @@
+/// Classifies a byte by the kinds of text Ion value that can begin with it.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum ValueTokenKind {
+    /// An ASCII decimal digit, 0-9 inclusive, as well as `-` and `+`.
+    /// Could be the start of an int, float, decimal, or timestamp.
+    NumberOrTimestamp,
+    /// An ASCII letter, `[a-zA-Z]` inclusive.
+    /// Could be the start of a null, bool, identifier, or float (`nan`).
+    Letter,
+    /// A `$` or `_`, which could be either a symbol ID (`$10`)
+    /// or an identifier (`$foo`, `_`).
+    Symbol,
+    /// A `"` or `'`, which could be either a string or symbol.
+    QuotedText,
+    /// `[`
+    List,
+    /// `(`
+    SExp,
+    /// `{`
+    LobOrStruct,
+    /// Any other byte; the payload is the byte itself.
+    Invalid(u8),
+}
+
+/// A table of `ValueTokenKind` instances that can be queried by using the
+/// byte in question as an index.
+///
+/// The table is exposed as a reference to a fixed-size array so that the length (256)
+/// is part of its type; an index widened from a `u8` is therefore always in bounds.
+pub(crate) static TEXT_ION_TOKEN_KINDS: &[ValueTokenKind; 256] = &init_value_token_cache();
+
+/// Builds the lookup table behind `TEXT_ION_TOKEN_KINDS`, classifying each of the
+/// 256 possible byte values.
+pub(crate) const fn init_value_token_cache() -> [ValueTokenKind; 256] {
+    // Placeholder contents; every slot is overwritten by the loop below.
+    let mut jump_table = [ValueTokenKind::Invalid(0); 256];
+    let mut index: usize = 0;
+    // A manual `while` loop is used because `for` is not permitted in a `const fn`.
+    while index < 256 {
+        let byte = index as u8;
+        jump_table[index] = match byte {
+            b'0'..=b'9' | b'-' | b'+' => ValueTokenKind::NumberOrTimestamp,
+            b'a'..=b'z' | b'A'..=b'Z' => ValueTokenKind::Letter,
+            b'$' | b'_' => ValueTokenKind::Symbol,
+            b'"' | b'\'' => ValueTokenKind::QuotedText,
+            b'[' => ValueTokenKind::List,
+            b'(' => ValueTokenKind::SExp,
+            b'{' => ValueTokenKind::LobOrStruct,
+            other_byte => ValueTokenKind::Invalid(other_byte),
+        };
+        index += 1;
+    }
+    jump_table
+}