diff --git a/src/lazy/any_encoding.rs b/src/lazy/any_encoding.rs index 616ab49b..606ec5b8 100644 --- a/src/lazy/any_encoding.rs +++ b/src/lazy/any_encoding.rs @@ -11,6 +11,7 @@ use crate::lazy::binary::raw::reader::LazyRawBinaryReader_1_0; use crate::lazy::binary::raw::sequence::{ LazyRawBinaryList_1_0, LazyRawBinarySExp_1_0, RawBinarySequenceIterator_1_0, }; +use crate::lazy::binary::raw::v1_1::e_expression::RawBinaryEExpression_1_1; use crate::lazy::binary::raw::v1_1::r#struct::{ LazyRawBinaryFieldName_1_1, LazyRawBinaryStruct_1_1, RawBinaryStructIterator_1_1, }; @@ -33,7 +34,7 @@ use crate::lazy::encoding::{ BinaryEncoding_1_0, BinaryEncoding_1_1, TextEncoding_1_0, TextEncoding_1_1, }; use crate::lazy::expanded::macro_evaluator::RawEExpression; -use crate::lazy::never::Never; +use crate::lazy::expanded::EncodingContextRef; use crate::lazy::raw_stream_item::LazyRawStreamItem; use crate::lazy::raw_value_ref::RawValueRef; use crate::lazy::span::Span; @@ -54,7 +55,6 @@ use crate::lazy::text::value::{ LazyRawTextVersionMarker_1_1, RawTextAnnotationsIterator, }; use crate::{Encoding, IonResult, IonType, RawSymbolRef}; -use bumpalo::Bump as BumpAllocator; /// An implementation of the `LazyDecoder` trait that can read any encoding of Ion. #[derive(Debug, Clone, Copy)] @@ -174,7 +174,7 @@ pub struct LazyRawAnyEExpression<'top> { #[derive(Debug, Copy, Clone)] pub enum LazyRawAnyEExpressionKind<'top> { Text_1_1(RawTextEExpression_1_1<'top>), - Binary_1_1(Never), // TODO: RawBinaryEExpression_1_1 + Binary_1_1(RawBinaryEExpression_1_1<'top>), } impl<'top> LazyRawAnyEExpression<'top> { @@ -194,6 +194,13 @@ impl<'top> From> for LazyRawAnyEExpression<'top> { } } } +impl<'top> From> for LazyRawAnyEExpression<'top> { + fn from(binary_invocation: RawBinaryEExpression_1_1<'top>) -> Self { + LazyRawAnyEExpression { + encoding: LazyRawAnyEExpressionKind::Binary_1_1(binary_invocation), + } + } +} impl<'top> HasSpan<'top> for LazyRawAnyEExpression<'top> { fn span(&self) -> Span<'top> { @@ -222,21 +229,19 @@ impl<'top> RawEExpression<'top, AnyEncoding> for LazyRawAnyEExpression<'top> { use LazyRawAnyEExpressionKind::*; match self.encoding { Text_1_1(ref m) => m.id(), - Binary_1_1(_) => { - todo!("macros in binary Ion 1.1 are not implemented") - } + Binary_1_1(ref m) => m.id(), } } fn raw_arguments(&self) -> Self::RawArgumentsIterator<'_> { use LazyRawAnyEExpressionKind::*; match self.encoding { - Text_1_1(m) => LazyRawAnyMacroArgsIterator { - encoding: LazyRawAnyMacroArgsIteratorKind::Text_1_1(m.raw_arguments()), + Text_1_1(e) => LazyRawAnyMacroArgsIterator { + encoding: LazyRawAnyMacroArgsIteratorKind::Text_1_1(e.raw_arguments()), + }, + Binary_1_1(e) => LazyRawAnyMacroArgsIterator { + encoding: LazyRawAnyMacroArgsIteratorKind::Binary_1_1(e.raw_arguments()), }, - Binary_1_1(_) => { - todo!("macros in binary Ion 1.1 are not yet implemented") - } } } } @@ -248,6 +253,12 @@ pub enum LazyRawAnyMacroArgsIteratorKind<'top> { TextEncoding_1_1, >>::RawArgumentsIterator<'top>, ), + Binary_1_1( + as RawEExpression< + 'top, + BinaryEncoding_1_1, + >>::RawArgumentsIterator<'top>, + ), } pub struct LazyRawAnyMacroArgsIterator<'top> { encoding: LazyRawAnyMacroArgsIteratorKind<'top>, @@ -257,19 +268,31 @@ impl<'top> Iterator for LazyRawAnyMacroArgsIterator<'top> { type Item = IonResult>; fn next(&mut self) -> Option { - match self.encoding { - LazyRawAnyMacroArgsIteratorKind::Text_1_1(mut iter) => match iter.next() { + match &mut self.encoding { + LazyRawAnyMacroArgsIteratorKind::Text_1_1(ref mut iter) => match iter.next() { Some(Ok(RawValueExpr::ValueLiteral(value))) => { Some(Ok(RawValueExpr::ValueLiteral(LazyRawAnyValue::from(value)))) } - Some(Ok(RawValueExpr::MacroInvocation(invocation))) => { - Some(Ok(RawValueExpr::MacroInvocation(LazyRawAnyEExpression { + Some(Ok(RawValueExpr::EExp(invocation))) => { + Some(Ok(RawValueExpr::EExp(LazyRawAnyEExpression { encoding: LazyRawAnyEExpressionKind::Text_1_1(invocation), }))) } Some(Err(e)) => Some(Err(e)), None => None, }, + LazyRawAnyMacroArgsIteratorKind::Binary_1_1(ref mut iter) => match iter.next() { + Some(Ok(RawValueExpr::ValueLiteral(value))) => { + Some(Ok(RawValueExpr::ValueLiteral(LazyRawAnyValue::from(value)))) + } + Some(Ok(RawValueExpr::EExp(invocation))) => { + Some(Ok(RawValueExpr::EExp(LazyRawAnyEExpression { + encoding: LazyRawAnyEExpressionKind::Binary_1_1(invocation), + }))) + } + Some(Err(e)) => Some(Err(e)), + None => None, + }, } } } @@ -408,17 +431,17 @@ impl<'data> LazyRawReader<'data, AnyEncoding> for LazyRawAnyReader<'data> { fn next<'top>( &'top mut self, - allocator: &'top BumpAllocator, + context: EncodingContextRef<'top>, ) -> IonResult> where 'data: 'top, { use RawReaderKind::*; match &mut self.encoding { - Text_1_0(r) => Ok(r.next(allocator)?.into()), + Text_1_0(r) => Ok(r.next(context)?.into()), Binary_1_0(r) => Ok(r.next()?.into()), - Text_1_1(r) => Ok(r.next(allocator)?.into()), - Binary_1_1(r) => Ok(r.next()?.into()), + Text_1_1(r) => Ok(r.next(context)?.into()), + Binary_1_1(r) => Ok(r.next(context)?.into()), } } @@ -517,7 +540,7 @@ impl<'top> From> for LazyRawValueExpr<' fn from(value: LazyRawValueExpr<'top, TextEncoding_1_0>) -> Self { match value { RawValueExpr::ValueLiteral(v) => RawValueExpr::ValueLiteral(v.into()), - RawValueExpr::MacroInvocation(_) => unreachable!("macro invocation in text Ion 1.0"), + RawValueExpr::EExp(_) => unreachable!("macro invocation in text Ion 1.0"), } } } @@ -528,7 +551,7 @@ impl<'top> From> fn from(value: LazyRawValueExpr<'top, BinaryEncoding_1_0>) -> Self { match value { RawValueExpr::ValueLiteral(v) => RawValueExpr::ValueLiteral(v.into()), - RawValueExpr::MacroInvocation(_) => unreachable!("macro invocation in binary Ion 1.0"), + RawValueExpr::EExp(_) => unreachable!("macro invocation in binary Ion 1.0"), } } } @@ -537,11 +560,11 @@ impl<'top> From> for LazyRawValueExpr<' fn from(value: LazyRawValueExpr<'top, TextEncoding_1_1>) -> Self { match value { RawValueExpr::ValueLiteral(v) => RawValueExpr::ValueLiteral(v.into()), - RawValueExpr::MacroInvocation(m) => { + RawValueExpr::EExp(m) => { let invocation = LazyRawAnyEExpression { encoding: LazyRawAnyEExpressionKind::Text_1_1(m), }; - RawValueExpr::MacroInvocation(invocation) + RawValueExpr::EExp(invocation) } } } @@ -553,11 +576,11 @@ impl<'top> From> fn from(value: LazyRawValueExpr<'top, BinaryEncoding_1_1>) -> Self { match value { RawValueExpr::ValueLiteral(v) => RawValueExpr::ValueLiteral(v.into()), - RawValueExpr::MacroInvocation(m) => { + RawValueExpr::EExp(m) => { let invocation = LazyRawAnyEExpression { encoding: LazyRawAnyEExpressionKind::Binary_1_1(m), }; - RawValueExpr::MacroInvocation(invocation) + RawValueExpr::EExp(invocation) } } } @@ -723,8 +746,8 @@ impl<'top> From> LazyRawStreamItem::::Value(value) => { LazyRawStreamItem::::Value(value.into()) } - LazyRawStreamItem::::EExpression(_) => { - todo!("Macro invocations not yet implemented in binary 1.1") + LazyRawStreamItem::::EExpression(eexp) => { + LazyRawStreamItem::::EExpression(eexp.into()) } LazyRawStreamItem::::EndOfStream(end) => { LazyRawStreamItem::::EndOfStream(end) @@ -1464,6 +1487,7 @@ mod tests { use crate::lazy::any_encoding::LazyRawAnyReader; use crate::lazy::binary::test_utilities::to_binary_ion; use crate::lazy::decoder::{LazyRawReader, LazyRawSequence, LazyRawValue}; + use crate::lazy::expanded::EncodingContext; use crate::lazy::raw_stream_item::LazyRawStreamItem; use crate::lazy::raw_value_ref::RawValueRef; use crate::{IonResult, RawSymbolRef, Timestamp}; @@ -1473,41 +1497,42 @@ mod tests { #[test] fn any_encoding() -> IonResult<()> { fn test_input(data: &[u8]) -> IonResult<()> { - let allocator = BumpAllocator::new(); + let encoding_context = EncodingContext::empty(); + let context = encoding_context.get_ref(); let mut reader = LazyRawAnyReader::new(data); - assert_eq!(reader.next(&allocator)?.expect_ivm()?.version(), (1, 0)); + assert_eq!(reader.next(context)?.expect_ivm()?.version(), (1, 0)); let _strukt = reader - .next(&allocator)? + .next(context)? .expect_value()? .read()? .expect_struct()?; - let name = reader.next(&allocator)?.expect_value()?; + let name = reader.next(context)?.expect_value()?; assert_eq!( name.annotations().next().unwrap()?, RawSymbolRef::SymbolId(4) ); assert_eq!(name.read()?.expect_string()?.text(), "Gary"); assert_eq!( - reader.next(&allocator)?.expect_value()?.read()?, + reader.next(context)?.expect_value()?.read()?, RawValueRef::String("foo".into()) ); assert_eq!( - reader.next(&allocator)?.expect_value()?.read()?, + reader.next(context)?.expect_value()?.read()?, RawValueRef::Int(5.into()) ); assert_eq!( - reader.next(&allocator)?.expect_value()?.read()?, + reader.next(context)?.expect_value()?.read()?, RawValueRef::Timestamp(Timestamp::with_year(2023).with_month(8).build()?) ); assert_eq!( - reader.next(&allocator)?.expect_value()?.read()?, + reader.next(context)?.expect_value()?.read()?, RawValueRef::Bool(false) ); let mut sum = 0; for lazy_value_result in reader - .next(&allocator)? + .next(context)? .expect_value()? .read()? .expect_list()? @@ -1521,7 +1546,7 @@ mod tests { // local symbol table and the raw reader interprets that as a different value. assert!(matches!( - reader.next(&allocator)?, + reader.next(context)?, LazyRawStreamItem::::EndOfStream(_) )); Ok(()) diff --git a/src/lazy/binary/encoded_value.rs b/src/lazy/binary/encoded_value.rs index 531cfef2..f4608ffd 100644 --- a/src/lazy/binary/encoded_value.rs +++ b/src/lazy/binary/encoded_value.rs @@ -76,7 +76,7 @@ pub(crate) struct EncodedValue { // value. If `annotations` is empty, `annotations_header_length` will be zero. The annotations // wrapper contains several fields: an opcode, a wrapper length, a sequence length, and the // sequence itself. - pub annotations_header_length: u8, + pub annotations_header_length: u16, // The number of bytes used to encode the series of symbol IDs inside the annotations wrapper. pub annotations_sequence_length: u16, // Whether the annotations sequence is encoded as `FlexSym`s or as symbol addresses. diff --git a/src/lazy/binary/immutable_buffer.rs b/src/lazy/binary/immutable_buffer.rs index 7b1c4d6f..ad3e56c0 100644 --- a/src/lazy/binary/immutable_buffer.rs +++ b/src/lazy/binary/immutable_buffer.rs @@ -746,7 +746,7 @@ impl<'a> ImmutableBuffer<'a> { ); } - lazy_value.encoded_value.annotations_header_length = wrapper.header_length; + lazy_value.encoded_value.annotations_header_length = wrapper.header_length as u16; lazy_value.encoded_value.annotations_sequence_length = wrapper.sequence_length as u16; lazy_value.encoded_value.total_length += wrapper.header_length as usize; // Modify the input to include the annotations diff --git a/src/lazy/binary/raw/reader.rs b/src/lazy/binary/raw/reader.rs index 341b15c2..5b9b5a5b 100644 --- a/src/lazy/binary/raw/reader.rs +++ b/src/lazy/binary/raw/reader.rs @@ -9,7 +9,7 @@ use crate::result::IonFailure; use crate::{Encoding, IonResult}; use crate::lazy::any_encoding::IonEncoding; -use bumpalo::Bump as BumpAllocator; +use crate::lazy::expanded::EncodingContextRef; /// A binary Ion 1.0 reader that yields [`LazyRawBinaryValue_1_0`]s representing the top level values found /// in the provided input stream. @@ -125,7 +125,7 @@ impl<'data> LazyRawReader<'data, BinaryEncoding_1_0> for LazyRawBinaryReader_1_0 fn next<'top>( &'top mut self, - _allocator: &'top BumpAllocator, + _context: EncodingContextRef<'top>, ) -> IonResult> where 'data: 'top, diff --git a/src/lazy/binary/raw/v1_1/e_expression.rs b/src/lazy/binary/raw/v1_1/e_expression.rs new file mode 100644 index 00000000..55a29cc8 --- /dev/null +++ b/src/lazy/binary/raw/v1_1/e_expression.rs @@ -0,0 +1,103 @@ +#![allow(non_camel_case_types)] + +use std::fmt::{Debug, Formatter}; +use std::ops::Range; + +use crate::lazy::binary::raw::v1_1::immutable_buffer::ImmutableBuffer; +use crate::lazy::decoder::LazyRawValueExpr; +use crate::lazy::expanded::macro_evaluator::RawEExpression; +use crate::lazy::text::raw::v1_1::reader::MacroIdRef; +use crate::{v1_1, HasRange, HasSpan, IonResult, Span}; + +#[derive(Copy, Clone)] +pub struct EncodedBinaryEExp { + // The number of bytes that were used to encode the e-expression's header (including its ID) + header_length: u16, +} + +impl EncodedBinaryEExp { + pub fn new(header_length: u16) -> Self { + Self { header_length } + } +} + +#[derive(Copy, Clone)] +pub struct RawBinaryEExpression_1_1<'top> { + pub(crate) encoded_expr: EncodedBinaryEExp, + pub(crate) input: ImmutableBuffer<'top>, + pub(crate) id: MacroIdRef<'top>, + pub(crate) arg_expr_cache: &'top [LazyRawValueExpr<'top, v1_1::Binary>], +} + +impl<'top> RawBinaryEExpression_1_1<'top> { + pub fn new( + id: MacroIdRef<'top>, + encoded_expr: EncodedBinaryEExp, + input: ImmutableBuffer<'top>, + arg_expr_cache: &'top [LazyRawValueExpr<'top, v1_1::Binary>], + ) -> Self { + Self { + encoded_expr, + input, + id, + arg_expr_cache, + } + } +} + +impl<'top> HasSpan<'top> for RawBinaryEExpression_1_1<'top> { + fn span(&self) -> Span<'top> { + Span::with_offset(self.input.offset(), self.input.bytes()) + } +} + +impl<'top> HasRange for RawBinaryEExpression_1_1<'top> { + fn range(&self) -> Range { + self.input.range() + } +} + +impl<'top> Debug for RawBinaryEExpression_1_1<'top> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "", self.id()) + } +} + +impl<'top> RawEExpression<'top, v1_1::Binary> for RawBinaryEExpression_1_1<'top> { + type RawArgumentsIterator<'a> = RawBinarySequenceCacheIterator_1_1<'top> + where + Self: 'a; + + fn id(&self) -> MacroIdRef<'top> { + self.id + } + + fn raw_arguments(&self) -> Self::RawArgumentsIterator<'top> { + RawBinarySequenceCacheIterator_1_1::new(self.arg_expr_cache) + } +} + +#[derive(Debug, Clone)] +pub struct RawBinarySequenceCacheIterator_1_1<'top> { + child_exprs: &'top [LazyRawValueExpr<'top, v1_1::Binary>], + index: usize, +} + +impl<'top> RawBinarySequenceCacheIterator_1_1<'top> { + pub fn new(child_exprs: &'top [LazyRawValueExpr<'top, v1_1::Binary>]) -> Self { + Self { + child_exprs, + index: 0, + } + } +} + +impl<'top> Iterator for RawBinarySequenceCacheIterator_1_1<'top> { + type Item = IonResult>; + + fn next(&mut self) -> Option { + let next_expr = self.child_exprs.get(self.index)?; + self.index += 1; + Some(Ok(*next_expr)) + } +} diff --git a/src/lazy/binary/raw/v1_1/immutable_buffer.rs b/src/lazy/binary/raw/v1_1/immutable_buffer.rs index 94acddbc..5cb60868 100644 --- a/src/lazy/binary/raw/v1_1/immutable_buffer.rs +++ b/src/lazy/binary/raw/v1_1/immutable_buffer.rs @@ -1,18 +1,26 @@ +use std::fmt::{Debug, Formatter}; +use std::ops::Range; + +use bumpalo::collections::Vec as BumpVec; + use crate::binary::constants::v1_1::IVM; use crate::lazy::binary::encoded_value::EncodedValue; +use crate::lazy::binary::raw::v1_1::e_expression::{EncodedBinaryEExp, RawBinaryEExpression_1_1}; use crate::lazy::binary::raw::v1_1::value::{ LazyRawBinaryValue_1_1, LazyRawBinaryVersionMarker_1_1, }; use crate::lazy::binary::raw::v1_1::{Header, LengthType, Opcode, OpcodeType, ION_1_1_OPCODES}; +use crate::lazy::decoder::{LazyRawValueExpr, RawValueExpr}; use crate::lazy::encoder::binary::v1_1::fixed_int::FixedInt; use crate::lazy::encoder::binary::v1_1::fixed_uint::FixedUInt; use crate::lazy::encoder::binary::v1_1::flex_int::FlexInt; use crate::lazy::encoder::binary::v1_1::flex_sym::FlexSym; use crate::lazy::encoder::binary::v1_1::flex_uint::FlexUInt; +use crate::lazy::expanded::macro_table::MacroKind; +use crate::lazy::expanded::EncodingContextRef; +use crate::lazy::text::raw::v1_1::reader::MacroIdRef; use crate::result::IonFailure; -use crate::{IonError, IonResult}; -use std::fmt::{Debug, Formatter}; -use std::ops::Range; +use crate::{v1_1, HasRange, IonError, IonResult}; /// A buffer of unsigned bytes that can be cheaply copied and which defines methods for parsing /// the various encoding elements of a binary Ion stream. @@ -21,7 +29,7 @@ use std::ops::Range; /// and a copy of the `ImmutableBuffer` that starts _after_ the bytes that were parsed. /// /// Methods that `peek` at the input stream do not return a copy of the buffer. -#[derive(PartialEq, Clone, Copy)] +#[derive(Clone, Copy)] pub struct ImmutableBuffer<'a> { // `data` is a slice of remaining data in the larger input stream. // `offset` is the position in the overall input stream where that slice begins. @@ -32,6 +40,7 @@ pub struct ImmutableBuffer<'a> { // offset: 6 data: &'a [u8], offset: usize, + context: EncodingContextRef<'a>, } impl<'a> Debug for ImmutableBuffer<'a> { @@ -49,12 +58,20 @@ pub(crate) type ParseResult<'a, T> = IonResult<(T, ImmutableBuffer<'a>)>; impl<'a> ImmutableBuffer<'a> { /// Constructs a new `ImmutableBuffer` that wraps `data`. #[inline] - pub fn new(data: &[u8]) -> ImmutableBuffer { - Self::new_with_offset(data, 0) + pub fn new(context: EncodingContextRef<'a>, data: &'a [u8]) -> ImmutableBuffer<'a> { + Self::new_with_offset(context, data, 0) } - pub fn new_with_offset(data: &[u8], offset: usize) -> ImmutableBuffer { - ImmutableBuffer { data, offset } + pub fn new_with_offset( + context: EncodingContextRef<'a>, + data: &'a [u8], + offset: usize, + ) -> ImmutableBuffer<'a> { + ImmutableBuffer { + data, + offset, + context, + } } /// Returns a slice containing all of the buffer's bytes. @@ -75,6 +92,7 @@ impl<'a> ImmutableBuffer<'a> { ImmutableBuffer { data: self.bytes_range(offset, length), offset: self.offset + offset, + context: self.context, } } @@ -119,6 +137,7 @@ impl<'a> ImmutableBuffer<'a> { Self { data: &self.data[num_bytes_to_consume..], offset: self.offset + num_bytes_to_consume, + context: self.context, } } @@ -143,7 +162,7 @@ impl<'a> ImmutableBuffer<'a> { match bytes { [0xE0, major, minor, 0xEA] => { - let matched = ImmutableBuffer::new_with_offset(bytes, self.offset); + let matched = ImmutableBuffer::new_with_offset(self.context, bytes, self.offset); let marker = LazyRawBinaryVersionMarker_1_1::new(matched, *major, *minor); Ok((marker, self.consume(IVM.len()))) } @@ -249,7 +268,9 @@ impl<'a> ImmutableBuffer<'a> { /// Reads a value without a field name from the buffer. This is applicable in lists, s-expressions, /// and at the top level. - pub(crate) fn peek_sequence_value(self) -> IonResult>> { + pub(crate) fn peek_sequence_value_expr( + self, + ) -> IonResult>> { if self.is_empty() { return Ok(None); } @@ -266,7 +287,14 @@ impl<'a> ImmutableBuffer<'a> { // Otherwise, there's a value. type_descriptor = input.peek_opcode()?; } - Ok(Some(input.read_value(type_descriptor)?)) + if type_descriptor.is_e_expression() { + return Ok(Some(RawValueExpr::EExp( + self.read_e_expression(type_descriptor)?, + ))); + } + Ok(Some(RawValueExpr::ValueLiteral( + input.read_value(type_descriptor)?, + ))) } /// Reads a value from the buffer. The caller must confirm that the buffer is not empty and that @@ -339,11 +367,12 @@ impl<'a> ImmutableBuffer<'a> { let (annotations_seq, input_after_annotations) = self.read_annotations_sequence(opcode)?; let opcode = input_after_annotations.peek_opcode()?; let mut value = input_after_annotations.read_value_without_annotations(opcode)?; - value.encoded_value.annotations_header_length = annotations_seq.header_length; + let total_annotations_length = + annotations_seq.header_length as usize + annotations_seq.sequence_length as usize; + value.encoded_value.annotations_header_length = total_annotations_length as u16; value.encoded_value.annotations_sequence_length = annotations_seq.sequence_length; value.encoded_value.annotations_encoding = annotations_seq.encoding; - value.encoded_value.total_length += - annotations_seq.header_length as usize + annotations_seq.sequence_length as usize; + value.encoded_value.total_length += total_annotations_length; // Rewind the input to include the annotations sequence value.input = self; Ok(value) @@ -425,6 +454,66 @@ impl<'a> ImmutableBuffer<'a> { ) -> ParseResult<'a, EncodedAnnotations> { todo!() } + + fn read_e_expression(self, opcode: Opcode) -> IonResult> { + use OpcodeType::*; + let (macro_id, buffer_after_id) = match opcode.opcode_type { + EExpressionWithAddress => ( + MacroIdRef::LocalAddress(opcode.byte as usize), + self.consume(1), + ), + EExpressionAddressFollows => todo!("e-expr with trailing address; {opcode:#0x?}",), + _ => unreachable!("read_e_expression called with invalid opcode"), + }; + + // TODO: When we support untagged parameter encodings, we need to use the signature's + // parameter encodings to drive this process. For now--while everything is tagged + // and cardinality is always required--we just loop `num_parameters` times. + let macro_def = self + .context + .macro_table + .macro_with_id(macro_id) + .ok_or_else(|| { + IonError::decoding_error(format!("invocation of unknown macro '{macro_id:?}'")) + })?; + use MacroKind::*; + let num_parameters = match macro_def.kind() { + Template(t) => t.signature().parameters().len(), + // Many system macros like `values`, `make_string`, etc take a variadic number of args. + _ => todo!("system macros require support for argument group encoding"), + }; + + let args_cache = self + .context + .allocator() + .alloc_with(|| BumpVec::with_capacity_in(num_parameters, self.context.allocator())); + // `args_buffer` will be partially consumed in each iteration of the loop below. + let mut args_buffer = buffer_after_id; + for _ in 0..num_parameters { + let value_expr = match args_buffer.peek_sequence_value_expr()? { + Some(expr) => expr, + None => { + return IonResult::incomplete( + "found an incomplete e-expression", + buffer_after_id.offset(), + ) + } + }; + args_buffer = args_buffer.consume(value_expr.range().len()); + args_cache.push(value_expr); + } + let macro_id_encoded_length = buffer_after_id.offset() - self.offset(); + let args_length = args_buffer.offset() + args_buffer.len() - buffer_after_id.offset(); + let e_expression_buffer = self.slice(0, macro_id_encoded_length + args_length); + + let e_expression = RawBinaryEExpression_1_1::new( + macro_id, + EncodedBinaryEExp::new(macro_id_encoded_length as u16), + e_expression_buffer, + args_cache, + ); + Ok(e_expression) + } } #[derive(Clone, Copy, Debug, PartialEq)] @@ -448,9 +537,15 @@ pub struct EncodedAnnotations { #[cfg(test)] mod tests { use super::*; + use crate::lazy::expanded::compiler::TemplateCompiler; + use crate::lazy::expanded::macro_evaluator::RawEExpression; + use crate::lazy::expanded::EncodingContext; + use crate::lazy::text::raw::v1_1::reader::MacroAddress; fn input_test>(input: A) { - let input = ImmutableBuffer::new(input.as_ref()); + let empty_context = EncodingContext::empty(); + let context = empty_context.get_ref(); + let input = ImmutableBuffer::new(context, input.as_ref()); // We can peek at the first byte... assert_eq!(input.peek_next_byte(), Some(b'f')); // ...without modifying the input. Looking at the next 3 bytes still includes 'f'. @@ -485,13 +580,127 @@ mod tests { fn validate_nop_length() { // read_nop_pad reads a single NOP value, this test ensures that we're tracking the right // size for these values. - - let buffer = ImmutableBuffer::new(&[0xECu8]); + let empty_context = EncodingContext::empty(); + let context = empty_context.get_ref(); + let buffer = ImmutableBuffer::new(context, &[0xECu8]); let (pad_size, _) = buffer.read_nop_pad().expect("unable to read NOP pad"); assert_eq!(pad_size, 1); - let buffer = ImmutableBuffer::new(&[0xEDu8, 0x05, 0x00, 0x00]); + let buffer = ImmutableBuffer::new(context, &[0xEDu8, 0x05, 0x00, 0x00]); let (pad_size, _) = buffer.read_nop_pad().expect("unable to read NOP pad"); assert_eq!(pad_size, 4); } + + fn eexp_test( + macro_source: &str, + encode_macro_fn: impl FnOnce(MacroAddress) -> Vec, + test_fn: impl FnOnce(RawBinaryEExpression_1_1) -> IonResult<()>, + ) -> IonResult<()> { + let mut context = EncodingContext::empty(); + let template_macro = TemplateCompiler::compile_from_text(context.get_ref(), macro_source)?; + let macro_address = context.macro_table.add_macro(template_macro)?; + let opcode_byte = u8::try_from(macro_address).unwrap(); + let binary_ion = encode_macro_fn(opcode_byte as usize); + let buffer = ImmutableBuffer::new(context.get_ref(), &binary_ion); + let eexp = buffer.read_e_expression(Opcode::from_byte(opcode_byte))?; + assert_eq!(eexp.id(), MacroIdRef::LocalAddress(macro_address)); + println!("{:?}", eexp); + assert_eq!(eexp.id, MacroIdRef::LocalAddress(opcode_byte as usize)); + test_fn(eexp) + } + + #[test] + fn read_eexp_without_args() -> IonResult<()> { + let macro_source = r#" + (macro seventeen () 17) + "#; + let encode_eexp_fn = |address: MacroAddress| vec![address as u8]; + eexp_test( + macro_source, + encode_eexp_fn, + |eexp: RawBinaryEExpression_1_1| { + let mut args = eexp.raw_arguments(); + assert!(args.next().is_none()); + Ok(()) + }, + ) + } + + #[test] + fn read_eexp_with_one_arg() -> IonResult<()> { + let macro_source = r#" + (macro greet (name) + (make_string "Hello, " name "!") + ) + "#; + + #[rustfmt::skip] + let encode_eexp_fn = |address: MacroAddress| vec![ + address as u8, + // === 8-byte string ==== + 0x98, + // M i c h e l l e + 0x4D, 0x69, 0x63, 0x68, 0x65, 0x6C, 0x6C, 0x65, + ]; + + let args_test = |eexp: RawBinaryEExpression_1_1| { + let mut args = eexp.raw_arguments(); + assert_eq!( + args.next() + .unwrap()? + .expect_value()? + .read()? + .expect_string()?, + "Michelle" + ); + Ok(()) + }; + + eexp_test(macro_source, encode_eexp_fn, args_test) + } + + #[test] + fn read_eexp_with_two_args() -> IonResult<()> { + let macro_source = r#" + (macro greet (name day) + (make_string "Hello, " name "! Have a pleasant " day ".") + ) + "#; + + #[rustfmt::skip] + let encode_eexp_fn = |address: MacroAddress| vec![ + address as u8, + // === 8-byte string ==== + 0x98, + // M i c h e l l e + 0x4D, 0x69, 0x63, 0x68, 0x65, 0x6C, 0x6C, 0x65, + // === 7-byte string === + 0x97, + // T u e s d a y + 0x54, 0x75, 0x65, 0x73, 0x64, 0x61, 0x79, + ]; + + let args_test = |eexp: RawBinaryEExpression_1_1| { + let mut args = eexp.raw_arguments(); + assert_eq!( + args.next() + .unwrap()? + .expect_value()? + .read()? + .expect_string()?, + "Michelle" + ); + assert_eq!( + args.next() + .unwrap()? + .expect_value()? + .read()? + .expect_string()?, + "Tuesday" + ); + Ok(()) + }; + + eexp_test(macro_source, encode_eexp_fn, args_test) + } } diff --git a/src/lazy/binary/raw/v1_1/mod.rs b/src/lazy/binary/raw/v1_1/mod.rs index a27fedd4..03247b90 100644 --- a/src/lazy/binary/raw/v1_1/mod.rs +++ b/src/lazy/binary/raw/v1_1/mod.rs @@ -7,5 +7,7 @@ pub mod r#struct; mod type_code; pub mod value; pub use type_code::*; +pub mod e_expression; pub mod type_descriptor; + pub use type_descriptor::*; diff --git a/src/lazy/binary/raw/v1_1/reader.rs b/src/lazy/binary/raw/v1_1/reader.rs index 200a15d6..d372158e 100644 --- a/src/lazy/binary/raw/v1_1/reader.rs +++ b/src/lazy/binary/raw/v1_1/reader.rs @@ -1,139 +1,105 @@ #![allow(non_camel_case_types)] +use crate::lazy::any_encoding::IonEncoding; use crate::lazy::binary::raw::v1_1::immutable_buffer::ImmutableBuffer; -use crate::lazy::binary::raw::v1_1::value::LazyRawBinaryValue_1_1; -use crate::lazy::decoder::{Decoder, LazyRawReader, RawVersionMarker}; +use crate::lazy::decoder::{Decoder, LazyRawReader, RawValueExpr, RawVersionMarker}; use crate::lazy::encoder::private::Sealed; use crate::lazy::encoding::BinaryEncoding_1_1; +use crate::lazy::expanded::EncodingContextRef; use crate::lazy::raw_stream_item::{EndPosition, LazyRawStreamItem, RawStreamItem}; use crate::result::IonFailure; -use crate::{Encoding, IonResult}; - -use crate::lazy::any_encoding::IonEncoding; -use bumpalo::Bump as BumpAllocator; +use crate::{Encoding, HasRange, IonResult}; pub struct LazyRawBinaryReader_1_1<'data> { - data: ImmutableBuffer<'data>, - bytes_to_skip: usize, // Bytes to skip in order to advance to the next item. + input: &'data [u8], + // The offset from the beginning of the overall stream at which the `input` slice begins + stream_offset: usize, + // The offset from the beginning of `input` at which the reader is positioned + local_offset: usize, } impl<'data> LazyRawBinaryReader_1_1<'data> { - fn new(data: &'data [u8]) -> Self { - Self::new_with_offset(data, 0) + fn new(input: &'data [u8]) -> Self { + Self::new_with_offset(input, 0) } - fn new_with_offset(data: &'data [u8], offset: usize) -> Self { - let data = ImmutableBuffer::new_with_offset(data, offset); + fn new_with_offset(input: &'data [u8], stream_offset: usize) -> Self { Self { - data, - bytes_to_skip: 0, + input, + stream_offset, + local_offset: 0, } } + fn end_of_stream(&self, position: usize) -> LazyRawStreamItem<'data, BinaryEncoding_1_1> { + RawStreamItem::EndOfStream(EndPosition::new(BinaryEncoding_1_1.encoding(), position)) + } + fn read_ivm<'top>( &mut self, - buffer: ImmutableBuffer<'data>, + buffer: ImmutableBuffer<'top>, ) -> IonResult> where 'data: 'top, { - let (marker, _buffer_after_ivm) = buffer.read_ivm()?; + let (marker, buffer_after_ivm) = buffer.read_ivm()?; let (major, minor) = marker.version(); if (major, minor) != (1, 1) { return IonResult::decoding_error(format!( "unsupported version of Ion: v{major}.{minor}; only 1.1 is supported by this reader", )); } - self.data = buffer; - self.bytes_to_skip = 4; + self.local_offset = buffer_after_ivm.offset() - self.stream_offset; Ok(LazyRawStreamItem::::VersionMarker( marker, )) } - fn read_value<'top>( - &mut self, - buffer: ImmutableBuffer<'data>, + fn read_value_expr<'top>( + &'top mut self, + buffer: ImmutableBuffer<'top>, ) -> IonResult> where 'data: 'top, { - let lazy_value = match ImmutableBuffer::peek_sequence_value(buffer)? { - Some(lazy_value) => lazy_value, - None => { - return Ok(LazyRawStreamItem::::EndOfStream( - EndPosition::new(BinaryEncoding_1_1.encoding(), self.position()), - )) - } + let item = match buffer.peek_sequence_value_expr()? { + Some(RawValueExpr::ValueLiteral(lazy_value)) => RawStreamItem::Value(lazy_value), + Some(RawValueExpr::EExp(eexpr)) => RawStreamItem::EExpression(eexpr), + None => self.end_of_stream(buffer.offset()), }; - self.data = buffer; - self.bytes_to_skip = lazy_value.encoded_value.total_length(); - Ok(RawStreamItem::Value(lazy_value)) + let item_range = item.range(); + self.local_offset = item_range.end - self.stream_offset; + Ok(item) } - fn advance_to_next_item(&self) -> IonResult> { - if self.data.len() < self.bytes_to_skip { - return IonResult::incomplete( - "cannot advance to next item, insufficient data in buffer", - self.data.offset(), - ); - } - - if self.bytes_to_skip > 0 { - Ok(self.data.consume(self.bytes_to_skip)) - } else { - Ok(self.data) - } - } - - pub fn next<'top>(&'top mut self) -> IonResult> + pub fn next<'top>( + &'top mut self, + context: EncodingContextRef<'top>, + ) -> IonResult> where 'data: 'top, { - let mut buffer = self.advance_to_next_item()?; + let mut buffer = ImmutableBuffer::new_with_offset( + context, + self.input.get(self.local_offset..).unwrap(), + self.position(), + ); + if buffer.is_empty() { - return Ok(LazyRawStreamItem::::EndOfStream( - EndPosition::new(BinaryEncoding_1_1.encoding(), buffer.offset()), - )); + return Ok(self.end_of_stream(buffer.offset())); } let type_descriptor = buffer.peek_opcode()?; if type_descriptor.is_nop() { (_, buffer) = buffer.consume_nop_padding(type_descriptor)?; if buffer.is_empty() { - return Ok(LazyRawStreamItem::::EndOfStream( - EndPosition::new(BinaryEncoding_1_1.encoding(), buffer.offset()), - )); + return Ok(self.end_of_stream(buffer.offset())); } } if type_descriptor.is_ivm_start() { return self.read_ivm(buffer); } - self.read_value(buffer) - } - - /// Runs the provided parsing function on this reader's buffer. - /// If it succeeds, marks the reader as ready to advance by the 'n' bytes - /// that were consumed. - /// If it does not succeed, the `DataSource` remains unchanged. - pub(crate) fn try_parse_next< - F: Fn(ImmutableBuffer) -> IonResult>>, - >( - &mut self, - parser: F, - ) -> IonResult>> { - let buffer = self.advance_to_next_item()?; - - let lazy_value = match parser(buffer) { - Ok(Some(output)) => output, - Ok(None) => return Ok(None), - Err(e) => return Err(e), - }; - - // If the value we read doesn't start where we began reading, there was a NOP. - // let num_nop_bytes = lazy_value.input.offset() - buffer.offset(); - self.bytes_to_skip = lazy_value.encoded_value.total_length(); - Ok(Some(lazy_value)) + self.read_value_expr(buffer) } } @@ -144,16 +110,6 @@ impl<'data> LazyRawReader<'data, BinaryEncoding_1_1> for LazyRawBinaryReader_1_1 Self::new(data) } - fn next<'top>( - &'top mut self, - _allocator: &'top BumpAllocator, - ) -> IonResult> - where - 'data: 'top, - { - self.next() - } - fn resume_at_offset( data: &'data [u8], offset: usize, @@ -162,8 +118,18 @@ impl<'data> LazyRawReader<'data, BinaryEncoding_1_1> for LazyRawBinaryReader_1_1 Self::new_with_offset(data, offset) } + fn next<'top>( + &'top mut self, + context: EncodingContextRef<'top>, + ) -> IonResult> + where + 'data: 'top, + { + self.next(context) + } + fn position(&self) -> usize { - self.data.offset() + self.bytes_to_skip + self.stream_offset + self.local_offset } fn encoding(&self) -> IonEncoding { @@ -173,10 +139,12 @@ impl<'data> LazyRawReader<'data, BinaryEncoding_1_1> for LazyRawBinaryReader_1_1 #[cfg(test)] mod tests { + use rstest::*; + use crate::lazy::binary::raw::v1_1::reader::LazyRawBinaryReader_1_1; + use crate::lazy::expanded::EncodingContext; use crate::raw_symbol_ref::RawSymbolRef; use crate::{IonResult, IonType}; - use rstest::*; #[test] fn nop() -> IonResult<()> { @@ -189,11 +157,17 @@ mod tests { 0xEA, // null.null ]; + let empty_context = EncodingContext::empty(); + let context = empty_context.get_ref(); let mut reader = LazyRawBinaryReader_1_1::new(&data); - let _ivm = reader.next()?.expect_ivm()?; + let _ivm = reader.next(context)?.expect_ivm()?; assert_eq!( - reader.next()?.expect_value()?.read()?.expect_null()?, + reader + .next(context)? + .expect_value()? + .read()? + .expect_null()?, IonType::Null ); @@ -207,13 +181,24 @@ mod tests { 0x6E, // true 0x6F, // false ]; - + let empty_context = EncodingContext::empty(); + let context = empty_context.get_ref(); let mut reader = LazyRawBinaryReader_1_1::new(&data); - let _ivm = reader.next()?.expect_ivm()?; + let _ivm = reader.next(context)?.expect_ivm()?; - assert!(reader.next()?.expect_value()?.read()?.expect_bool()?); + assert!(reader + .next(context)? + .expect_value()? + .read()? + .expect_bool()?); - assert!(!(reader.next()?.expect_value()?.read()?.expect_bool()?)); + assert!( + !(reader + .next(context)? + .expect_value()? + .read()? + .expect_bool()?) + ); Ok(()) } @@ -240,30 +225,31 @@ mod tests { // Integer: 147573952589676412929 0xF6, 0x13, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, ]; - + let empty_context = EncodingContext::empty(); + let context = empty_context.get_ref(); let mut reader = LazyRawBinaryReader_1_1::new(&data); - let _ivm = reader.next()?.expect_ivm()?; + let _ivm = reader.next(context)?.expect_ivm()?; assert_eq!( - reader.next()?.expect_value()?.read()?.expect_int()?, + reader.next(context)?.expect_value()?.read()?.expect_int()?, 0.into() ); assert_eq!( - reader.next()?.expect_value()?.read()?.expect_int()?, + reader.next(context)?.expect_value()?.read()?.expect_int()?, 17.into() ); assert_eq!( - reader.next()?.expect_value()?.read()?.expect_int()?, + reader.next(context)?.expect_value()?.read()?.expect_int()?, (-944).into() ); assert_eq!( - reader.next()?.expect_value()?.read()?.expect_int()?, + reader.next(context)?.expect_value()?.read()?.expect_int()?, 1.into() ); assert_eq!( - reader.next()?.expect_value()?.read()?.expect_int()?, + reader.next(context)?.expect_value()?.read()?.expect_int()?, 147573952589676412929i128.into() ); Ok(()) @@ -290,24 +276,44 @@ mod tests { 0xF9, 0x31, 0x76, 0x61, 0x72, 0x69, 0x61, 0x62, 0x6C, 0x65, 0x20, 0x6C, 0x65, 0x6E, 0x67, 0x74, 0x68, 0x20, 0x65, 0x6E, 0x63, 0x6f, 0x64, 0x69, 0x6E, 0x67, ]; - + let empty_context = EncodingContext::empty(); + let context = empty_context.get_ref(); let mut reader = LazyRawBinaryReader_1_1::new(&data); - let _ivm = reader.next()?.expect_ivm()?; + let _ivm = reader.next(context)?.expect_ivm()?; - assert_eq!(reader.next()?.expect_value()?.read()?.expect_string()?, ""); + assert_eq!( + reader + .next(context)? + .expect_value()? + .read()? + .expect_string()?, + "" + ); assert_eq!( - reader.next()?.expect_value()?.read()?.expect_string()?, + reader + .next(context)? + .expect_value()? + .read()? + .expect_string()?, "hello" ); assert_eq!( - reader.next()?.expect_value()?.read()?.expect_string()?, + reader + .next(context)? + .expect_value()? + .read()? + .expect_string()?, "fourteen bytes" ); assert_eq!( - reader.next()?.expect_value()?.read()?.expect_string()?, + reader + .next(context)? + .expect_value()? + .read()? + .expect_string()?, "variable length encoding" ); @@ -341,37 +347,62 @@ mod tests { // Symbol ID: 65,793 0xE3, 0x01, 0x00, 0x00, ]; - + let empty_context = EncodingContext::empty(); + let context = empty_context.get_ref(); let mut reader = LazyRawBinaryReader_1_1::new(&data); - let _ivm = reader.next()?.expect_ivm()?; + let _ivm = reader.next(context)?.expect_ivm()?; assert_eq!( - reader.next()?.expect_value()?.read()?.expect_symbol()?, + reader + .next(context)? + .expect_value()? + .read()? + .expect_symbol()?, "".into() ); assert_eq!( - reader.next()?.expect_value()?.read()?.expect_symbol()?, + reader + .next(context)? + .expect_value()? + .read()? + .expect_symbol()?, "fourteen bytes".into() ); assert_eq!( - reader.next()?.expect_value()?.read()?.expect_symbol()?, + reader + .next(context)? + .expect_value()? + .read()? + .expect_symbol()?, "variable length encoding".into() ); assert_eq!( - reader.next()?.expect_value()?.read()?.expect_symbol()?, + reader + .next(context)? + .expect_value()? + .read()? + .expect_symbol()?, RawSymbolRef::SymbolId(1) ); assert_eq!( - reader.next()?.expect_value()?.read()?.expect_symbol()?, + reader + .next(context)? + .expect_value()? + .read()? + .expect_symbol()?, RawSymbolRef::SymbolId(257) ); assert_eq!( - reader.next()?.expect_value()?.read()?.expect_symbol()?, + reader + .next(context)? + .expect_value()? + .read()? + .expect_symbol()?, RawSymbolRef::SymbolId(65793) ); @@ -397,22 +428,38 @@ mod tests { // 3.141592653589793 (double-precision) 0x6D, 0x18, 0x2D, 0x44, 0x54, 0xFB, 0x21, 0x09, 0x40, ]; - + let empty_context = EncodingContext::empty(); + let context = empty_context.get_ref(); let mut reader = LazyRawBinaryReader_1_1::new(&data); - let _ivm = reader.next()?.expect_ivm()?; + let _ivm = reader.next(context)?.expect_ivm()?; - assert_eq!(reader.next()?.expect_value()?.read()?.expect_float()?, 0.0); + assert_eq!( + reader + .next(context)? + .expect_value()? + .read()? + .expect_float()?, + 0.0 + ); // TODO: Implement Half-precision. - // assert_eq!(reader.next()?.expect_value()?.read()?.expect_float()?, 3.14); + // assert_eq!(reader.next(context)?.expect_value()?.read()?.expect_float()?, 3.14); assert_eq!( - reader.next()?.expect_value()?.read()?.expect_float()? as f32, + reader + .next(context)? + .expect_value()? + .read()? + .expect_float()? as f32, 3.1415927f32, ); assert_eq!( - reader.next()?.expect_value()?.read()?.expect_float()?, + reader + .next(context)? + .expect_value()? + .read()? + .expect_float()?, std::f64::consts::PI, ); @@ -473,19 +520,20 @@ mod tests { fn decimals(#[case] expected_txt: &str, #[case] ion_data: &[u8]) -> IonResult<()> { use crate::lazy::decoder::{LazyRawReader, LazyRawValue}; use crate::lazy::text::raw::v1_1::reader::LazyRawTextReader_1_1; - let bump = bumpalo::Bump::new(); + let empty_context = EncodingContext::empty(); + let context = empty_context.get_ref(); let mut reader_txt = LazyRawTextReader_1_1::new(expected_txt.as_bytes()); let mut reader_bin = LazyRawBinaryReader_1_1::new(ion_data); assert_eq!( reader_bin - .next()? + .next(context)? .expect_value()? .read()? .expect_decimal()?, reader_txt - .next(&bump)? + .next(context)? .expect_value()? .read()? .expect_decimal()?, @@ -517,13 +565,14 @@ mod tests { use crate::ion_data::IonEq; use crate::lazy::decoder::{LazyRawReader, LazyRawValue}; use crate::lazy::text::raw::v1_1::reader::LazyRawTextReader_1_1; - let bump = bumpalo::Bump::new(); + let empty_context = EncodingContext::empty(); + let context = empty_context.get_ref(); let mut reader_txt = LazyRawTextReader_1_1::new(expected_txt.as_bytes()); let mut reader_bin = LazyRawBinaryReader_1_1::new(ion_data); - let expected_value = reader_txt.next(&bump)?.expect_value()?.read()?; - let actual_value = reader_bin.next()?.expect_value()?.read()?; + let expected_value = reader_txt.next(context)?.expect_value()?.read()?; + let actual_value = reader_bin.next(context)?.expect_value()?.read()?; assert!(actual_value .expect_decimal()? @@ -551,18 +600,19 @@ mod tests { use crate::lazy::decoder::{LazyRawReader, LazyRawValue}; use crate::lazy::text::raw::v1_1::reader::LazyRawTextReader_1_1; - let bump = bumpalo::Bump::new(); + let empty_context = EncodingContext::empty(); + let context = empty_context.get_ref(); let mut reader_txt = LazyRawTextReader_1_1::new(expected_txt.as_bytes()); let mut reader_bin = LazyRawBinaryReader_1_1::new(ion_data); assert_eq!( reader_bin - .next()? + .next(context)? .expect_value()? .read()? .expect_timestamp()?, reader_txt - .next(&bump)? + .next(context)? .expect_value()? .read()? .expect_timestamp()?, @@ -583,18 +633,19 @@ mod tests { use crate::lazy::decoder::{LazyRawReader, LazyRawValue}; use crate::lazy::text::raw::v1_1::reader::LazyRawTextReader_1_1; - let bump = bumpalo::Bump::new(); + let empty_context = EncodingContext::empty(); + let context = empty_context.get_ref(); let mut reader_txt = LazyRawTextReader_1_1::new(expected_txt.as_bytes()); let mut reader_bin = LazyRawBinaryReader_1_1::new(ion_data); assert_eq!( reader_bin - .next()? + .next(context)? .expect_value()? .read()? .expect_timestamp()?, reader_txt - .next(&bump)? + .next(context)? .expect_value()? .read()? .expect_timestamp()?, @@ -610,14 +661,23 @@ mod tests { 0x75, 0x72, 0x20, 0x63, 0x75, 0x72, 0x69, 0x6f, 0x73, 0x69, 0x74, 0x79, ]; + let empty_context = EncodingContext::empty(); + let context = empty_context.get_ref(); let mut reader = LazyRawBinaryReader_1_1::new(&data); - let _ivm = reader.next()?.expect_ivm()?; + let _ivm = reader.next(context)?.expect_ivm()?; let bytes: &[u8] = &[ 0x49, 0x20, 0x61, 0x70, 0x70, 0x6c, 0x61, 0x75, 0x64, 0x20, 0x79, 0x6f, 0x75, 0x72, 0x20, 0x63, 0x75, 0x72, 0x69, 0x6f, 0x73, 0x69, 0x74, 0x79, ]; - assert_eq!(reader.next()?.expect_value()?.read()?.expect_blob()?, bytes); + assert_eq!( + reader + .next(context)? + .expect_value()? + .read()? + .expect_blob()?, + bytes + ); Ok(()) } @@ -630,15 +690,24 @@ mod tests { 0x75, 0x72, 0x20, 0x63, 0x75, 0x72, 0x69, 0x6f, 0x73, 0x69, 0x74, 0x79, ]; + let empty_context = EncodingContext::empty(); + let context = empty_context.get_ref(); let mut reader = LazyRawBinaryReader_1_1::new(&data); - let _ivm = reader.next()?.expect_ivm()?; + let _ivm = reader.next(context)?.expect_ivm()?; let bytes: &[u8] = &[ 0x49, 0x20, 0x61, 0x70, 0x70, 0x6c, 0x61, 0x75, 0x64, 0x20, 0x79, 0x6f, 0x75, 0x72, 0x20, 0x63, 0x75, 0x72, 0x69, 0x6f, 0x73, 0x69, 0x74, 0x79, ]; - assert_eq!(reader.next()?.expect_value()?.read()?.expect_clob()?, bytes); + assert_eq!( + reader + .next(context)? + .expect_value()? + .read()? + .expect_clob()?, + bytes + ); Ok(()) } @@ -695,8 +764,14 @@ mod tests { ]; for (ion_data, expected_types) in tests { + let encoding_context = EncodingContext::empty(); + let context = encoding_context.get_ref(); let mut reader = LazyRawBinaryReader_1_1::new(ion_data); - let container = reader.next()?.expect_value()?.read()?.expect_list()?; + let container = reader + .next(context)? + .expect_value()? + .read()? + .expect_list()?; let mut count = 0; for (actual_lazy_value, expected_type) in container.iter().zip(expected_types.iter()) { let value = actual_lazy_value?.expect_value()?; @@ -746,8 +821,14 @@ mod tests { ]; for (ion_data, expected_types) in tests { + let encoding_context = EncodingContext::empty(); + let context = encoding_context.get_ref(); let mut reader = LazyRawBinaryReader_1_1::new(ion_data); - let container = reader.next()?.expect_value()?.read()?.expect_sexp()?; + let container = reader + .next(context)? + .expect_value()? + .read()? + .expect_sexp()?; let mut count = 0; for (actual_lazy_value, expected_type) in container.iter().zip(expected_types.iter()) { let value = actual_lazy_value?.expect_value()?; @@ -779,8 +860,14 @@ mod tests { ]; for (data, expected_type) in data { + let encoding_context = EncodingContext::empty(); + let context = encoding_context.get_ref(); let mut reader = LazyRawBinaryReader_1_1::new(&data); - let actual_type = reader.next()?.expect_value()?.read()?.expect_null()?; + let actual_type = reader + .next(context)? + .expect_value()? + .read()? + .expect_null()?; assert_eq!(actual_type, expected_type); } Ok(()) @@ -890,8 +977,14 @@ mod tests { ]; for (ion_data, field_pairs) in tests { + let encoding_context = EncodingContext::empty(); + let context = encoding_context.get_ref(); let mut reader = LazyRawBinaryReader_1_1::new(ion_data); - let actual_data = reader.next()?.expect_value()?.read()?.expect_struct()?; + let actual_data = reader + .next(context)? + .expect_value()? + .read()? + .expect_struct()?; for (actual_field, expected_field) in actual_data.iter().zip(field_pairs.iter()) { let (expected_name, expected_value_type) = expected_field; diff --git a/src/lazy/binary/raw/v1_1/sequence.rs b/src/lazy/binary/raw/v1_1/sequence.rs index df4bead1..fc58f9b9 100644 --- a/src/lazy/binary/raw/v1_1/sequence.rs +++ b/src/lazy/binary/raw/v1_1/sequence.rs @@ -4,11 +4,9 @@ use crate::lazy::binary::raw::v1_1::annotations_iterator::RawBinaryAnnotationsIt use crate::lazy::binary::raw::v1_1::immutable_buffer::ImmutableBuffer; use crate::lazy::binary::raw::v1_1::value::LazyRawBinaryValue_1_1; use crate::lazy::decoder::private::LazyContainerPrivate; -use crate::lazy::decoder::{ - Decoder, LazyRawContainer, LazyRawSequence, LazyRawValueExpr, RawValueExpr, -}; +use crate::lazy::decoder::{Decoder, LazyRawContainer, LazyRawSequence, LazyRawValueExpr}; use crate::lazy::encoding::BinaryEncoding_1_1; -use crate::{IonResult, IonType}; +use crate::{HasRange, IonResult, IonType}; use std::fmt::{Debug, Formatter}; #[derive(Debug, Copy, Clone)] @@ -151,13 +149,12 @@ impl<'top> Iterator for RawBinarySequenceIterator_1_1<'top> { fn next(&mut self) -> Option { self.source = self.source.consume(self.bytes_to_skip); - match self.source.peek_sequence_value() { - Ok(Some(output)) => { - self.bytes_to_skip = output.encoded_value.total_length; - Some(Ok(RawValueExpr::ValueLiteral(output))) - } - Ok(None) => None, - Err(e) => Some(Err(e)), - } + let item = match self.source.peek_sequence_value_expr() { + Ok(Some(expr)) => expr, + Ok(None) => return None, + Err(e) => return Some(Err(e)), + }; + self.bytes_to_skip = item.range().len(); + Some(Ok(item)) } } diff --git a/src/lazy/binary/raw/v1_1/type_code.rs b/src/lazy/binary/raw/v1_1/type_code.rs index 41c44a2c..ff91f24f 100644 --- a/src/lazy/binary/raw/v1_1/type_code.rs +++ b/src/lazy/binary/raw/v1_1/type_code.rs @@ -13,8 +13,8 @@ use crate::IonType; /// * Whether the next type code is reserved. #[derive(Debug, PartialEq, Eq, Copy, Clone)] pub enum OpcodeType { - EExpressionWithAddress, // 0x00-0x4F - - EExpressionAddressFollows, // 0x40-0x4F - + EExpressionWithAddress, // 0x00-0x50 - + EExpressionAddressFollows, // 0x50-0x5F - Integer, // 0x60-0x68 - Integer up to 8 bytes wide Float, // 0x6A-0x6D - Boolean, // 0x6E-0x6F - diff --git a/src/lazy/binary/raw/v1_1/type_descriptor.rs b/src/lazy/binary/raw/v1_1/type_descriptor.rs index 2a6c1f8d..f68c129e 100644 --- a/src/lazy/binary/raw/v1_1/type_descriptor.rs +++ b/src/lazy/binary/raw/v1_1/type_descriptor.rs @@ -9,6 +9,7 @@ pub struct Opcode { pub opcode_type: OpcodeType, pub ion_type: Option, pub low_nibble: u8, + pub byte: u8, } /// A statically defined array of TypeDescriptor that allows a binary reader to map a given @@ -35,6 +36,7 @@ const DEFAULT_HEADER: Opcode = Opcode { opcode_type: OpcodeType::Nop, ion_type: None, low_nibble: 0, + byte: 0, }; pub(crate) const fn init_opcode_cache() -> [Opcode; 256] { @@ -56,6 +58,8 @@ impl Opcode { use OpcodeType::*; let (opcode_type, length_code, ion_type) = match (high_nibble, low_nibble) { + (0x0..=0x4, _) => (EExpressionWithAddress, low_nibble, None), + (0x5, _) => (EExpressionAddressFollows, low_nibble, None), (0x6, 0x0..=0x8) => (Integer, low_nibble, Some(IonType::Int)), (0x6, 0xA..=0xD) => (Float, low_nibble, Some(IonType::Float)), (0x6, 0xE..=0xF) => (Boolean, low_nibble, Some(IonType::Bool)), @@ -88,6 +92,7 @@ impl Opcode { ion_type, opcode_type, low_nibble: length_code, + byte, } } @@ -99,6 +104,14 @@ impl Opcode { self.opcode_type == OpcodeType::Nop } + pub fn is_e_expression(&self) -> bool { + use OpcodeType::*; + matches!( + self.opcode_type, + EExpressionWithAddress | EExpressionAddressFollows + ) + } + pub fn is_ivm_start(&self) -> bool { self.opcode_type == OpcodeType::IonVersionMarker } diff --git a/src/lazy/binary/raw/v1_1/value.rs b/src/lazy/binary/raw/v1_1/value.rs index 93f9121c..66418e6c 100644 --- a/src/lazy/binary/raw/v1_1/value.rs +++ b/src/lazy/binary/raw/v1_1/value.rs @@ -158,10 +158,11 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { /// Returns an `ImmutableBuffer` that contains the bytes comprising this value's encoded /// annotations sequence. fn annotations_sequence(&self) -> ImmutableBuffer<'top> { - let sequence = self.input.slice( - self.encoded_value.annotations_header_length as usize, - self.encoded_value.annotations_sequence_length as usize, - ); + let annotations_header_length = self.encoded_value.annotations_header_length as usize; + let sequence_length = self.encoded_value.annotations_sequence_length as usize; + let sequence = self + .input + .slice(annotations_header_length - sequence_length, sequence_length); sequence } diff --git a/src/lazy/decoder.rs b/src/lazy/decoder.rs index 8bddb85a..3658b332 100644 --- a/src/lazy/decoder.rs +++ b/src/lazy/decoder.rs @@ -1,11 +1,10 @@ use std::fmt::Debug; use std::ops::Range; -use bumpalo::Bump as BumpAllocator; - use crate::lazy::any_encoding::IonEncoding; use crate::lazy::encoding::{BinaryEncoding_1_0, RawValueLiteral, TextEncoding_1_0}; use crate::lazy::expanded::macro_evaluator::RawEExpression; +use crate::lazy::expanded::EncodingContextRef; use crate::lazy::raw_stream_item::LazyRawStreamItem; use crate::lazy::raw_value_ref::RawValueRef; use crate::lazy::span::Span; @@ -77,11 +76,11 @@ pub trait RawVersionMarker<'top>: Debug + Copy + Clone + HasSpan<'top> { /// When working with `RawValueExpr`s that always use a given decoder's `Value` and /// `MacroInvocation` associated types, consider using [`LazyRawValueExpr`] instead. #[derive(Copy, Clone, Debug, PartialEq)] -pub enum RawValueExpr { +pub enum RawValueExpr { /// A value literal. For example: `5`, `foo`, or `"hello"` in text. ValueLiteral(V), /// An Ion 1.1+ macro invocation. For example: `(:employee 12345 "Sarah" "Gonzalez")` in text. - MacroInvocation(M), + EExp(E), } // `RawValueExpr` above has no ties to a particular encoding. The `LazyRawValueExpr` type alias @@ -102,7 +101,7 @@ impl RawValueExpr { pub fn expect_value(self) -> IonResult { match self { RawValueExpr::ValueLiteral(v) => Ok(v), - RawValueExpr::MacroInvocation(_m) => IonResult::decoding_error( + RawValueExpr::EExp(_m) => IonResult::decoding_error( "expected a value literal, but found a macro invocation ({:?})", ), } @@ -114,7 +113,7 @@ impl RawValueExpr { "expected a macro invocation but found a value literal ({:?})", v )), - RawValueExpr::MacroInvocation(m) => Ok(m), + RawValueExpr::EExp(m) => Ok(m), } } } @@ -123,7 +122,7 @@ impl HasRange for RawValueExpr { fn range(&self) -> Range { match self { RawValueExpr::ValueLiteral(value) => value.range(), - RawValueExpr::MacroInvocation(eexp) => eexp.range(), + RawValueExpr::EExp(eexp) => eexp.range(), } } } @@ -132,7 +131,7 @@ impl<'top, V: HasSpan<'top>, M: HasSpan<'top>> HasSpan<'top> for RawValueExpr Span<'top> { match self { RawValueExpr::ValueLiteral(value) => value.span(), - RawValueExpr::MacroInvocation(eexp) => eexp.span(), + RawValueExpr::EExp(eexp) => eexp.span(), } } } @@ -352,7 +351,7 @@ pub trait LazyRawReader<'data, D: Decoder>: Sized { fn next<'top>( &'top mut self, - allocator: &'top BumpAllocator, + context: EncodingContextRef<'top>, ) -> IonResult> where 'data: 'top; diff --git a/src/lazy/encoder/text/v1_1/writer.rs b/src/lazy/encoder/text/v1_1/writer.rs index 62777ec5..94d24ebd 100644 --- a/src/lazy/encoder/text/v1_1/writer.rs +++ b/src/lazy/encoder/text/v1_1/writer.rs @@ -100,6 +100,7 @@ mod tests { use crate::lazy::encoder::write_as_ion::WriteAsSExp; use crate::lazy::encoder::LazyRawWriter; use crate::lazy::expanded::macro_evaluator::RawEExpression; + use crate::lazy::expanded::EncodingContext; use crate::lazy::text::raw::v1_1::reader::{LazyRawTextReader_1_1, MacroIdRef}; use crate::symbol_ref::AsSymbolRef; use crate::{ @@ -263,9 +264,10 @@ mod tests { println!("{encoded_text}"); let mut reader = LazyRawTextReader_1_1::new(encoded_text.as_bytes()); - let bump = bumpalo::Bump::new(); - let _marker = reader.next(&bump)?.expect_ivm()?; - let eexp = reader.next(&bump)?.expect_macro_invocation()?; + let empty_context = EncodingContext::empty(); + let context = empty_context.get_ref(); + let _marker = reader.next(context)?.expect_ivm()?; + let eexp = reader.next(context)?.expect_macro_invocation()?; assert_eq!(MacroIdRef::LocalName("foo"), eexp.id()); let mut args = eexp.raw_arguments(); let int_arg = args.next().unwrap()?.expect_value()?.read()?.expect_int()?; diff --git a/src/lazy/encoding.rs b/src/lazy/encoding.rs index 7810463f..ef0451f9 100644 --- a/src/lazy/encoding.rs +++ b/src/lazy/encoding.rs @@ -34,6 +34,7 @@ use crate::lazy::text::value::{ LazyRawTextVersionMarker_1_1, RawTextAnnotationsIterator, }; +use crate::lazy::binary::raw::v1_1::e_expression::RawBinaryEExpression_1_1; use crate::{IonResult, TextFormat, WriteConfig}; /// Marker trait for types that represent an Ion encoding. @@ -242,11 +243,11 @@ impl Decoder for BinaryEncoding_1_1 { type Value<'top> = LazyRawBinaryValue_1_1<'top>; type SExp<'top> = LazyRawBinarySExp_1_1<'top>; type List<'top> = LazyRawBinaryList_1_1<'top>; - type FieldName<'top> = LazyRawBinaryFieldName_1_1<'top>; type Struct<'top> = LazyRawBinaryStruct_1_1<'top>; + type FieldName<'top> = LazyRawBinaryFieldName_1_1<'top>; type AnnotationsIterator<'top> = RawBinaryAnnotationsIterator_1_1<'top>; // TODO: implement macros in 1.1 - type EExp<'top> = Never; + type EExp<'top> = RawBinaryEExpression_1_1<'top>; type VersionMarker<'top> = LazyRawBinaryVersionMarker_1_1<'top>; } diff --git a/src/lazy/expanded/compiler.rs b/src/lazy/expanded/compiler.rs index dc31e9fa..14667204 100644 --- a/src/lazy/expanded/compiler.rs +++ b/src/lazy/expanded/compiler.rs @@ -506,15 +506,15 @@ impl TemplateCompiler { #[cfg(test)] mod tests { + use std::collections::HashMap; + use crate::lazy::expanded::compiler::TemplateCompiler; - use crate::lazy::expanded::macro_table::MacroTable; use crate::lazy::expanded::template::{ ExprRange, TemplateBodyMacroInvocation, TemplateBodyValueExpr, TemplateBodyVariableReference, TemplateMacro, TemplateValue, }; - use crate::lazy::expanded::EncodingContext; - use crate::{Int, IntoAnnotations, IonResult, Symbol, SymbolTable}; - use std::collections::HashMap; + use crate::lazy::expanded::{EncodingContext, EncodingContextRef}; + use crate::{Int, IntoAnnotations, IonResult, Symbol}; // This function only looks at the value portion of the TemplateElement. To compare annotations, // see the `expect_annotations` method. @@ -602,26 +602,18 @@ mod tests { } struct TestResources { - macro_table: MacroTable, - symbol_table: SymbolTable, - allocator: bumpalo::Bump, + context: EncodingContext, } impl TestResources { fn new() -> Self { Self { - macro_table: MacroTable::new(), - symbol_table: SymbolTable::new(), - allocator: bumpalo::Bump::new(), + context: EncodingContext::empty(), } } - fn context(&self) -> EncodingContext { - EncodingContext { - macro_table: &self.macro_table, - symbol_table: &self.symbol_table, - allocator: &self.allocator, - } + fn context(&self) -> EncodingContextRef { + self.context.get_ref() } } diff --git a/src/lazy/expanded/e_expression.rs b/src/lazy/expanded/e_expression.rs index 0bc4bdd2..cde06907 100644 --- a/src/lazy/expanded/e_expression.rs +++ b/src/lazy/expanded/e_expression.rs @@ -87,7 +87,7 @@ impl<'top, D: Decoder> Iterator for EExpressionArgsIterator<'top, D> { LazyRawValueExpr::::ValueLiteral(value) => { ValueExpr::ValueLiteral(LazyExpandedValue::from_literal(self.context, value)) } - LazyRawValueExpr::::MacroInvocation(raw_invocation) => { + LazyRawValueExpr::::EExp(raw_invocation) => { let invocation = match raw_invocation.resolve(self.context) { Ok(invocation) => invocation, Err(e) => return Some(Err(e)), diff --git a/src/lazy/expanded/macro_evaluator.rs b/src/lazy/expanded/macro_evaluator.rs index 0ce3deed..df6cec59 100644 --- a/src/lazy/expanded/macro_evaluator.rs +++ b/src/lazy/expanded/macro_evaluator.rs @@ -53,7 +53,7 @@ pub trait RawEExpression<'top, D: Decoder = Self>>: /// If the ID cannot be found in the `EncodingContext`, returns `Err`. fn resolve(self, context: EncodingContextRef<'top>) -> IonResult> { let invoked_macro = context - .macro_table + .macro_table() .macro_with_id(self.id()) .ok_or_else(|| { IonError::decoding_error(format!("unrecognized macro ID {:?}", self.id())) @@ -269,8 +269,8 @@ pub struct MacroEvaluator<'top, D: Decoder> { impl<'top, D: Decoder> MacroEvaluator<'top, D> { pub fn new(context: EncodingContextRef<'top>, environment: Environment<'top, D>) -> Self { - let macro_stack = BumpVec::new_in(context.allocator); - let mut env_stack = BumpVec::new_in(context.allocator); + let macro_stack = BumpVec::new_in(context.allocator()); + let mut env_stack = BumpVec::new_in(context.allocator()); env_stack.push(environment); Self { macro_stack, @@ -308,7 +308,7 @@ impl<'top, D: Decoder> MacroEvaluator<'top, D> { let capacity_hint = num_args_hint.1.unwrap_or(num_args_hint.0); let mut args = BumpVec::with_capacity_in(capacity_hint, allocator); - for arg in invocation.arguments(self.environment()) { + for arg in args_iter { args.push(arg?); } let environment = Environment::new(args); @@ -574,7 +574,7 @@ impl<'top, D: Decoder> MakeStringExpansion<'top, D> { } // Create a bump-allocated buffer to hold our constructed string - let mut buffer = BumpString::new_in(context.allocator); + let mut buffer = BumpString::new_in(context.allocator()); // We need to eagerly evaluate all of the arguments to `make_string` to produce its next // (and only) value. However, because `&mut self` (the expansion state) lives in a stack @@ -602,7 +602,7 @@ impl<'top, D: Decoder> MakeStringExpansion<'top, D> { // Convert our BumpString<'bump> into a &'bump str that we can wrap in an `ExpandedValueRef` let constructed_text = buffer.into_bump_str(); let expanded_value_ref: &'top ExpandedValueRef<'top, D> = context - .allocator + .allocator() .alloc_with(|| ExpandedValueRef::String(StrRef::from(constructed_text))); static EMPTY_ANNOTATIONS: &[&str] = &[]; diff --git a/src/lazy/expanded/macro_table.rs b/src/lazy/expanded/macro_table.rs index c5eba3d1..236c3276 100644 --- a/src/lazy/expanded/macro_table.rs +++ b/src/lazy/expanded/macro_table.rs @@ -59,7 +59,7 @@ impl<'top> MacroRef<'top> { /// Allows callers to resolve a macro ID (that is: name or address) to a [`MacroKind`], confirming /// its validity and allowing evaluation to begin. -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct MacroTable { macros_by_address: Vec, // Maps names to an address that can be used to query the Vec above. @@ -85,6 +85,10 @@ impl MacroTable { } } + pub fn len(&self) -> usize { + self.macros_by_address.len() + } + pub fn macro_with_id(&'_ self, id: MacroIdRef<'_>) -> Option> { match id { MacroIdRef::LocalName(name) => self.macro_with_name(name), diff --git a/src/lazy/expanded/mod.rs b/src/lazy/expanded/mod.rs index b6fa9eb9..ad98a22f 100644 --- a/src/lazy/expanded/mod.rs +++ b/src/lazy/expanded/mod.rs @@ -90,18 +90,18 @@ pub mod template; // happens to be available in the buffer OR the set that leads up to the next encoding directive. // The value proposition of being able to lazily explore multiple top level values concurrently // would need to be proved out first. -#[derive(Copy, Clone, Debug)] -pub struct EncodingContext<'top> { - pub(crate) macro_table: &'top MacroTable, - pub(crate) symbol_table: &'top SymbolTable, - pub(crate) allocator: &'top BumpAllocator, +#[derive(Debug)] +pub struct EncodingContext { + pub(crate) macro_table: MacroTable, + pub(crate) symbol_table: SymbolTable, + pub(crate) allocator: BumpAllocator, } -impl<'top> EncodingContext<'top> { +impl EncodingContext { pub fn new( - macro_table: &'top MacroTable, - symbol_table: &'top SymbolTable, - allocator: &'top BumpAllocator, + macro_table: MacroTable, + symbol_table: SymbolTable, + allocator: BumpAllocator, ) -> Self { Self { macro_table, @@ -110,24 +110,40 @@ impl<'top> EncodingContext<'top> { } } - pub fn get_ref(&'top self) -> EncodingContextRef<'top> { + pub fn empty() -> Self { + Self::new(MacroTable::new(), SymbolTable::new(), BumpAllocator::new()) + } + + pub fn get_ref(&self) -> EncodingContextRef { EncodingContextRef { context: self } } } #[derive(Debug, Copy, Clone)] pub struct EncodingContextRef<'top> { - context: &'top EncodingContext<'top>, + context: &'top EncodingContext, } impl<'top> EncodingContextRef<'top> { - pub fn new(context: &'top EncodingContext<'top>) -> Self { + pub fn new(context: &'top EncodingContext) -> Self { Self { context } } + + pub fn allocator(&self) -> &'top BumpAllocator { + &self.context.allocator + } + + pub fn symbol_table(&self) -> &'top SymbolTable { + &self.context.symbol_table + } + + pub fn macro_table(&self) -> &'top MacroTable { + &self.context.macro_table + } } impl<'top> Deref for EncodingContextRef<'top> { - type Target = EncodingContext<'top>; + type Target = EncodingContext; fn deref(&self) -> &Self::Target { self.context @@ -215,12 +231,7 @@ pub struct ExpandingReader { // Holds information found in symbol tables and encoding directives (TODO) that can be applied // to the encoding context the next time the reader is between top-level expressions. pending_lst: UnsafeCell, - // A bump allocator that is cleared between top-level expressions. - allocator: UnsafeCell, - // TODO: Make the symbol and macro tables traits on `Encoding` such that they can be configured - // statically. Then 1.0 types can use `Never` for the macro table. - symbol_table: UnsafeCell, - macro_table: UnsafeCell, + encoding_context: UnsafeCell, catalog: Box, } @@ -232,10 +243,8 @@ impl ExpandingReader { Self { raw_reader: raw_reader.into(), evaluator_ptr: None.into(), - allocator: BumpAllocator::new().into(), + encoding_context: EncodingContext::empty().into(), pending_lst: PendingLst::new().into(), - symbol_table: SymbolTable::new().into(), - macro_table: MacroTable::new().into(), catalog, } } @@ -243,34 +252,54 @@ impl ExpandingReader { // TODO: This method is temporary. It will be removed when the ability to read 1.1 encoding // directives from the input stream is available. Until then, template creation is manual. pub fn register_template(&mut self, template_definition: &str) -> IonResult { - let context = self.context(); - let template_macro: TemplateMacro = - { TemplateCompiler::compile_from_text(context.get_ref(), template_definition)? }; + let template_macro: TemplateMacro = self.compile_template(template_definition)?; + self.add_macro(template_macro) + } + + fn compile_template(&self, template_definition: &str) -> IonResult { + TemplateCompiler::compile_from_text(self.context(), template_definition) + } - let macro_table = self.macro_table.get_mut(); + fn add_macro(&mut self, template_macro: TemplateMacro) -> IonResult { + let macro_table = &mut self.context_mut().macro_table; macro_table.add_macro(template_macro) } - pub fn context(&self) -> EncodingContext<'_> { + pub fn context(&self) -> EncodingContextRef<'_> { // SAFETY: The only time that the macro table, symbol table, and allocator can be modified // is in the body of the method `between_top_level_expressions`. As long as nothing holds // a reference to the `EncodingContext` we create here when that method is running, // this is safe. - unsafe { - EncodingContext::new( - &*self.macro_table.get(), - &*self.symbol_table.get(), - &*self.allocator.get(), - ) - } + unsafe { (*self.encoding_context.get()).get_ref() } + } + + pub fn context_mut(&mut self) -> &mut EncodingContext { + // SAFETY: If the caller has a `&mut` reference to `self`, it is the only mutable reference + // that can modify `self.encoding_context`. + unsafe { &mut *self.encoding_context.get() } + } + + // SAFETY: This method takes an immutable reference to `self` and then modifies the + // EncodingContext's bump allocator via `UnsafeCell`. This should only be called from + // `between_top_level_values`, and the caller must confirm that nothing else holds a + // reference to any structures within `EncodingContext`. + unsafe fn reset_bump_allocator(&self) { + let context: &mut EncodingContext = &mut *self.encoding_context.get(); + context.allocator.reset(); } - pub fn pending_symtab_changes(&self) -> &PendingLst { + pub fn pending_lst(&self) -> &PendingLst { // If the user is able to call this method, the PendingLst is not being modified and it's // safe to immutably reference. unsafe { &*self.pending_lst.get() } } + pub fn pending_lst_mut(&mut self) -> &mut PendingLst { + // SAFETY: If the caller has a `&mut` reference to `self`, it is the only mutable reference + // that can modify `self.pending_lst`. + unsafe { &mut *self.pending_lst.get() } + } + fn ptr_to_mut_ref<'a, T>(ptr: *mut ()) -> &'a mut T { let typed_ptr: *mut T = ptr.cast(); unsafe { &mut *typed_ptr } @@ -341,21 +370,24 @@ impl ExpandingReader { /// /// This is the reader's opportunity to make any pending changes to the encoding context. fn between_top_level_expressions(&self) { - // SAFETY: This is the only place where we modify the encoding context. Take care not to - // alias the allocator, symbol table, or macro table in this scope. - // We're going to clear the bump allocator, so drop our reference to the evaluator that // lives there. self.evaluator_ptr.set(None); - // Clear the allocator. - let allocator: &mut BumpAllocator = unsafe { &mut *self.allocator.get() }; - allocator.reset(); + // Clear the bump allocator. + // SAFETY: This is the only place where we modify the encoding context. Take care not to + // alias the allocator, symbol table, or macro table inside this `unsafe` scope. + unsafe { self.reset_bump_allocator() }; // If the pending LST has changes to apply, do so. + // SAFETY: Nothing else holds a reference to the `PendingLst`'s contents, so we can use the + // `UnsafeCell` to get a mutable reference to it. let pending_lst: &mut PendingLst = unsafe { &mut *self.pending_lst.get() }; if pending_lst.has_changes { - let symbol_table: &mut SymbolTable = unsafe { &mut *self.symbol_table.get() }; + // SAFETY: Nothing else holds a reference to the `EncodingContext`'s contents, so we can use the + // `UnsafeCell` to get a mutable reference to its symbol table. + let symbol_table: &mut SymbolTable = + &mut unsafe { &mut *self.encoding_context.get() }.symbol_table; Self::apply_pending_lst(pending_lst, symbol_table); } } @@ -402,13 +434,13 @@ impl ExpandingReader { // to find an expression that yields no values (for example: `(:void)`), so we perform this // step in a loop until we get a value or end-of-stream. - let allocator: &BumpAllocator = unsafe { &*self.allocator.get() }; + let allocator: &BumpAllocator = self.context().allocator(); let context_ref = EncodingContextRef::new(allocator.alloc_with(|| self.context())); loop { // Pull another top-level expression from the input stream if one is available. use crate::lazy::raw_stream_item::RawStreamItem::*; let raw_reader = unsafe { &mut *self.raw_reader.get() }; - match raw_reader.next(allocator)? { + match raw_reader.next(context_ref)? { VersionMarker(marker) => return Ok(SystemStreamItem::VersionMarker(marker)), // We got our value; return it. Value(raw_value) => { diff --git a/src/lazy/expanded/sequence.rs b/src/lazy/expanded/sequence.rs index 9f297625..38b7f810 100644 --- a/src/lazy/expanded/sequence.rs +++ b/src/lazy/expanded/sequence.rs @@ -336,7 +336,7 @@ fn expand_next_sequence_value<'top, D: Decoder>( Some(Ok(RawValueExpr::ValueLiteral(value))) => { return Some(Ok(LazyExpandedValue::from_literal(context, value))) } - Some(Ok(RawValueExpr::MacroInvocation(invocation))) => { + Some(Ok(RawValueExpr::EExp(invocation))) => { let resolved_invocation = match invocation.resolve(context) { Ok(resolved) => resolved, Err(e) => return Some(Err(e)), diff --git a/src/lazy/expanded/struct.rs b/src/lazy/expanded/struct.rs index 013f1dc1..ab09c810 100644 --- a/src/lazy/expanded/struct.rs +++ b/src/lazy/expanded/struct.rs @@ -100,7 +100,7 @@ impl<'top, D: Decoder> LazyExpandedFieldName<'top, D> { LazyExpandedFieldName::RawName(context, name) => match name.read()? { RawSymbolRef::Text(text) => Ok(text.into()), RawSymbolRef::SymbolId(sid) => context - .symbol_table + .symbol_table() .symbol_for(sid) .map(AsSymbolRef::as_symbol_ref) .ok_or_else(|| { @@ -237,7 +237,7 @@ impl<'top, D: Decoder> LazyExpandedStruct<'top, D> { } pub fn bump_iter(&self) -> &'top mut ExpandedStructIterator<'top, D> { - self.context.allocator.alloc_with(|| self.iter()) + self.context.allocator().alloc_with(|| self.iter()) } pub fn find(&self, name: &str) -> IonResult>> { diff --git a/src/lazy/expanded/template.rs b/src/lazy/expanded/template.rs index 0e24cc7a..68d51373 100644 --- a/src/lazy/expanded/template.rs +++ b/src/lazy/expanded/template.rs @@ -203,7 +203,7 @@ impl<'top, D: Decoder> Iterator for TemplateSequenceIterator<'top, D> { // to the top of the loop. let invoked_macro = self .context - .macro_table + .macro_table() .macro_at_address(body_invocation.invoked_macro_address) .unwrap(); let invocation = TemplateMacroInvocation::new( @@ -317,7 +317,7 @@ impl<'top, D: Decoder> Iterator for TemplateStructUnexpandedFieldsIterator<'top, TemplateBodyValueExpr::MacroInvocation(body_invocation) => { let invoked_macro = self .context - .macro_table + .macro_table() .macro_at_address(body_invocation.invoked_macro_address) .unwrap(); let invocation = TemplateMacroInvocation::new( @@ -636,7 +636,7 @@ impl TemplateBodyMacroInvocation { context: EncodingContextRef<'top>, ) -> TemplateMacroInvocation<'top> { let invoked_macro = context - .macro_table + .macro_table() .macro_at_address(self.invoked_macro_address) .unwrap(); diff --git a/src/lazy/reader.rs b/src/lazy/reader.rs index bec2a901..2978bc97 100644 --- a/src/lazy/reader.rs +++ b/src/lazy/reader.rs @@ -3,7 +3,6 @@ use crate::element::reader::ElementReader; use crate::element::Element; use crate::lazy::decoder::Decoder; -use crate::lazy::encoding::TextEncoding_1_1; use crate::lazy::streaming_raw_reader::IonInput; use crate::lazy::system_reader::SystemReader; use crate::lazy::text::raw::v1_1::reader::MacroAddress; @@ -127,8 +126,10 @@ impl Reader { } } -impl Reader { - // Temporary method for defining/testing templates. +impl Reader { + // Temporary method for defining/testing templates. This method does not confirm that the + // reader's encoding supports macros--that check will happen when encoding directives are + // supported. // TODO: Remove this when the reader can understand 1.1 encoding directives. pub fn register_template(&mut self, template_definition: &str) -> IonResult { self.system_reader @@ -178,9 +179,10 @@ mod tests { use crate::element::Element; use crate::lazy::encoder::writer::Writer; use crate::lazy::encoding::BinaryEncoding_1_0; + use crate::lazy::expanded::EncodingContext; use crate::lazy::value_ref::ValueRef; use crate::write_config::WriteConfig; - use crate::{ion_list, ion_sexp, ion_struct, v1_0, Int, IonResult, IonType}; + use crate::{ion_list, ion_sexp, ion_struct, v1_0, AnyEncoding, Int, IonResult, IonType}; use super::*; @@ -275,4 +277,107 @@ mod tests { assert_eq!(reader.read_next_element()?, None); Ok(()) } + + fn expand_macro_test( + macro_source: &str, + encode_macro_fn: impl FnOnce(MacroAddress) -> Vec, + test_fn: impl FnOnce(Reader) -> IonResult<()>, + ) -> IonResult<()> { + // Because readers do not yet understand encoding directives, we'll pre-calculate the + // macro ID that will be assigned. Make an empty encoding context... + let context = EncodingContext::empty(); + // ...and see how many macros it contains. This will change as development continues. + let macro_address = context.macro_table.len(); + let opcode_byte = u8::try_from(macro_address).unwrap(); + // Using that ID, encode a binary stream containing an invocation of the new macro. + // This function must add an IVM and the encoded e-expression ID, followed by any number + // of arguments that matches the provided signature. + let binary_ion = encode_macro_fn(opcode_byte as usize); + // Construct a reader for the encoded data. + let mut reader = Reader::new(AnyEncoding, binary_ion.as_slice())?; + // Register the template definition, getting the same ID we used earlier. + let actual_address = reader.register_template(macro_source)?; + assert_eq!( + macro_address, actual_address, + "Assigned macro address did not match expected address." + ); + // Use the provided test function to confirm that the data expands to the expected stream. + test_fn(reader) + } + + #[test] + fn expand_binary_template_macro() -> IonResult<()> { + let macro_source = "(macro seventeen () 17)"; + let encode_macro_fn = |address| vec![0xE0, 0x01, 0x01, 0xEA, address as u8]; + expand_macro_test(macro_source, encode_macro_fn, |mut reader| { + assert_eq!(reader.expect_next()?.read()?.expect_i64()?, 17); + Ok(()) + }) + } + + #[test] + fn expand_binary_template_macro_with_one_arg() -> IonResult<()> { + let macro_source = r#" + (macro greet (name) + (make_string "Hello, " name "!") + ) + "#; + #[rustfmt::skip] + let encode_macro_fn = |address| vec![ + // === 1.1 IVM === + 0xE0, 0x01, 0x01, 0xEA, + // === Macro ID === + address as u8, + // === Arg 1 === + // 8-byte string + 0x98, + // M i c h e l l e + 0x4D, 0x69, 0x63, 0x68, 0x65, 0x6C, 0x6C, 0x65, + ]; + expand_macro_test(macro_source, encode_macro_fn, |mut reader| { + assert_eq!( + reader.expect_next()?.read()?.expect_string()?, + "Hello, Michelle!" + ); + Ok(()) + }) + } + + #[test] + fn expand_binary_template_macro_with_multiple_outputs() -> IonResult<()> { + let macro_source = r#" + (macro questions (food) + (values + (make_string "What color is a " food "?") + (make_string "How much potassium is in a " food "?") + (make_string "What wine should I pair with a " food "?"))) + "#; + #[rustfmt::skip] + let encode_macro_fn = |address| vec![ + // === 1.1 IVM === + 0xE0, 0x01, 0x01, 0xEA, + // === Macro ID === + address as u8, + // === Arg 1 === + // 6-byte string + 0x96, + // b a n a n a + 0x62, 0x61, 0x6E, 0x61, 0x6E, 0x61 + ]; + expand_macro_test(macro_source, encode_macro_fn, |mut reader| { + assert_eq!( + reader.expect_next()?.read()?.expect_string()?, + "What color is a banana?" + ); + assert_eq!( + reader.expect_next()?.read()?.expect_string()?, + "How much potassium is in a banana?" + ); + assert_eq!( + reader.expect_next()?.read()?.expect_string()?, + "What wine should I pair with a banana?" + ); + Ok(()) + }) + } } diff --git a/src/lazy/sequence.rs b/src/lazy/sequence.rs index b00a7be5..3943309c 100644 --- a/src/lazy/sequence.rs +++ b/src/lazy/sequence.rs @@ -110,7 +110,7 @@ impl<'top, D: Decoder> LazyList<'top, D> { pub fn annotations(&self) -> AnnotationsIterator<'top, D> { AnnotationsIterator { expanded_annotations: self.expanded_list.annotations(), - symbol_table: self.expanded_list.context.symbol_table, + symbol_table: self.expanded_list.context.symbol_table(), } } } @@ -259,7 +259,7 @@ impl<'top, D: Decoder> LazySExp<'top, D> { pub fn annotations(&self) -> AnnotationsIterator<'top, D> { AnnotationsIterator { expanded_annotations: self.expanded_sexp.annotations(), - symbol_table: self.expanded_sexp.context.symbol_table, + symbol_table: self.expanded_sexp.context.symbol_table(), } } } diff --git a/src/lazy/streaming_raw_reader.rs b/src/lazy/streaming_raw_reader.rs index 1d9974be..e9a4a41a 100644 --- a/src/lazy/streaming_raw_reader.rs +++ b/src/lazy/streaming_raw_reader.rs @@ -3,10 +3,9 @@ use std::fs::File; use std::io; use std::io::{BufReader, Read, StdinLock}; -use bumpalo::Bump as BumpAllocator; - use crate::lazy::any_encoding::IonEncoding; use crate::lazy::decoder::{Decoder, LazyRawReader}; +use crate::lazy::expanded::EncodingContextRef; use crate::lazy::raw_stream_item::LazyRawStreamItem; use crate::{AnyEncoding, IonError, IonResult, LazyRawValue}; @@ -79,7 +78,7 @@ impl StreamingRawReader { pub fn next<'top>( &'top mut self, - allocator: &'top BumpAllocator, + context: EncodingContextRef<'top>, ) -> IonResult> { let mut input_source_exhausted = false; loop { @@ -102,7 +101,7 @@ impl StreamingRawReader { )); let slice_reader = unsafe { &mut *unsafe_cell_reader.get() }; let starting_position = slice_reader.position(); - let result = slice_reader.next(allocator); + let result = slice_reader.next(context); // We're done modifying `slice_reader`, but we need to read some of its fields. These // fields are _not_ the data to which `result` holds a reference. We have to circumvent // the borrow checker's limitation (described in a comment on the StreamingRawReader type) @@ -431,10 +430,9 @@ mod tests { use std::io; use std::io::{BufReader, Cursor, Read}; - use bumpalo::Bump as BumpAllocator; - use crate::lazy::any_encoding::AnyEncoding; use crate::lazy::decoder::{Decoder, LazyRawValue}; + use crate::lazy::expanded::EncodingContext; use crate::lazy::raw_stream_item::LazyRawStreamItem; use crate::lazy::raw_value_ref::RawValueRef; use crate::lazy::streaming_raw_reader::{IonInput, StreamingRawReader}; @@ -466,23 +464,25 @@ mod tests { #[test] fn read_empty_slice() -> IonResult<()> { - let bump = BumpAllocator::new(); + let empty_context = EncodingContext::empty(); + let context = empty_context.get_ref(); let ion = ""; let mut reader = StreamingRawReader::new(AnyEncoding, ion.as_bytes()); // We expect `Ok(EndOfStream)`, not `Err(Incomplete)`. - expect_end_of_stream(reader.next(&bump)?)?; + expect_end_of_stream(reader.next(context)?)?; Ok(()) } fn read_example_stream(input: impl IonInput) -> IonResult<()> { - let bump = BumpAllocator::new(); + let empty_context = EncodingContext::empty(); + let context = empty_context.get_ref(); let mut reader = StreamingRawReader::new(AnyEncoding, input); - expect_string(reader.next(&bump)?, "foo")?; - expect_string(reader.next(&bump)?, "bar")?; - expect_string(reader.next(&bump)?, "baz")?; - expect_string(reader.next(&bump)?, "quux")?; - expect_string(reader.next(&bump)?, "quuz")?; - expect_end_of_stream(reader.next(&bump)?) + expect_string(reader.next(context)?, "foo")?; + expect_string(reader.next(context)?, "bar")?; + expect_string(reader.next(context)?, "baz")?; + expect_string(reader.next(context)?, "quux")?; + expect_string(reader.next(context)?, "quuz")?; + expect_end_of_stream(reader.next(context)?) } // This stream is 104 bytes long @@ -522,9 +522,10 @@ mod tests { const INVALID_EXAMPLE_STREAM: &str = "2024-03-12T16:33.000-05:"; // Missing offset minutes fn read_invalid_example_stream(input: impl IonInput) -> IonResult<()> { - let bump = BumpAllocator::new(); + let empty_context = EncodingContext::empty(); + let context = empty_context.get_ref(); let mut reader = StreamingRawReader::new(AnyEncoding, input); - let result = reader.next(&bump); + let result = reader.next(context); // Because the input stream is exhausted, the incomplete value is illegal data and raises // a decoding error. assert!(matches!(result, Err(IonError::Decoding(_))), "{:?}", result); @@ -569,19 +570,20 @@ mod tests { } // This guarantees that there are several intermediate reading states in which the buffer // contains incomplete data that could be misinterpreted by a reader. - let allocator = BumpAllocator::new(); + let empty_context = EncodingContext::empty(); + let context = empty_context.get_ref(); let mut reader = StreamingRawReader::new(v1_0::Text, IonStream::new(input)); - assert_eq!(reader.next(&allocator)?.expect_ivm()?.version(), (1, 0)); + assert_eq!(reader.next(context)?.expect_ivm()?.version(), (1, 0)); assert_eq!( reader - .next(&allocator)? + .next(context)? .expect_value()? .read()? .expect_decimal()?, Decimal::new(87125, -2) ); - let value = reader.next(&allocator)?.expect_value()?; + let value = reader.next(context)?.expect_value()?; let annotations = value .annotations() .collect::>>()?; diff --git a/src/lazy/struct.rs b/src/lazy/struct.rs index a5e28a19..a6affb02 100644 --- a/src/lazy/struct.rs +++ b/src/lazy/struct.rs @@ -267,7 +267,7 @@ impl<'top, D: Decoder> LazyStruct<'top, D> { pub fn annotations(&self) -> AnnotationsIterator<'top, D> { AnnotationsIterator { expanded_annotations: self.expanded_struct.annotations(), - symbol_table: self.expanded_struct.context.symbol_table, + symbol_table: self.expanded_struct.context.symbol_table(), } } } diff --git a/src/lazy/system_reader.rs b/src/lazy/system_reader.rs index 45197b44..93e17002 100644 --- a/src/lazy/system_reader.rs +++ b/src/lazy/system_reader.rs @@ -131,11 +131,11 @@ impl SystemReader { } pub fn symbol_table(&self) -> &SymbolTable { - self.expanding_reader.context().symbol_table + self.expanding_reader.context().symbol_table() } pub fn pending_symtab_changes(&self) -> &PendingLst { - self.expanding_reader.pending_symtab_changes() + self.expanding_reader.pending_lst() } /// Returns the next top-level stream item (IVM, Symbol Table, Value, or Nothing) as a diff --git a/src/lazy/text/buffer.rs b/src/lazy/text/buffer.rs index ae5073cc..c7f415b1 100644 --- a/src/lazy/text/buffer.rs +++ b/src/lazy/text/buffer.rs @@ -4,7 +4,6 @@ use std::ops::{Range, RangeFrom, RangeTo}; use std::slice::Iter; use std::str::FromStr; -use bumpalo::Bump as BumpAllocator; use nom::branch::alt; use nom::bytes::complete::{ is_a as complete_is_a, is_not as complete_is_not, tag as complete_tag, @@ -23,6 +22,7 @@ use nom::{AsBytes, CompareResult, IResult, InputLength, InputTake, Needed, Parse use crate::lazy::decoder::{LazyRawFieldExpr, LazyRawValueExpr, RawValueExpr}; use crate::lazy::encoding::{TextEncoding, TextEncoding_1_0, TextEncoding_1_1}; +use crate::lazy::expanded::EncodingContextRef; use crate::lazy::raw_stream_item::{EndPosition, LazyRawStreamItem, RawStreamItem}; use crate::lazy::text::encoded_value::EncodedTextValue; use crate::lazy::text::matched::{ @@ -104,7 +104,7 @@ pub struct TextBufferView<'top> { // offset: 6 data: &'top [u8], offset: usize, - pub(crate) allocator: &'top BumpAllocator, + pub(crate) context: EncodingContextRef<'top>, } impl<'a> PartialEq for TextBufferView<'a> { @@ -116,8 +116,8 @@ impl<'a> PartialEq for TextBufferView<'a> { impl<'top> TextBufferView<'top> { /// Constructs a new `TextBufferView` that wraps `data`, setting the view's `offset` to zero. #[inline] - pub fn new(allocator: &'top BumpAllocator, data: &'top [u8]) -> TextBufferView<'top> { - Self::new_with_offset(allocator, data, 0) + pub fn new(context: EncodingContextRef<'top>, data: &'top [u8]) -> TextBufferView<'top> { + Self::new_with_offset(context, data, 0) } /// Constructs a new `TextBufferView` that wraps `data`, setting the view's `offset` to the @@ -125,12 +125,12 @@ impl<'top> TextBufferView<'top> { /// Note that `offset` is the index of the larger stream at which `data` begins and not an /// offset _into_ `data`. pub fn new_with_offset( - allocator: &'top BumpAllocator, + context: EncodingContextRef<'top>, data: &'top [u8], offset: usize, ) -> TextBufferView<'top> { TextBufferView { - allocator, + context, data, offset, } @@ -152,7 +152,7 @@ impl<'top> TextBufferView<'top> { TextBufferView { data: &self.data[offset..offset + length], offset: self.offset + offset, - allocator: self.allocator, + context: self.context, } } @@ -165,7 +165,7 @@ impl<'top> TextBufferView<'top> { TextBufferView { data: &self.data[offset..], offset: self.offset + offset, - allocator: self.allocator, + context: self.context, } } @@ -367,7 +367,7 @@ impl<'top> TextBufferView<'top> { self, ) -> IonParseResult<'top, Option>> { whitespace_and_then(alt(( - Self::match_e_expression.map(|matched| Some(RawValueExpr::MacroInvocation(matched))), + Self::match_e_expression.map(|matched| Some(RawValueExpr::EExp(matched))), value(None, tag(")")), pair( opt(Self::match_annotations), @@ -641,19 +641,19 @@ impl<'top> TextBufferView<'top> { map(Self::match_list, |_matched_list| { // TODO: Cache child expressions found in 1.0 list let not_yet_used_in_1_0 = - bumpalo::collections::Vec::new_in(self.allocator).into_bump_slice(); + bumpalo::collections::Vec::new_in(self.context.allocator()).into_bump_slice(); EncodedTextValue::new(MatchedValue::List(not_yet_used_in_1_0)) }), map(Self::match_sexp, |_matched_sexp| { // TODO: Cache child expressions found in 1.0 sexp let not_yet_used_in_1_0 = - bumpalo::collections::Vec::new_in(self.allocator).into_bump_slice(); + bumpalo::collections::Vec::new_in(self.context.allocator()).into_bump_slice(); EncodedTextValue::new(MatchedValue::SExp(not_yet_used_in_1_0)) }), map(Self::match_struct, |_matched_struct| { // TODO: Cache child expressions found in 1.0 struct let not_yet_used_in_1_0 = - bumpalo::collections::Vec::new_in(self.allocator).into_bump_slice(); + bumpalo::collections::Vec::new_in(self.context.allocator()).into_bump_slice(); EncodedTextValue::new(MatchedValue::Struct(not_yet_used_in_1_0)) }), ))) @@ -776,22 +776,26 @@ impl<'top> TextBufferView<'top> { // Scan ahead to find the end of this list. let list_body = self.slice_to_end(1); let sequence_iter = RawTextListIterator_1_1::new(list_body); - let (span, child_exprs) = - match TextListSpanFinder_1_1::new(self.allocator, sequence_iter).find_span() { - Ok((span, child_exprs)) => (span, child_exprs), - // If the complete container isn't available, return an incomplete. - Err(IonError::Incomplete(_)) => return Err(nom::Err::Incomplete(Needed::Unknown)), - // If invalid syntax was encountered, return a failure to prevent nom from trying - // other parser kinds. - Err(e) => { - return { - let error = InvalidInputError::new(self) - .with_label("matching a v1.1 list") - .with_description(format!("{}", e)); - Err(nom::Err::Failure(IonParseError::Invalid(error))) - } + let (span, child_exprs) = match TextListSpanFinder_1_1::new( + self.context.allocator(), + sequence_iter, + ) + .find_span() + { + Ok((span, child_exprs)) => (span, child_exprs), + // If the complete container isn't available, return an incomplete. + Err(IonError::Incomplete(_)) => return Err(nom::Err::Incomplete(Needed::Unknown)), + // If invalid syntax was encountered, return a failure to prevent nom from trying + // other parser kinds. + Err(e) => { + return { + let error = InvalidInputError::new(self) + .with_label("matching a v1.1 list") + .with_description(format!("{}", e)); + Err(nom::Err::Failure(IonParseError::Invalid(error))) } - }; + } + }; // For the matched span, we use `self` again to include the opening `[` let matched = self.slice(0, span.len()); @@ -817,7 +821,7 @@ impl<'top> TextBufferView<'top> { let sexp_body = self.slice_to_end(1); let sexp_iter = RawTextSExpIterator_1_1::new(sexp_body); let (span, child_expr_cache) = - match TextSExpSpanFinder_1_1::new(self.allocator, sexp_iter).find_span(1) { + match TextSExpSpanFinder_1_1::new(self.context.allocator(), sexp_iter).find_span(1) { Ok((span, child_expr_cache)) => (span, child_expr_cache), // If the complete container isn't available, return an incomplete. Err(IonError::Incomplete(_)) => return Err(nom::Err::Incomplete(Needed::Unknown)), @@ -871,7 +875,7 @@ impl<'top> TextBufferView<'top> { Self::match_e_expression, Self::match_delimiter_after_list_value, ) - .map(|matched| Some(RawValueExpr::MacroInvocation(matched))), + .map(|matched| Some(RawValueExpr::EExp(matched))), value(None, tag("]")), terminated( Self::match_annotated_value_1_1.map(Some), @@ -975,22 +979,26 @@ impl<'top> TextBufferView<'top> { // Scan ahead to find the end of this struct. let struct_body = self.slice_to_end(1); let struct_iter = RawTextStructIterator_1_1::new(struct_body); - let (span, fields) = - match TextStructSpanFinder_1_1::new(self.allocator, struct_iter).find_span() { - Ok((span, fields)) => (span, fields), - // If the complete container isn't available, return an incomplete. - Err(IonError::Incomplete(_)) => return Err(nom::Err::Incomplete(Needed::Unknown)), - // If invalid syntax was encountered, return a failure to prevent nom from trying - // other parser kinds. - Err(e) => { - return { - let error = InvalidInputError::new(self) - .with_label("matching a v1.1 struct") - .with_description(format!("{}", e)); - Err(nom::Err::Failure(IonParseError::Invalid(error))) - } + let (span, fields) = match TextStructSpanFinder_1_1::new( + self.context.allocator(), + struct_iter, + ) + .find_span() + { + Ok((span, fields)) => (span, fields), + // If the complete container isn't available, return an incomplete. + Err(IonError::Incomplete(_)) => return Err(nom::Err::Incomplete(Needed::Unknown)), + // If invalid syntax was encountered, return a failure to prevent nom from trying + // other parser kinds. + Err(e) => { + return { + let error = InvalidInputError::new(self) + .with_label("matching a v1.1 struct") + .with_description(format!("{}", e)); + Err(nom::Err::Failure(IonParseError::Invalid(error))) } - }; + } + }; // For the matched span, we use `self` again to include the opening `{` let matched = self.slice(0, span.len()); @@ -1017,26 +1025,27 @@ impl<'top> TextBufferView<'top> { // we tell the iterator how many bytes comprised the head of the expression: two bytes // for `(:` plus the length of the macro ID. let initial_bytes_skipped = 2 + macro_id_bytes.len(); - let (span, child_expr_cache) = match TextSExpSpanFinder_1_1::new(self.allocator, sexp_iter) - .find_span(initial_bytes_skipped) - { - Ok((span, child_expr_cache)) => (span, child_expr_cache), - // If the complete container isn't available, return an incomplete. - Err(IonError::Incomplete(_)) => return Err(nom::Err::Incomplete(Needed::Unknown)), - // If invalid syntax was encountered, return a failure to prevent nom from trying - // other parser kinds. - Err(e) => { - return { - let error = InvalidInputError::new(self) - .with_label(format!( - "matching an e-expression invoking macro {}", - macro_name - )) - .with_description(format!("{}", e)); - Err(nom::Err::Failure(IonParseError::Invalid(error))) + let (span, child_expr_cache) = + match TextSExpSpanFinder_1_1::new(self.context.allocator(), sexp_iter) + .find_span(initial_bytes_skipped) + { + Ok((span, child_expr_cache)) => (span, child_expr_cache), + // If the complete container isn't available, return an incomplete. + Err(IonError::Incomplete(_)) => return Err(nom::Err::Incomplete(Needed::Unknown)), + // If invalid syntax was encountered, return a failure to prevent nom from trying + // other parser kinds. + Err(e) => { + return { + let error = InvalidInputError::new(self) + .with_label(format!( + "matching an e-expression invoking macro {}", + macro_name + )) + .with_description(format!("{}", e)); + Err(nom::Err::Failure(IonParseError::Invalid(error))) + } } - } - }; + }; // For the matched span, we use `self` again to include the opening `(:` let matched = self.slice(0, span.len()); let remaining = self.slice_to_end(span.len()); @@ -2014,9 +2023,9 @@ impl<'data> nom::InputTake for TextBufferView<'data> { fn take_split(&self, count: usize) -> (Self, Self) { let (before, after) = self.data.split_at(count); - let buffer_before = TextBufferView::new_with_offset(self.allocator, before, self.offset()); + let buffer_before = TextBufferView::new_with_offset(self.context, before, self.offset()); let buffer_after = - TextBufferView::new_with_offset(self.allocator, after, self.offset() + count); + TextBufferView::new_with_offset(self.context, after, self.offset() + count); // Nom's convention is to place the remaining portion of the buffer first, which leads to // a potentially surprising reversed tuple order. (buffer_after, buffer_before) @@ -2220,6 +2229,7 @@ where #[cfg(test)] mod tests { + use crate::lazy::expanded::EncodingContext; use rstest::rstest; use super::*; @@ -2227,7 +2237,7 @@ mod tests { /// Stores an input string that can be tested against a given parser. struct MatchTest { input: String, - allocator: BumpAllocator, + context: EncodingContextRef<'static>, } impl MatchTest { @@ -2236,7 +2246,9 @@ mod tests { fn new(input: &str) -> Self { MatchTest { input: input.to_string(), - allocator: BumpAllocator::new(), + // This uses `leak` to get an `EncodingContextRef` with a `static` lifetime + // for the sake of unit test simplicity. + context: EncodingContextRef::new(Box::leak(Box::new(EncodingContext::empty()))), } } @@ -2244,7 +2256,7 @@ mod tests { where P: Parser, O, IonParseError<'data>>, { - let buffer = TextBufferView::new(&self.allocator, self.input.as_bytes()); + let buffer = TextBufferView::new(self.context, self.input.as_bytes()); match_length(parser).parse(buffer) } @@ -2890,8 +2902,9 @@ mod tests { } fn test_match_text_until_unescaped_str() { - let allocator = BumpAllocator::new(); - let input = TextBufferView::new(&allocator, r" foo bar \''' baz''' quux ".as_bytes()); + let empty_context = EncodingContext::empty(); + let context = empty_context.get_ref(); + let input = TextBufferView::new(context, r" foo bar \''' baz''' quux ".as_bytes()); let (_remaining, (matched, contains_escapes)) = input.match_text_until_unescaped_str(r#"'''"#).unwrap(); assert_eq!(matched.as_text().unwrap(), " foo bar \\''' baz"); diff --git a/src/lazy/text/matched.rs b/src/lazy/text/matched.rs index 0ade70d5..8b1e2c98 100644 --- a/src/lazy/text/matched.rs +++ b/src/lazy/text/matched.rs @@ -131,7 +131,7 @@ impl<'top> MatchedFieldName<'top> { } pub fn read(&self) -> IonResult> { - self.syntax.read(self.input.allocator, self.input) + self.syntax.read(self.input.context.allocator(), self.input) } pub fn range(&self) -> Range { @@ -1221,9 +1221,9 @@ impl MatchedClob { #[cfg(test)] mod tests { - use bumpalo::Bump as BumpAllocator; use crate::lazy::bytes_ref::BytesRef; + use crate::lazy::expanded::{EncodingContext, EncodingContextRef}; use crate::lazy::text::buffer::TextBufferView; use crate::{Decimal, Int, IonResult, Timestamp}; @@ -1231,8 +1231,9 @@ mod tests { fn read_ints() -> IonResult<()> { fn expect_int(data: &str, expected: impl Into) { let expected: Int = expected.into(); - let allocator = BumpAllocator::new(); - let buffer = TextBufferView::new(&allocator, data.as_bytes()); + let encoding_context = EncodingContext::empty(); + let context = encoding_context.get_ref(); + let buffer = TextBufferView::new(context, data.as_bytes()); let (_remaining, matched) = buffer.match_int().unwrap(); let actual = matched.read(buffer).unwrap(); assert_eq!( @@ -1265,8 +1266,9 @@ mod tests { fn read_timestamps() -> IonResult<()> { fn expect_timestamp(data: &str, expected: Timestamp) { let data = format!("{data} "); // Append a space - let allocator = BumpAllocator::new(); - let buffer = TextBufferView::new(&allocator, data.as_bytes()); + let encoding_context = EncodingContext::empty(); + let context = encoding_context.get_ref(); + let buffer = TextBufferView::new(context, data.as_bytes()); let (_remaining, matched) = buffer.match_timestamp().unwrap(); let actual = matched.read(buffer).unwrap(); assert_eq!( @@ -1367,8 +1369,9 @@ mod tests { #[test] fn read_decimals() -> IonResult<()> { fn expect_decimal(data: &str, expected: Decimal) { - let allocator = BumpAllocator::new(); - let buffer = TextBufferView::new(&allocator, data.as_bytes()); + let encoding_context = EncodingContext::empty(); + let context = encoding_context.get_ref(); + let buffer = TextBufferView::new(context, data.as_bytes()); let result = buffer.match_decimal(); assert!( result.is_ok(), @@ -1422,10 +1425,11 @@ mod tests { fn read_blobs() -> IonResult<()> { fn expect_blob(data: &str, expected: &str) { let data = format!("{data} "); // Append a space - let allocator = BumpAllocator::new(); - let buffer = TextBufferView::new(&allocator, data.as_bytes()); + let encoding_context = EncodingContext::empty(); + let context = encoding_context.get_ref(); + let buffer = TextBufferView::new(context, data.as_bytes()); let (_remaining, matched) = buffer.match_blob().unwrap(); - let actual = matched.read(&allocator, buffer).unwrap(); + let actual = matched.read(context.allocator(), buffer).unwrap(); assert_eq!( actual, expected.as_ref(), @@ -1460,11 +1464,12 @@ mod tests { // stream so the parser knows that the long-form strings are complete. We then trim // our fabricated value off of the input before reading. let data = format!("{data}\n0"); - let allocator = BumpAllocator::new(); - let buffer = TextBufferView::new(&allocator, data.as_bytes()); + let encoding_context = EncodingContext::empty(); + let context = encoding_context.get_ref(); + let buffer = TextBufferView::new(context, data.as_bytes()); let (_remaining, matched) = buffer.match_string().unwrap(); let matched_input = buffer.slice(0, buffer.len() - 2); - let actual = matched.read(&allocator, matched_input).unwrap(); + let actual = matched.read(context.allocator(), matched_input).unwrap(); assert_eq!( actual, expected, "Actual didn't match expected for input '{}'.\n{:?}\n!=\n{:?}", @@ -1496,25 +1501,28 @@ mod tests { #[test] fn read_clobs() -> IonResult<()> { - fn read_clob<'a>(allocator: &'a BumpAllocator, data: &'a str) -> IonResult> { - let buffer = TextBufferView::new(allocator, data.as_bytes()); + fn read_clob<'a>( + context: EncodingContextRef<'a>, + data: &'a str, + ) -> IonResult> { + let buffer = TextBufferView::new(context, data.as_bytes()); // All `read_clob` usages should be accepted by the matcher, so we can `unwrap()` the // call to `match_clob()`. let (_remaining, matched) = buffer.match_clob().unwrap(); // The resulting buffer slice may be rejected during reading. - matched.read(allocator, buffer) + matched.read(context.allocator(), buffer) } - fn expect_clob_error(allocator: &BumpAllocator, data: &str) { - let actual = read_clob(allocator, data); + fn expect_clob_error(context: EncodingContextRef, data: &str) { + let actual = read_clob(context, data); assert!( actual.is_err(), "Successfully read a clob from illegal input." ); } - fn expect_clob(allocator: &BumpAllocator, data: &str, expected: &str) { - let result = read_clob(allocator, data); + fn expect_clob(context: EncodingContextRef, data: &str, expected: &str) { + let result = read_clob(context, data); assert!( result.is_ok(), "Unexpected read failure for input '{data}': {:?}", @@ -1561,10 +1569,10 @@ mod tests { ("{{\"foo\rbar\rbaz\"}}", "foo\rbar\rbaz"), ]; - let mut allocator = BumpAllocator::new(); + let empty_context = EncodingContext::empty(); + let context = empty_context.get_ref(); for (input, expected) in tests { - expect_clob(&allocator, input, expected); - allocator.reset(); + expect_clob(context, input, expected); } let illegal_inputs = [ @@ -1585,8 +1593,7 @@ mod tests { ]; for input in illegal_inputs { - expect_clob_error(&allocator, input); - allocator.reset(); + expect_clob_error(context, input); } Ok(()) diff --git a/src/lazy/text/raw/reader.rs b/src/lazy/text/raw/reader.rs index 2262ff49..75f868b6 100644 --- a/src/lazy/text/raw/reader.rs +++ b/src/lazy/text/raw/reader.rs @@ -1,10 +1,9 @@ #![allow(non_camel_case_types)] -use bumpalo::Bump as BumpAllocator; - use crate::lazy::any_encoding::IonEncoding; use crate::lazy::decoder::{Decoder, LazyRawReader, RawVersionMarker}; use crate::lazy::encoding::TextEncoding_1_0; +use crate::lazy::expanded::EncodingContextRef; use crate::lazy::raw_stream_item::{EndPosition, LazyRawStreamItem, RawStreamItem}; use crate::lazy::text::buffer::TextBufferView; use crate::lazy::text::parse_result::AddContext; @@ -44,13 +43,13 @@ impl<'data> LazyRawTextReader_1_0<'data> { pub fn next<'top>( &'top mut self, - allocator: &'top BumpAllocator, + context: EncodingContextRef<'top>, ) -> IonResult> where 'data: 'top, { let input = TextBufferView::new_with_offset( - allocator, + context, &self.input[self.local_offset..], self.stream_offset + self.local_offset, ); @@ -107,12 +106,12 @@ impl<'data> LazyRawReader<'data, TextEncoding_1_0> for LazyRawTextReader_1_0<'da fn next<'top>( &'top mut self, - allocator: &'top BumpAllocator, + context: EncodingContextRef<'top>, ) -> IonResult> where 'data: 'top, { - self.next(allocator) + self.next(context) } fn position(&self) -> usize { @@ -127,6 +126,7 @@ impl<'data> LazyRawReader<'data, TextEncoding_1_0> for LazyRawTextReader_1_0<'da #[cfg(test)] mod tests { use crate::lazy::decoder::{HasRange, HasSpan, LazyRawFieldName, LazyRawStruct, LazyRawValue}; + use crate::lazy::expanded::EncodingContext; use crate::lazy::raw_value_ref::RawValueRef; use crate::raw_symbol_ref::AsRawSymbolRef; use crate::{Decimal, IonType, RawSymbolRef, Timestamp}; @@ -134,21 +134,21 @@ mod tests { use super::*; struct TestReader<'data> { - allocator: BumpAllocator, + context: EncodingContextRef<'data>, reader: LazyRawTextReader_1_0<'data>, } impl<'data> TestReader<'data> { fn next(&mut self) -> IonResult> { - self.reader.next(&self.allocator) + self.reader.next(self.context) } fn expect_next<'a>(&'a mut self, expected: RawValueRef<'a, TextEncoding_1_0>) where 'data: 'a, { - let TestReader { allocator, reader } = self; + let TestReader { context, reader } = self; let lazy_value = reader - .next(allocator) + .next(*context) .expect("advancing the reader failed") .expect_value() .expect("expected a value"); @@ -298,9 +298,10 @@ mod tests { "#, ); + let encoding_context = EncodingContext::empty(); let reader = &mut TestReader { reader: LazyRawTextReader_1_0::new(data.as_bytes()), - allocator: BumpAllocator::new(), + context: encoding_context.get_ref(), }; assert_eq!(reader.next()?.expect_ivm()?.version(), (1, 0)); @@ -474,23 +475,24 @@ mod tests { #[test] fn ranges_and_spans() -> IonResult<()> { - let bump = bumpalo::Bump::new(); + let empty_context = EncodingContext::empty(); + let context = empty_context.get_ref(); let data = b"foo 2024T bar::38 [1, 2, 3]"; let mut reader = LazyRawTextReader_1_0::new(data); - let foo = reader.next(&bump)?.expect_value()?; + let foo = reader.next(context)?.expect_value()?; assert_eq!(foo.span(), b"foo"); assert_eq!(foo.range(), 0..3); - let timestamp = reader.next(&bump)?.expect_value()?; + let timestamp = reader.next(context)?.expect_value()?; assert_eq!(timestamp.span(), b"2024T"); assert_eq!(timestamp.range(), 4..9); - let annotated_int = reader.next(&bump)?.expect_value()?; + let annotated_int = reader.next(context)?.expect_value()?; assert_eq!(annotated_int.span(), b"bar::38"); assert_eq!(annotated_int.range(), 10..17); - let list_value = reader.next(&bump)?.expect_value()?; + let list_value = reader.next(context)?.expect_value()?; assert_eq!(list_value.span(), b"[1, 2, 3]"); assert_eq!(list_value.range(), 18..27); diff --git a/src/lazy/text/raw/sequence.rs b/src/lazy/text/raw/sequence.rs index 1187e77b..77bfa373 100644 --- a/src/lazy/text/raw/sequence.rs +++ b/src/lazy/text/raw/sequence.rs @@ -1,4 +1,5 @@ #![allow(non_camel_case_types)] + use std::fmt; use std::fmt::{Debug, Formatter}; use std::ops::Range; @@ -301,15 +302,15 @@ impl<'a> Debug for LazyRawTextSExp_1_0<'a> { mod tests { use std::ops::Range; + use crate::lazy::expanded::EncodingContext; use crate::lazy::text::raw::reader::LazyRawTextReader_1_0; use crate::IonResult; - use bumpalo::Bump as BumpAllocator; - fn expect_sequence_range(ion_data: &str, expected: Range) -> IonResult<()> { - let allocator = BumpAllocator::new(); + let empty_context = EncodingContext::empty(); + let context = empty_context.get_ref(); let reader = &mut LazyRawTextReader_1_0::new(ion_data.as_bytes()); - let value = reader.next(&allocator)?.expect_value()?; + let value = reader.next(context)?.expect_value()?; let actual_range = value.data_range(); assert_eq!( actual_range, expected, diff --git a/src/lazy/text/raw/struct.rs b/src/lazy/text/raw/struct.rs index 8e9e29af..31ed46c0 100644 --- a/src/lazy/text/raw/struct.rs +++ b/src/lazy/text/raw/struct.rs @@ -157,16 +157,16 @@ impl<'top> IntoIterator for LazyRawTextStruct_1_0<'top> { mod tests { use std::ops::Range; - use bumpalo::Bump as BumpAllocator; - use crate::lazy::decoder::{HasRange, HasSpan, LazyRawStruct, LazyRawValue}; + use crate::lazy::expanded::EncodingContext; use crate::lazy::text::raw::reader::LazyRawTextReader_1_0; use crate::IonResult; fn expect_struct_range(ion_data: &str, expected: Range) -> IonResult<()> { - let allocator = BumpAllocator::new(); + let empty_context = EncodingContext::empty(); + let context = empty_context.get_ref(); let reader = &mut LazyRawTextReader_1_0::new(ion_data.as_bytes()); - let value = reader.next(&allocator)?.expect_value()?; + let value = reader.next(context)?.expect_value()?; let actual_range = value.data_range(); assert_eq!( actual_range, expected, @@ -231,10 +231,11 @@ mod tests { ), ]; for (input, field_name_ranges) in tests { - let bump = bumpalo::Bump::new(); + let encoding_context = EncodingContext::empty(); + let context = encoding_context.get_ref(); let mut reader = LazyRawTextReader_1_0::new(input.as_bytes()); let struct_ = reader - .next(&bump)? + .next(context)? .expect_value()? .read()? .expect_struct()?; diff --git a/src/lazy/text/raw/v1_1/reader.rs b/src/lazy/text/raw/v1_1/reader.rs index f333247a..164e3ccf 100644 --- a/src/lazy/text/raw/v1_1/reader.rs +++ b/src/lazy/text/raw/v1_1/reader.rs @@ -5,7 +5,6 @@ use std::fmt::{Debug, Display, Formatter}; use std::ops::Range; use bumpalo::collections::Vec as BumpVec; -use bumpalo::Bump as BumpAllocator; use nom::character::streaming::satisfy; use crate::lazy::any_encoding::IonEncoding; @@ -17,6 +16,7 @@ use crate::lazy::decoder::{ }; use crate::lazy::encoding::TextEncoding_1_1; use crate::lazy::expanded::macro_evaluator::RawEExpression; +use crate::lazy::expanded::EncodingContextRef; use crate::lazy::raw_stream_item::{EndPosition, LazyRawStreamItem, RawStreamItem}; use crate::lazy::span::Span; use crate::lazy::text::buffer::TextBufferView; @@ -34,6 +34,84 @@ pub struct LazyRawTextReader_1_1<'data> { local_offset: usize, } +impl<'data> LazyRawReader<'data, TextEncoding_1_1> for LazyRawTextReader_1_1<'data> { + fn resume_at_offset( + data: &'data [u8], + offset: usize, + _config: ::ReaderSavedState, + ) -> Self { + LazyRawTextReader_1_1 { + input: data, + // `data` begins at position `offset` within some larger stream. If `data` contains + // the entire stream, this will be zero. + stream_offset: offset, + // Start reading from the beginning of the slice `data` + local_offset: 0, + } + } + + fn next<'top>( + &'top mut self, + context: EncodingContextRef<'top>, + ) -> IonResult> + where + 'data: 'top, + { + let input = TextBufferView::new_with_offset( + context, + &self.input[self.local_offset..], + self.stream_offset + self.local_offset, + ); + let (buffer_after_whitespace, _whitespace) = input + .match_optional_comments_and_whitespace() + .with_context("reading v1.1 whitespace/comments at the top level", input)?; + if buffer_after_whitespace.is_empty() { + return Ok(RawStreamItem::EndOfStream(EndPosition::new( + TextEncoding_1_1.encoding(), + buffer_after_whitespace.offset(), + ))); + } + + // Consume any trailing whitespace that followed this item. Doing this allows us to check + // whether this was the last item in the buffer by testing `buffer.is_empty()` afterward. + let (buffer_after_item, matched_item) = buffer_after_whitespace + .match_top_level_item_1_1() + .with_context("reading a v1.1 top-level value", buffer_after_whitespace)?; + + let (buffer_after_trailing_ws, _trailing_ws) = buffer_after_item + .match_optional_comments_and_whitespace() + .with_context( + "reading trailing top-level whitespace/comments in v1.1", + buffer_after_item, + )?; + + if let RawStreamItem::VersionMarker(marker) = matched_item { + // TODO: It is not the raw reader's responsibility to report this error. It should + // surface the IVM to the caller, who can then either create a different reader + // for the reported version OR raise an error. + // See: https://github.com/amazon-ion/ion-rust/issues/644 + let (major, minor) = marker.version(); + if (major, minor) != (1, 1) { + return IonResult::decoding_error(format!( + "Ion version {major}.{minor} is not supported" + )); + } + } + // Since we successfully matched the next value, we'll update the buffer + // so a future call to `next()` will resume parsing the remaining input. + self.local_offset = buffer_after_trailing_ws.offset() - self.stream_offset; + Ok(matched_item) + } + + fn position(&self) -> usize { + self.stream_offset + self.local_offset + } + + fn encoding(&self) -> IonEncoding { + IonEncoding::Text_1_1 + } +} + /// The index at which this macro can be found in the macro table. pub type MacroAddress = usize; @@ -135,84 +213,6 @@ impl EncodedTextMacroInvocation { } } -impl<'data> LazyRawReader<'data, TextEncoding_1_1> for LazyRawTextReader_1_1<'data> { - fn resume_at_offset( - data: &'data [u8], - offset: usize, - _config: ::ReaderSavedState, - ) -> Self { - LazyRawTextReader_1_1 { - input: data, - // `data` begins at position `offset` within some larger stream. If `data` contains - // the entire stream, this will be zero. - stream_offset: offset, - // Start reading from the beginning of the slice `data` - local_offset: 0, - } - } - - fn next<'top>( - &'top mut self, - allocator: &'top BumpAllocator, - ) -> IonResult> - where - 'data: 'top, - { - let input = TextBufferView::new_with_offset( - allocator, - &self.input[self.local_offset..], - self.stream_offset + self.local_offset, - ); - let (buffer_after_whitespace, _whitespace) = input - .match_optional_comments_and_whitespace() - .with_context("reading v1.1 whitespace/comments at the top level", input)?; - if buffer_after_whitespace.is_empty() { - return Ok(RawStreamItem::EndOfStream(EndPosition::new( - TextEncoding_1_1.encoding(), - buffer_after_whitespace.offset(), - ))); - } - - // Consume any trailing whitespace that followed this item. Doing this allows us to check - // whether this was the last item in the buffer by testing `buffer.is_empty()` afterward. - let (buffer_after_item, matched_item) = buffer_after_whitespace - .match_top_level_item_1_1() - .with_context("reading a v1.1 top-level value", buffer_after_whitespace)?; - - let (buffer_after_trailing_ws, _trailing_ws) = buffer_after_item - .match_optional_comments_and_whitespace() - .with_context( - "reading trailing top-level whitespace/comments in v1.1", - buffer_after_item, - )?; - - if let RawStreamItem::VersionMarker(marker) = matched_item { - // TODO: It is not the raw reader's responsibility to report this error. It should - // surface the IVM to the caller, who can then either create a different reader - // for the reported version OR raise an error. - // See: https://github.com/amazon-ion/ion-rust/issues/644 - let (major, minor) = marker.version(); - if (major, minor) != (1, 1) { - return IonResult::decoding_error(format!( - "Ion version {major}.{minor} is not supported" - )); - } - } - // Since we successfully matched the next value, we'll update the buffer - // so a future call to `next()` will resume parsing the remaining input. - self.local_offset = buffer_after_trailing_ws.offset() - self.stream_offset; - Ok(matched_item) - } - - fn position(&self) -> usize { - self.stream_offset + self.local_offset - } - - fn encoding(&self) -> IonEncoding { - IonEncoding::Text_1_1 - } -} - #[derive(Copy, Clone)] pub struct LazyRawTextList_1_1<'top> { pub(crate) value: LazyRawTextValue_1_1<'top>, @@ -741,17 +741,18 @@ impl<'top> TextStructSpanFinder_1_1<'top> { #[cfg(test)] mod tests { + use crate::lazy::expanded::EncodingContext; use crate::lazy::raw_value_ref::RawValueRef; use super::*; fn expect_next<'top, 'data: 'top>( - allocator: &'top BumpAllocator, + context: EncodingContextRef<'top>, reader: &'top mut LazyRawTextReader_1_1<'data>, expected: RawValueRef<'top, TextEncoding_1_1>, ) { let lazy_value = reader - .next(allocator) + .next(context) .expect("advancing the reader failed") .expect_value() .expect("expected a value"); @@ -775,18 +776,19 @@ mod tests { false "#; - let allocator = BumpAllocator::new(); + let empty_context = EncodingContext::empty(); + let context = empty_context.get_ref(); let reader = &mut LazyRawTextReader_1_1::new(data.as_bytes()); // $ion_1_1 - assert_eq!(reader.next(&allocator)?.expect_ivm()?.version(), (1, 1)); + assert_eq!(reader.next(context)?.expect_ivm()?.version(), (1, 1)); // "foo" - expect_next(&allocator, reader, RawValueRef::String("foo".into())); + expect_next(context, reader, RawValueRef::String("foo".into())); // bar - expect_next(&allocator, reader, RawValueRef::Symbol("bar".into())); + expect_next(context, reader, RawValueRef::Symbol("bar".into())); // (baz null.string) let sexp = reader - .next(&allocator)? + .next(context)? .expect_value()? .read()? .expect_sexp()?; @@ -801,10 +803,10 @@ mod tests { ); assert!(children.next().is_none()); // (:quux quuz) - let macro_invocation = reader.next(&allocator)?.expect_macro_invocation()?; + let macro_invocation = reader.next(context)?.expect_macro_invocation()?; assert_eq!(macro_invocation.id, MacroIdRef::LocalName("quux")); - expect_next(&allocator, reader, RawValueRef::Int(77.into())); - expect_next(&allocator, reader, RawValueRef::Bool(false)); + expect_next(context, reader, RawValueRef::Int(77.into())); + expect_next(context, reader, RawValueRef::Bool(false)); Ok(()) } } diff --git a/src/lazy/text/value.rs b/src/lazy/text/value.rs index 4835f42f..a400b613 100644 --- a/src/lazy/text/value.rs +++ b/src/lazy/text/value.rs @@ -197,7 +197,7 @@ impl<'top, E: TextEncoding<'top>> LazyRawValue<'top, E> for LazyRawTextValue<'to fn read(&self) -> IonResult> { // Get the value's matched input, skipping over any annotations let matched_input = self.input.slice_to_end(self.encoded_value.data_offset()); - let allocator = self.input.allocator; + let allocator = self.input.context.allocator(); use crate::lazy::text::matched::MatchedValue::*; let value_ref = match self.encoded_value.matched() { @@ -258,7 +258,7 @@ impl<'top> Iterator for RawTextAnnotationsIterator<'top> { let matched_input = self .input .slice(span.start - self.input.offset(), span.len()); - let text = match symbol.read(self.input.allocator, matched_input) { + let text = match symbol.read(self.input.context.allocator(), matched_input) { Ok(text) => text, Err(e) => { self.has_returned_error = true; @@ -272,8 +272,7 @@ impl<'top> Iterator for RawTextAnnotationsIterator<'top> { #[cfg(test)] mod tests { - use bumpalo::Bump as BumpAllocator; - + use crate::lazy::expanded::EncodingContext; use crate::lazy::text::buffer::TextBufferView; use crate::lazy::text::value::RawTextAnnotationsIterator; use crate::{IonResult, RawSymbolRef}; @@ -281,8 +280,9 @@ mod tests { #[test] fn iterate_annotations() -> IonResult<()> { fn test(input: &str) -> IonResult<()> { - let allocator = BumpAllocator::new(); - let input = TextBufferView::new(&allocator, input.as_bytes()); + let encoding_context = EncodingContext::empty(); + let context = encoding_context.get_ref(); + let input = TextBufferView::new(context, input.as_bytes()); let mut iter = RawTextAnnotationsIterator::new(input); assert_eq!(iter.next().unwrap()?, RawSymbolRef::Text("foo")); assert_eq!(iter.next().unwrap()?, RawSymbolRef::Text("bar")); diff --git a/src/lazy/value.rs b/src/lazy/value.rs index 02947d3f..ec38c90a 100644 --- a/src/lazy/value.rs +++ b/src/lazy/value.rs @@ -68,13 +68,13 @@ impl<'top, D: Decoder> LazyValue<'top, D> { #[cfg(feature = "experimental-tooling-apis")] pub fn symbol_table(&self) -> &SymbolTable { - self.expanded_value.context.symbol_table + self.expanded_value.context.symbol_table() } // When the `experimental-tooling-apis` feature is disabled, this method is `pub(crate)` #[cfg(not(feature = "experimental-tooling-apis"))] pub(crate) fn symbol_table(&self) -> &SymbolTable { - self.expanded_value.context.symbol_table + self.expanded_value.context.symbol_table() } /// Returns the [`IonType`] of this value. @@ -221,7 +221,7 @@ impl<'top, D: Decoder> LazyValue<'top, D> { pub fn annotations(&self) -> AnnotationsIterator<'top, D> { AnnotationsIterator { expanded_annotations: self.expanded_value.annotations(), - symbol_table: self.expanded_value.context.symbol_table, + symbol_table: self.expanded_value.context.symbol_table(), } } @@ -275,7 +275,7 @@ impl<'top, D: Decoder> LazyValue<'top, D> { RawSymbolRef::SymbolId(sid) => self .expanded_value .context - .symbol_table + .symbol_table() .symbol_for(sid) .ok_or_else(|| { IonError::decoding_error(format!( diff --git a/src/lib.rs b/src/lib.rs index 9fa8611f..0ecd9128 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -186,7 +186,7 @@ pub(crate) mod unsafe_helpers; #[cfg(feature = "experimental-ion-hash")] pub mod ion_hash; -mod lazy; +pub(crate) mod lazy; mod write_config; pub use crate::lazy::any_encoding::AnyEncoding;