From c769c7db77446ef843bd321db1e507e1c00e4000 Mon Sep 17 00:00:00 2001 From: Anton Romanov Date: Wed, 19 Jun 2024 17:26:39 +0000 Subject: [PATCH] [serde] Implement serde Serializer/Deserializer::is_human_readable Will return true for text encodings, false for binary. is_human_readable is used to determine whether Serialize implementations should serialize in human-readable form. Some types have a human-readable form that may be somewhat expensive to construct, as well as a binary form that is compact and efficient. Fixes #790 --- .../encoder/binary/v1_0/container_writers.rs | 2 ++ src/lazy/encoder/binary/v1_0/value_writer.rs | 2 ++ .../encoder/binary/v1_1/container_writers.rs | 1 + src/lazy/encoder/binary/v1_1/value_writer.rs | 4 +++ src/lazy/encoder/text/v1_0/value_writer.rs | 5 +++ src/lazy/encoder/text/v1_1/value_writer.rs | 4 +++ src/lazy/encoder/value_writer.rs | 5 +++ src/lazy/encoder/writer.rs | 2 ++ src/lazy/never.rs | 2 ++ src/lazy/reader.rs | 10 ++++-- src/lazy/streaming_raw_reader.rs | 27 ++++++++++---- src/lazy/system_reader.rs | 16 ++++----- src/serde/de.rs | 35 +++++++++++++------ src/serde/mod.rs | 22 ++++++++++++ src/serde/ser.rs | 4 +++ 15 files changed, 115 insertions(+), 26 deletions(-) diff --git a/src/lazy/encoder/binary/v1_0/container_writers.rs b/src/lazy/encoder/binary/v1_0/container_writers.rs index 19248a62..e5954274 100644 --- a/src/lazy/encoder/binary/v1_0/container_writers.rs +++ b/src/lazy/encoder/binary/v1_0/container_writers.rs @@ -335,6 +335,8 @@ impl<'value, 'top> MakeValueWriter for BinaryStructWriter_1_0<'value, 'top> { } impl<'value, 'top> StructWriter for BinaryStructWriter_1_0<'value, 'top> { + const IS_HUMAN_READABLE: bool = false; + fn close(self) -> IonResult<()> { self.container_writer.end() } diff --git a/src/lazy/encoder/binary/v1_0/value_writer.rs b/src/lazy/encoder/binary/v1_0/value_writer.rs index e485d71c..d53fde21 100644 --- a/src/lazy/encoder/binary/v1_0/value_writer.rs +++ b/src/lazy/encoder/binary/v1_0/value_writer.rs @@ -288,6 +288,7 @@ impl<'value, 'top> ValueWriter for BinaryValueWriter_1_0<'value, 'top> { type StructWriter = BinaryStructWriter_1_0<'value, 'top>; type EExpWriter = Never; + const IS_HUMAN_READABLE: bool = false; delegate_value_writer_to_self!(); } @@ -411,6 +412,7 @@ impl<'value, 'top> ValueWriter for BinaryAnnotatedValueWriter_1_0<'value, 'top> type SExpWriter = BinarySExpWriter_1_0<'value, 'top>; type StructWriter = BinaryStructWriter_1_0<'value, 'top>; + const IS_HUMAN_READABLE: bool = false; // Ion 1.0 type EExpWriter = Never; diff --git a/src/lazy/encoder/binary/v1_1/container_writers.rs b/src/lazy/encoder/binary/v1_1/container_writers.rs index 178efe65..aff68d0f 100644 --- a/src/lazy/encoder/binary/v1_1/container_writers.rs +++ b/src/lazy/encoder/binary/v1_1/container_writers.rs @@ -345,6 +345,7 @@ impl<'value, 'top> MakeValueWriter for BinaryStructWriter_1_1<'value, 'top> { } impl<'value, 'top> StructWriter for BinaryStructWriter_1_1<'value, 'top> { + const IS_HUMAN_READABLE: bool = false; fn close(mut self) -> IonResult<()> { if let ContainerEncodingKind::Delimited(_) = &mut self.container_writer.encoder { // Write the FlexSym escape (FlexUInt 0). The container writer can emit the closing diff --git a/src/lazy/encoder/binary/v1_1/value_writer.rs b/src/lazy/encoder/binary/v1_1/value_writer.rs index 38a34d5a..fad7efd6 100644 --- a/src/lazy/encoder/binary/v1_1/value_writer.rs +++ b/src/lazy/encoder/binary/v1_1/value_writer.rs @@ -664,6 +664,8 @@ impl<'value, 'top> ValueWriter for BinaryValueWriter_1_1<'value, 'top> { type EExpWriter = BinaryEExpWriter_1_1<'value, 'top>; + const IS_HUMAN_READABLE: bool = false; + delegate_value_writer_to_self!(); } @@ -767,6 +769,8 @@ impl<'value, 'top> ValueWriter for BinaryAnnotatedValueWriter_1_1<'value, 'top> type StructWriter = BinaryStructWriter_1_1<'value, 'top>; type EExpWriter = BinaryEExpWriter_1_1<'value, 'top>; + const IS_HUMAN_READABLE: bool = false; + annotate_and_delegate_1_1!( IonType => write_null, bool => write_bool, diff --git a/src/lazy/encoder/text/v1_0/value_writer.rs b/src/lazy/encoder/text/v1_0/value_writer.rs index 30c97fce..65320cc3 100644 --- a/src/lazy/encoder/text/v1_0/value_writer.rs +++ b/src/lazy/encoder/text/v1_0/value_writer.rs @@ -443,6 +443,7 @@ impl<'value, W: Write> MakeValueWriter for TextStructWriter_1_0<'value, W> { } impl<'value, W: Write> StructWriter for TextStructWriter_1_0<'value, W> { + const IS_HUMAN_READABLE: bool = true; fn close(self) -> IonResult<()> { self.end() } @@ -473,6 +474,8 @@ impl<'value, W: Write + 'value> ValueWriter for TextAnnotatedValueWriter_1_0<'va // Ion 1.0 does not support macros type EExpWriter = Never; + const IS_HUMAN_READABLE: bool = true; + delegate_value_writer_to!(fallible closure |self_: Self| self_.encode_annotations()); } @@ -498,6 +501,8 @@ impl<'value, W: Write> ValueWriter for TextValueWriter_1_0<'value, W> { type SExpWriter = TextSExpWriter_1_0<'value, W>; type StructWriter = TextStructWriter_1_0<'value, W>; + const IS_HUMAN_READABLE: bool = true; + // Ion 1.0 does not support macros type EExpWriter = Never; fn write_null(mut self, ion_type: IonType) -> IonResult<()> { diff --git a/src/lazy/encoder/text/v1_1/value_writer.rs b/src/lazy/encoder/text/v1_1/value_writer.rs index 21a31b1f..091989e7 100644 --- a/src/lazy/encoder/text/v1_1/value_writer.rs +++ b/src/lazy/encoder/text/v1_1/value_writer.rs @@ -46,6 +46,8 @@ impl<'value, W: Write + 'value> ValueWriter for TextValueWriter_1_1<'value, W> { type StructWriter = TextStructWriter_1_1<'value, W>; type EExpWriter = TextEExpWriter_1_1<'value, W>; + const IS_HUMAN_READABLE: bool = true; + // For all of the scalars, delegate to the existing 1.0 writing logic. delegate! { to self.value_writer_1_0 { @@ -121,6 +123,7 @@ impl<'value, W: Write + 'value> ValueWriter for TextAnnotatedValueWriter_1_1<'va type SExpWriter = TextSExpWriter_1_1<'value, W>; type StructWriter = TextStructWriter_1_1<'value, W>; type EExpWriter = TextEExpWriter_1_1<'value, W>; + const IS_HUMAN_READABLE: bool = true; // For all of the scalars, delegate to the existing 1.0 writing logic. delegate! { to self.value_writer_1_0 { @@ -227,6 +230,7 @@ impl<'value, W: Write> MakeValueWriter for TextStructWriter_1_1<'value, W> { } impl<'value, W: Write> StructWriter for TextStructWriter_1_1<'value, W> { + const IS_HUMAN_READABLE: bool = true; fn close(self) -> IonResult<()> { self.writer_1_0.close() } diff --git a/src/lazy/encoder/value_writer.rs b/src/lazy/encoder/value_writer.rs index 3c360b31..b43b0639 100644 --- a/src/lazy/encoder/value_writer.rs +++ b/src/lazy/encoder/value_writer.rs @@ -53,6 +53,7 @@ pub trait ValueWriter: AnnotatableWriter + Sized { type SExpWriter: SequenceWriter; type StructWriter: StructWriter; type EExpWriter: EExpWriter; + const IS_HUMAN_READABLE: bool; fn write_null(self, ion_type: IonType) -> IonResult<()>; fn write_bool(self, value: bool) -> IonResult<()>; @@ -236,6 +237,7 @@ impl<'field, StructWriterType: StructWriter> ValueWriter for FieldWriter<'field, <::ValueWriter<'field> as ValueWriter>::StructWriter; type EExpWriter = <::ValueWriter<'field> as ValueWriter>::EExpWriter; + const IS_HUMAN_READABLE: bool = StructWriterType::IS_HUMAN_READABLE; delegate_value_writer_to!(fallible closure |self_: Self| { self_.struct_writer.encode_field_name(self_.name)?; @@ -287,6 +289,8 @@ impl<'field, StructWriterType: StructWriter> AnnotatableWriter impl<'field, StructWriterType: StructWriter> ValueWriter for AnnotatedFieldWriter<'field, StructWriterType> { + const IS_HUMAN_READABLE: bool = StructWriterType::IS_HUMAN_READABLE; + type ListWriter = <<::ValueWriter<'field> as AnnotatableWriter>::AnnotatedValueWriter<'field> as ValueWriter>::ListWriter; type SExpWriter = @@ -304,6 +308,7 @@ impl<'field, StructWriterType: StructWriter> ValueWriter } pub trait StructWriter: FieldEncoder + MakeValueWriter + Sized { + const IS_HUMAN_READABLE: bool; /// Writes a struct field using the provided name/value pair. fn write( &mut self, diff --git a/src/lazy/encoder/writer.rs b/src/lazy/encoder/writer.rs index 2ca322fb..d42992b4 100644 --- a/src/lazy/encoder/writer.rs +++ b/src/lazy/encoder/writer.rs @@ -251,6 +251,7 @@ impl<'value, V: ValueWriter> ValueWriter for ApplicationValueWriter<'value, V> { type SExpWriter = ApplicationSExpWriter<'value, V>; type StructWriter = ApplicationStructWriter<'value, V>; type EExpWriter = ApplicationEExpWriter<'value, V>; + const IS_HUMAN_READABLE: bool = V::IS_HUMAN_READABLE; delegate! { to self.raw_value_writer { @@ -403,6 +404,7 @@ impl<'value, V: ValueWriter> FieldEncoder for ApplicationStructWriter<'value, V> } impl<'value, V: ValueWriter> StructWriter for ApplicationStructWriter<'value, V> { + const IS_HUMAN_READABLE: bool = V::IS_HUMAN_READABLE; fn close(self) -> IonResult<()> { self.raw_struct_writer.close() } diff --git a/src/lazy/never.rs b/src/lazy/never.rs index 80e11dec..7b546b39 100644 --- a/src/lazy/never.rs +++ b/src/lazy/never.rs @@ -68,6 +68,7 @@ impl FieldEncoder for Never { } impl StructWriter for Never { + const IS_HUMAN_READABLE: bool = false; fn close(self) -> IonResult<()> { unreachable!("StructWriter::end in Never") } @@ -102,6 +103,7 @@ impl ValueWriter for Never { type SExpWriter = Never; type StructWriter = Never; type EExpWriter = Never; + const IS_HUMAN_READABLE: bool = false; delegate_value_writer_to_self!(); } diff --git a/src/lazy/reader.rs b/src/lazy/reader.rs index 2978bc97..d52151bc 100644 --- a/src/lazy/reader.rs +++ b/src/lazy/reader.rs @@ -9,7 +9,7 @@ use crate::lazy::text::raw::v1_1::reader::MacroAddress; use crate::lazy::value::LazyValue; use crate::read_config::ReadConfig; use crate::result::IonFailure; -use crate::{IonError, IonResult}; +use crate::{AnyEncoding, IonEncoding, IonError, IonResult}; /// A binary reader that only reads each value that it visits upon request (that is: lazily). /// @@ -71,6 +71,12 @@ pub(crate) enum NextApplicationValue<'top, D: Decoder> { EndOfStream, } +impl Reader { + pub fn detected_encoding(&self) -> IonEncoding { + self.system_reader.detected_encoding() + } +} + impl Reader { /// Returns the next top-level value in the input stream as `Ok(Some(lazy_value))`. /// If there are no more top-level values in the stream, returns `Ok(None)`. @@ -121,7 +127,7 @@ impl Reader { config: impl Into>, ion_data: Input, ) -> IonResult> { - let system_reader = SystemReader::new(config, ion_data); + let system_reader = SystemReader::new(config, ion_data)?; Ok(Reader { system_reader }) } } diff --git a/src/lazy/streaming_raw_reader.rs b/src/lazy/streaming_raw_reader.rs index e9a4a41a..685dbdf0 100644 --- a/src/lazy/streaming_raw_reader.rs +++ b/src/lazy/streaming_raw_reader.rs @@ -47,13 +47,28 @@ pub struct StreamingRawReader { const DEFAULT_IO_BUFFER_SIZE: usize = 4 * 1024; impl StreamingRawReader { - pub fn new(encoding: Encoding, input: Input) -> StreamingRawReader { - StreamingRawReader { + pub fn new(encoding: Encoding, input: Input) -> IonResult> { + let mut me = StreamingRawReader { encoding, input: input.into_data_source().into(), saved_state: Default::default(), stream_position: 0, + }; + me.detect_encoding()?; + Ok(me) + } + + fn detect_encoding<'top>(&'top mut self) -> IonResult<()> { + if self.buffer_is_empty() { + self.pull_more_data_from_source()?; } + + let available_bytes = unsafe { &*self.input.get() }.buffer(); + let reader = + as LazyRawReader<'top, Encoding>>::new(available_bytes); + self.saved_state = reader.save_state(); + + Ok(()) } /// Gets a reference to the data source and tries to fill its buffer. @@ -467,7 +482,7 @@ mod tests { let empty_context = EncodingContext::empty(); let context = empty_context.get_ref(); let ion = ""; - let mut reader = StreamingRawReader::new(AnyEncoding, ion.as_bytes()); + let mut reader = StreamingRawReader::new(AnyEncoding, ion.as_bytes()).unwrap(); // We expect `Ok(EndOfStream)`, not `Err(Incomplete)`. expect_end_of_stream(reader.next(context)?)?; Ok(()) @@ -476,7 +491,7 @@ mod tests { fn read_example_stream(input: impl IonInput) -> IonResult<()> { let empty_context = EncodingContext::empty(); let context = empty_context.get_ref(); - let mut reader = StreamingRawReader::new(AnyEncoding, input); + let mut reader = StreamingRawReader::new(AnyEncoding, input).unwrap(); expect_string(reader.next(context)?, "foo")?; expect_string(reader.next(context)?, "bar")?; expect_string(reader.next(context)?, "baz")?; @@ -524,7 +539,7 @@ mod tests { fn read_invalid_example_stream(input: impl IonInput) -> IonResult<()> { let empty_context = EncodingContext::empty(); let context = empty_context.get_ref(); - let mut reader = StreamingRawReader::new(AnyEncoding, input); + let mut reader = StreamingRawReader::new(AnyEncoding, input).unwrap(); let result = reader.next(context); // Because the input stream is exhausted, the incomplete value is illegal data and raises // a decoding error. @@ -572,7 +587,7 @@ mod tests { // contains incomplete data that could be misinterpreted by a reader. let empty_context = EncodingContext::empty(); let context = empty_context.get_ref(); - let mut reader = StreamingRawReader::new(v1_0::Text, IonStream::new(input)); + let mut reader = StreamingRawReader::new(v1_0::Text, IonStream::new(input)).unwrap(); assert_eq!(reader.next(context)?.expect_ivm()?.version(), (1, 0)); assert_eq!( diff --git a/src/lazy/system_reader.rs b/src/lazy/system_reader.rs index 93e17002..6b8612f3 100644 --- a/src/lazy/system_reader.rs +++ b/src/lazy/system_reader.rs @@ -109,11 +109,11 @@ impl SystemReader { pub fn new( config: impl Into>, input: Input, - ) -> SystemReader { + ) -> IonResult> { let config = config.into(); - let raw_reader = StreamingRawReader::new(config.encoding(), input); + let raw_reader = StreamingRawReader::new(config.encoding(), input)?; let expanding_reader = ExpandingReader::new(raw_reader, config.catalog); - SystemReader { expanding_reader } + Ok(SystemReader { expanding_reader }) } // Returns `true` if the provided [`LazyRawValue`] is a struct whose first annotation is @@ -368,7 +368,7 @@ mod tests { hello "#, )?; - let mut system_reader = SystemReader::new(Binary, ion_data); + let mut system_reader = SystemReader::new(Binary, ion_data)?; loop { match system_reader.next_item()? { SystemStreamItem::VersionMarker(marker) => { @@ -393,7 +393,7 @@ mod tests { ) "#, )?; - let mut system_reader = SystemReader::new(Binary, ion_data); + let mut system_reader = SystemReader::new(Binary, ion_data)?; loop { match system_reader.next_item()? { SystemStreamItem::Value(value) => { @@ -420,7 +420,7 @@ mod tests { } "#, )?; - let mut system_reader = SystemReader::new(Binary, ion_data); + let mut system_reader = SystemReader::new(Binary, ion_data)?; loop { match system_reader.next_item()? { SystemStreamItem::Value(value) => { @@ -441,14 +441,14 @@ mod tests { use crate::{MapCatalog, SharedSymbolTable}; fn system_reader_for(ion: I) -> SystemReader { - SystemReader::new(AnyEncoding, ion) + SystemReader::new(AnyEncoding, ion).unwrap() } fn system_reader_with_catalog_for( input: Input, catalog: impl Catalog + 'static, ) -> SystemReader { - SystemReader::new(AnyEncoding.with_catalog(catalog), input) + SystemReader::new(AnyEncoding.with_catalog(catalog), input).unwrap() } #[test] diff --git a/src/serde/de.rs b/src/serde/de.rs index 6aa3694b..ee3782a3 100644 --- a/src/serde/de.rs +++ b/src/serde/de.rs @@ -10,7 +10,7 @@ use crate::lazy::value_ref::ValueRef; use crate::result::IonFailure; use crate::serde::decimal::TUNNELED_DECIMAL_TYPE_NAME; use crate::serde::timestamp::TUNNELED_TIMESTAMP_TYPE_NAME; -use crate::{Decimal, IonError, IonResult, IonType, Timestamp}; +use crate::{Decimal, IonEncoding, IonError, IonResult, IonType, Timestamp}; /// Generic method that can deserialize an object from any given type /// that implements `IonInput`. @@ -20,19 +20,27 @@ where I: IonInput, { let mut reader = Reader::new(AnyEncoding, input)?; + let detected_encoding = reader.detected_encoding(); let value = reader.expect_next()?; - let value_deserializer = ValueDeserializer::new(&value); + let value_deserializer = ValueDeserializer::new(&value, detected_encoding); T::deserialize(value_deserializer) } #[derive(Clone, Copy)] pub struct ValueDeserializer<'a, 'de> { + detected_encoding: IonEncoding, pub(crate) value: &'a LazyValue<'de, AnyEncoding>, } impl<'a, 'de> ValueDeserializer<'a, 'de> { - pub(crate) fn new(value: &'a LazyValue<'de, AnyEncoding>) -> Self { - Self { value } + pub(crate) fn new( + value: &'a LazyValue<'de, AnyEncoding>, + detected_encoding: IonEncoding, + ) -> Self { + Self { + value, + detected_encoding, + } } fn deserialize_as_sequence>( @@ -41,8 +49,8 @@ impl<'a, 'de> ValueDeserializer<'a, 'de> { ) -> Result>::Error> { use ValueRef::*; match self.value.read()? { - List(l) => visitor.visit_seq(SequenceIterator(l.iter())), - SExp(l) => visitor.visit_seq(SequenceIterator(l.iter())), + List(l) => visitor.visit_seq(SequenceIterator(l.iter(), self.detected_encoding)), + SExp(l) => visitor.visit_seq(SequenceIterator(l.iter(), self.detected_encoding)), _ => IonResult::decoding_error("expected a list or sexp"), } } @@ -51,7 +59,7 @@ impl<'a, 'de> ValueDeserializer<'a, 'de> { visitor: V, ) -> Result>::Error> { let strukt = self.value.read()?.expect_struct()?; - let struct_as_map = StructAsMap::new(strukt.iter()); + let struct_as_map = StructAsMap::new(strukt.iter(), self.detected_encoding); visitor.visit_map(struct_as_map) } @@ -60,6 +68,10 @@ impl<'a, 'de> ValueDeserializer<'a, 'de> { impl<'a, 'de> de::Deserializer<'de> for ValueDeserializer<'a, 'de> { type Error = IonError; + fn is_human_readable(&self) -> bool { + self.detected_encoding.is_text() + } + fn deserialize_any(self, visitor: V) -> Result where V: Visitor<'de>, @@ -426,7 +438,7 @@ impl<'a, 'de> de::Deserializer<'de> for ValueDeserializer<'a, 'de> { } } -pub(crate) struct SequenceIterator(pub(crate) S); +pub(crate) struct SequenceIterator(pub(crate) S, IonEncoding); impl<'de, S> SeqAccess<'de> for SequenceIterator where @@ -441,7 +453,7 @@ where let Some(lazy_value) = self.0.next().transpose()? else { return Ok(None); }; - let deserializer = ValueDeserializer::new(&lazy_value); + let deserializer = ValueDeserializer::new(&lazy_value, self.1); seed.deserialize(deserializer).map(Some) } } @@ -449,13 +461,15 @@ where struct StructAsMap<'de> { iter: StructIterator<'de, AnyEncoding>, current_field: Option>, + detected_encoding: IonEncoding, } impl<'de> StructAsMap<'de> { - pub fn new(iter: StructIterator<'de, AnyEncoding>) -> Self { + pub fn new(iter: StructIterator<'de, AnyEncoding>, detected_encoding: IonEncoding) -> Self { Self { iter, current_field: None, + detected_encoding, } } } @@ -490,6 +504,7 @@ impl<'de> MapAccess<'de> for StructAsMap<'de> { // This method will only be called when `next_key_seed` reported another field, // so we can unwrap this safely. &self.current_field.as_ref().unwrap().value(), + self.detected_encoding, )) } } diff --git a/src/serde/mod.rs b/src/serde/mod.rs index 121b47e8..9ac884f0 100644 --- a/src/serde/mod.rs +++ b/src/serde/mod.rs @@ -204,6 +204,8 @@ pub use ser::{to_pretty, to_string}; #[cfg(test)] #[cfg(feature = "experimental-serde")] mod tests { + use std::net::IpAddr; + use crate::serde::{from_ion, to_pretty, to_string}; use crate::{Decimal, Element, Timestamp}; @@ -211,6 +213,8 @@ mod tests { use serde::{Deserialize, Serialize}; use serde_with::serde_as; + use super::ser::to_binary; + #[test] fn test_struct() { #[serde_as] @@ -356,4 +360,22 @@ mod tests { let expected = String::from("'embedded quotes'"); assert_eq!(expected, from_ion::(i).unwrap()); } + + #[test] + fn human_readable() { + // IpAddr has different repr based on if codec is considered + // human readable or not {true: string, false: byte array} + let ip: IpAddr = "127.0.0.1".parse().unwrap(); + let expected_binary = [ + 224, 1, 0, 234, 235, 129, 131, 216, 134, 113, 3, 135, 179, 130, 86, 52, 233, 129, 138, + 182, 33, 127, 32, 32, 33, 1, + ]; + let expected_s = "\"127.0.0.1\" "; + let binary = to_binary(&ip).unwrap(); + assert_eq!(&binary[..], &expected_binary[..]); + let s = to_string(&ip).unwrap(); + assert_eq!(s, expected_s); + assert_eq!(&from_ion::(s).unwrap(), &ip); + assert_eq!(&from_ion::(binary).unwrap(), &ip); + } } diff --git a/src/serde/ser.rs b/src/serde/ser.rs index 50326bae..7b0fad71 100644 --- a/src/serde/ser.rs +++ b/src/serde/ser.rs @@ -306,6 +306,10 @@ impl<'a, V: ValueWriter + 'a> ser::Serializer for ValueSerializer<'a, V> { .struct_writer()?, }) } + + fn is_human_readable(&self) -> bool { + V::IS_HUMAN_READABLE + } } pub struct SeqWriter {