Skip to content

Commit

Permalink
Improved text reader error reporting (#897)
Browse files Browse the repository at this point in the history
  • Loading branch information
zslayton authored Jan 16, 2025
1 parent a8f169c commit 53561b4
Show file tree
Hide file tree
Showing 10 changed files with 330 additions and 357 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ compact_str = "0.8.0"
chrono = { version = "0.4", default-features = false, features = ["clock", "std", "wasmbind"] }
delegate = "0.12.0"
thiserror = "1.0"
winnow = { version = "0.6", features = ["simd"] }
winnow = { version = "0.6.24", features = ["simd"] }
num-integer = "0.1.44"
num-traits = "0.2"
arrayvec = "0.7"
Expand Down
2 changes: 0 additions & 2 deletions benches/encoding_primitives.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,6 @@ mod benchmark {
println!("# Values: {NUM_VALUES}");

// TODO: For now, these benchmarks only write values that can be serialized in 8 bytes or fewer.
// This is because `VarUInt` has a bug[1] that causes it to encode very large u64s incorrectly.
// [1]: https://github.com/amazon-ion/ion-rust/issues/689
let unsigned_values = generate_unsigned_values(u64::MIN, (2 << 49) - 1);
let signed_values = generate_signed_values(-2 << 49, (2 << 49) - 1);

Expand Down
24 changes: 13 additions & 11 deletions src/lazy/encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ use crate::lazy::never::Never;
use crate::lazy::text::buffer::{whitespace_and_then, IonParser, TextBuffer};
use crate::lazy::text::encoded_value::EncodedTextValue;
use crate::lazy::text::matched::MatchedValue;
use crate::lazy::text::parse_result::fatal_parse_error;
use crate::lazy::text::raw::r#struct::{LazyRawTextFieldName, RawTextStructIterator};
use crate::lazy::text::raw::reader::LazyRawTextReader_1_0;
use crate::lazy::text::raw::sequence::{
Expand All @@ -42,7 +41,7 @@ use crate::{
};
use std::fmt::Debug;
use std::io;
use winnow::combinator::{alt, opt, separated_pair};
use winnow::combinator::{alt, cut_err, opt, separated_pair};
use winnow::Parser;

/// Marker trait for types that represent an Ion encoding.
Expand Down Expand Up @@ -266,21 +265,21 @@ pub trait TextEncoding<'top>:
fn list_matcher() -> impl IonParser<'top, EncodedTextValue<'top, Self>> {
let make_iter = |buffer: TextBuffer<'top>| RawTextListIterator::<Self>::new(buffer);
let end_matcher = (whitespace_and_then(opt(",")), whitespace_and_then("]")).take();
Self::container_matcher("a list", "[", make_iter, end_matcher)
Self::container_matcher("reading a list", "[", make_iter, end_matcher)
.map(|nested_expr_cache| EncodedTextValue::new(MatchedValue::List(nested_expr_cache)))
}

fn sexp_matcher() -> impl IonParser<'top, EncodedTextValue<'top, Self>> {
let make_iter = |buffer: TextBuffer<'top>| RawTextSExpIterator::<Self>::new(buffer);
let end_matcher = whitespace_and_then(")");
Self::container_matcher("an s-expression", "(", make_iter, end_matcher)
Self::container_matcher("reading an s-expression", "(", make_iter, end_matcher)
.map(|nested_expr_cache| EncodedTextValue::new(MatchedValue::SExp(nested_expr_cache)))
}

fn struct_matcher() -> impl IonParser<'top, EncodedTextValue<'top, Self>> {
let make_iter = |buffer: TextBuffer<'top>| RawTextStructIterator::new(buffer);
let end_matcher = (whitespace_and_then(opt(",")), whitespace_and_then("}")).take();
Self::container_matcher("a struct", "{", make_iter, end_matcher)
Self::container_matcher("reading a struct", "{", make_iter, end_matcher)
.map(|nested_expr_cache| EncodedTextValue::new(MatchedValue::Struct(nested_expr_cache)))
}

Expand Down Expand Up @@ -321,7 +320,7 @@ pub trait TextEncoding<'top>:
return input.incomplete(label);
}
Err(e) => {
return fatal_parse_error(*input, format!("failed to parse {label}: {e:?}"))
return input.invalid(format!("{e}")).context(label).cut();
}
};
// If there are no errors, add the new child expr to the cache.
Expand Down Expand Up @@ -359,9 +358,11 @@ impl<'top> TextEncoding<'top> for TextEncoding_1_0 {
fn field_expr_matcher() -> impl IonParser<'top, LazyRawFieldExpr<'top, Self>> {
// A (name, eexp) pair
separated_pair(
whitespace_and_then(TextBuffer::match_struct_field_name),
whitespace_and_then(":"),
whitespace_and_then(TextBuffer::match_annotated_value::<Self>),
whitespace_and_then(TextBuffer::match_struct_field_name)
.context("matching a struct field name"),
whitespace_and_then(":").context("matching a struct field delimiter (`:`)"),
whitespace_and_then(TextBuffer::match_annotated_value::<Self>)
.context("matching a struct field value"),
)
.map(|(field_name, invocation)| {
LazyRawFieldExpr::NameValue(LazyRawTextFieldName::new(field_name), invocation)
Expand All @@ -384,7 +385,7 @@ impl<'top> TextEncoding<'top> for TextEncoding_1_1 {
}

fn field_expr_matcher() -> impl IonParser<'top, LazyRawFieldExpr<'top, Self>> {
alt((
cut_err(alt((
// A (name, eexp) pair. Check for this first to prevent `(:` from being considered
// the beginning of an s-expression.
separated_pair(
Expand All @@ -407,7 +408,8 @@ impl<'top> TextEncoding<'top> for TextEncoding_1_1 {
}),
// An e-expression
TextBuffer::match_e_expression.map(LazyRawFieldExpr::EExp),
))
)))
.context("matching a struct field")
}
}

Expand Down
Loading

0 comments on commit 53561b4

Please sign in to comment.