From 9af5c75899993d619556972ca2f2f878df1037f2 Mon Sep 17 00:00:00 2001 From: Jason <940334249@qq.com> Date: Tue, 24 Feb 2026 03:47:00 +0800 Subject: [PATCH 01/80] refactor: simplify iterator using cloned().map(Some) (#9449) # Which issue does this PR close? # Rationale for this change Use .cloned().map(Some) instead of .map(|b| Some(b.clone())) for better readability and idiomatic Rust style. # What changes are included in this PR? # Are these changes tested? # Are there any user-facing changes? --- parquet/src/arrow/arrow_reader/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parquet/src/arrow/arrow_reader/mod.rs b/parquet/src/arrow/arrow_reader/mod.rs index 670f9d80c5a3..1b02c4ae25d3 100644 --- a/parquet/src/arrow/arrow_reader/mod.rs +++ b/parquet/src/arrow/arrow_reader/mod.rs @@ -3510,7 +3510,7 @@ pub(crate) mod tests { }) .collect() } - None => values.iter().flatten().map(|b| Some(b.clone())).collect(), + None => values.iter().flatten().cloned().map(Some).collect(), }; data } From ff736e0167348ffdd66d7502614cc7749c8690c4 Mon Sep 17 00:00:00 2001 From: Jason <940334249@qq.com> Date: Tue, 24 Feb 2026 08:53:50 +0800 Subject: [PATCH 02/80] docs(parquet): Fix broken links in README (#9467) Fix missing link in Parquet README --- parquet/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/parquet/README.md b/parquet/README.md index 8317b4dbd4ff..9e4e91d85d73 100644 --- a/parquet/README.md +++ b/parquet/README.md @@ -77,6 +77,7 @@ Please see the [Implementation Status Page] on the [Apache Parquet] website for information on the status of this implementation. 
[implementation status page]: https://parquet.apache.org/docs/file-format/implementationstatus/ +[apache parquet]: https://parquet.apache.org/ ## License From a2cffdbf85c94e6850b725ce2f9d0f2d9b5ebb32 Mon Sep 17 00:00:00 2001 From: Eyad Ibrahim <159264031+Eyad3skr@users.noreply.github.com> Date: Tue, 24 Feb 2026 14:16:08 +0200 Subject: [PATCH 03/80] Add `NullBuffer::from_unsliced_buffer` helper and refactor call sites (#9411) Implements a helper to replace the pattern of creating a `BooleanBuffer` from an unsliced validity bitmap and filtering by null count. Previously this was done with `BooleanBuffer::new(...)` plus `Some(NullBuffer::new(...)).filter(|n| n.null_count() > 0)`; now it is a single call to `NullBuffer::from_unsliced_buffer(buffer, len)`, which returns `Some(NullBuffer)` when there are nulls and `None` when all values are valid. - Added `from_unsliced_buffer` in `arrow-buffer/src/buffer/null.rs` with tests for nulls, all valid, all null, empty - Refactor `FixedSizeBinaryArray::try_from_iter_with_size` and `try_from_sparse_iter_with_size` to use it - Refactor `take_nulls` in `arrow-select` to use it Closes #9385 --- arrow-array/src/array/boolean_array.rs | 7 +-- .../src/array/fixed_size_binary_array.rs | 8 +-- arrow-array/src/array/primitive_array.rs | 7 +-- arrow-buffer/src/buffer/null.rs | 55 +++++++++++++++++++ arrow-select/src/take.rs | 8 +-- arrow-string/src/regexp.rs | 3 +- arrow-string/src/substring.rs | 14 ++--- .../src/arrow/array_reader/primitive_array.rs | 2 +- parquet/src/arrow/buffer/view_buffer.rs | 6 +- 9 files changed, 76 insertions(+), 34 deletions(-) diff --git a/arrow-array/src/array/boolean_array.rs b/arrow-array/src/array/boolean_array.rs index 79865b88fff6..65e19c80f8e8 100644 --- a/arrow-array/src/array/boolean_array.rs +++ b/arrow-array/src/array/boolean_array.rs @@ -534,12 +534,7 @@ impl BooleanArray { }); let values = BooleanBuffer::new(val_builder.into(), 0, data_len); - let nulls = Some(NullBuffer::new(BooleanBuffer::new( 
null_builder.into(), - 0, - data_len, - ))) - .filter(|n| n.null_count() > 0); + let nulls = NullBuffer::from_unsliced_buffer(null_builder, data_len); BooleanArray::new(values, nulls) } } diff --git a/arrow-array/src/array/fixed_size_binary_array.rs b/arrow-array/src/array/fixed_size_binary_array.rs index f9a4919b2c30..e3f08c066ee0 100644 --- a/arrow-array/src/array/fixed_size_binary_array.rs +++ b/arrow-array/src/array/fixed_size_binary_array.rs @@ -19,7 +19,7 @@ use crate::array::print_long_array; use crate::iterator::FixedSizeBinaryIter; use crate::{Array, ArrayAccessor, ArrayRef, FixedSizeListArray, Scalar}; use arrow_buffer::buffer::NullBuffer; -use arrow_buffer::{ArrowNativeType, BooleanBuffer, Buffer, MutableBuffer, bit_util}; +use arrow_buffer::{ArrowNativeType, Buffer, MutableBuffer, bit_util}; use arrow_data::{ArrayData, ArrayDataBuilder}; use arrow_schema::{ArrowError, DataType}; use std::any::Any; @@ -328,8 +328,7 @@ impl FixedSizeBinaryArray { )); } - let null_buf = BooleanBuffer::new(null_buf.into(), 0, len); - let nulls = Some(NullBuffer::new(null_buf)).filter(|n| n.null_count() > 0); + let nulls = NullBuffer::from_unsliced_buffer(null_buf, len); let size = size.unwrap_or(0) as i32; Ok(Self { @@ -406,8 +405,7 @@ impl FixedSizeBinaryArray { Ok(()) })?; - let null_buf = BooleanBuffer::new(null_buf.into(), 0, len); - let nulls = Some(NullBuffer::new(null_buf)).filter(|n| n.null_count() > 0); + let nulls = NullBuffer::from_unsliced_buffer(null_buf, len); Ok(Self { data_type: DataType::FixedSizeBinary(size), diff --git a/arrow-array/src/array/primitive_array.rs b/arrow-array/src/array/primitive_array.rs index 29189b450a40..d9c8ff66d0cb 100644 --- a/arrow-array/src/array/primitive_array.rs +++ b/arrow-array/src/array/primitive_array.rs @@ -25,9 +25,7 @@ use crate::timezone::Tz; use crate::trusted_len::trusted_len_unzip; use crate::types::*; use crate::{Array, ArrayAccessor, ArrayRef, Scalar}; -use arrow_buffer::{ - ArrowNativeType, BooleanBuffer, Buffer, 
NullBuffer, NullBufferBuilder, ScalarBuffer, i256, -}; +use arrow_buffer::{ArrowNativeType, Buffer, NullBuffer, NullBufferBuilder, ScalarBuffer, i256}; use arrow_data::bit_iterator::try_for_each_valid_idx; use arrow_data::{ArrayData, ArrayDataBuilder}; use arrow_schema::{ArrowError, DataType}; @@ -1490,8 +1488,7 @@ impl PrimitiveArray { let (null, buffer) = unsafe { trusted_len_unzip(iterator) }; - let nulls = - Some(NullBuffer::new(BooleanBuffer::new(null, 0, len))).filter(|n| n.null_count() > 0); + let nulls = NullBuffer::from_unsliced_buffer(null, len); PrimitiveArray::new(ScalarBuffer::from(buffer), nulls) } } diff --git a/arrow-buffer/src/buffer/null.rs b/arrow-buffer/src/buffer/null.rs index 64a21d99e830..97034a631ef8 100644 --- a/arrow-buffer/src/buffer/null.rs +++ b/arrow-buffer/src/buffer/null.rs @@ -222,6 +222,15 @@ impl NullBuffer { pub fn buffer(&self) -> &Buffer { self.buffer.inner() } + + /// Create a [`NullBuffer`] from an *unsliced* validity bitmap (`offset = 0` **bits**) of length `len`. + /// + /// Returns `None` if there are no nulls (all values valid). 
+ pub fn from_unsliced_buffer(buffer: impl Into, len: usize) -> Option { + let bb = BooleanBuffer::new(buffer.into(), 0, len); + let nb = NullBuffer::new(bb); + (nb.null_count() > 0).then_some(nb) + } } impl<'a> IntoIterator for &'a NullBuffer { @@ -266,6 +275,7 @@ impl FromIterator for NullBuffer { #[cfg(test)] mod tests { use super::*; + #[test] fn test_size() { // This tests that the niche optimisation eliminates the overhead of an option @@ -274,4 +284,49 @@ mod tests { std::mem::size_of::>() ); } + + #[test] + fn test_from_unsliced_buffer_with_nulls() { + // 0b10110010 → null(0), valid(1), null(2), null(3), valid(4), valid(5), null(6), valid(7) + let buf = Buffer::from([0b10110010u8]); + let result = NullBuffer::from_unsliced_buffer(buf, 8); + assert!(result.is_some()); + let nb = result.unwrap(); + assert_eq!(nb.len(), 8); + assert_eq!(nb.null_count(), 4); + assert!(nb.is_null(0)); + assert!(nb.is_valid(1)); + assert!(nb.is_null(2)); + assert!(nb.is_null(3)); + assert!(nb.is_valid(4)); + assert!(nb.is_valid(5)); + assert!(nb.is_null(6)); + assert!(nb.is_valid(7)); + } + + #[test] + fn test_from_unsliced_buffer_all_valid() { + // All bits set = all valid, no nulls + let buf = Buffer::from([0b11111111u8]); + let result = NullBuffer::from_unsliced_buffer(buf, 8); + assert!(result.is_none()); + } + + #[test] + fn test_from_unsliced_buffer_all_null() { + // No bits set = all null + let buf = Buffer::from([0b00000000u8]); + let result = NullBuffer::from_unsliced_buffer(buf, 8); + assert!(result.is_some()); + let nb = result.unwrap(); + assert_eq!(nb.len(), 8); + assert_eq!(nb.null_count(), 8); + } + + #[test] + fn test_from_unsliced_buffer_empty() { + let buf = Buffer::from([]); + let result = NullBuffer::from_unsliced_buffer(buf, 0); + assert!(result.is_none()); + } } diff --git a/arrow-select/src/take.rs b/arrow-select/src/take.rs index 3e34e794f11f..43c13e66fb0e 100644 --- a/arrow-select/src/take.rs +++ b/arrow-select/src/take.rs @@ -415,10 +415,10 @@ fn 
take_nulls( indices: &PrimitiveArray, ) -> Option { match values.filter(|n| n.null_count() > 0) { - Some(n) => { - let buffer = take_bits(n.inner(), indices); - Some(NullBuffer::new(buffer)).filter(|n| n.null_count() > 0) - } + Some(n) => NullBuffer::from_unsliced_buffer( + take_bits(n.inner(), indices).into_inner(), + indices.len(), + ), None => indices.nulls().cloned(), } } diff --git a/arrow-string/src/regexp.rs b/arrow-string/src/regexp.rs index ad678598ea6c..07520a209095 100644 --- a/arrow-string/src/regexp.rs +++ b/arrow-string/src/regexp.rs @@ -203,8 +203,7 @@ where let nulls = array .nulls() .map(|n| n.inner().sliced()) - .map(|b| NullBuffer::new(BooleanBuffer::new(b, 0, array.len()))) - .filter(|n| n.null_count() > 0); + .and_then(|b| NullBuffer::from_unsliced_buffer(b, array.len())); Ok(BooleanArray::new(values, nulls)) } diff --git a/arrow-string/src/substring.rs b/arrow-string/src/substring.rs index 96858ee11763..05b3888a444a 100644 --- a/arrow-string/src/substring.rs +++ b/arrow-string/src/substring.rs @@ -22,7 +22,7 @@ use arrow_array::builder::BufferBuilder; use arrow_array::types::*; use arrow_array::*; -use arrow_buffer::{ArrowNativeType, BooleanBuffer, MutableBuffer, NullBuffer, OffsetBuffer}; +use arrow_buffer::{ArrowNativeType, MutableBuffer, NullBuffer, OffsetBuffer}; use arrow_schema::{ArrowError, DataType}; use num_traits::Zero; use std::cmp::Ordering; @@ -216,8 +216,7 @@ pub fn substring_by_char( let nulls = array .nulls() .map(|n| n.inner().sliced()) - .map(|b| NullBuffer::new(BooleanBuffer::new(b, 0, array.len()))) - .filter(|n| n.null_count() > 0); + .and_then(|b| NullBuffer::from_unsliced_buffer(b, array.len())); Ok(GenericStringArray::::new( offsets, values, nulls, )) @@ -318,8 +317,7 @@ where let nulls = array .nulls() .map(|n| n.inner().sliced()) - .map(|b| NullBuffer::new(BooleanBuffer::new(b, 0, array.len()))) - .filter(|n| n.null_count() > 0); + .and_then(|b| NullBuffer::from_unsliced_buffer(b, array.len())); 
Ok(Arc::new(GenericByteArray::::new(offsets, values, nulls))) } @@ -356,8 +354,8 @@ fn fixed_size_binary_substring( let mut nulls = array .nulls() .map(|n| n.inner().sliced()) - .map(|b| NullBuffer::new(BooleanBuffer::new(b, 0, num_of_elements))) - .filter(|n| n.null_count() > 0); + .and_then(|b| NullBuffer::from_unsliced_buffer(b, num_of_elements)); + if new_len == 0 && nulls.is_none() { // FixedSizeBinaryArray::new takes length from the values buffer, except when size == 0. // In that case it uses the null buffer length, so preserve the original length here. @@ -365,6 +363,7 @@ fn fixed_size_binary_substring( // otherwise it collapses to an empty array (len=0). nulls = Some(NullBuffer::new_valid(num_of_elements)); } + Ok(Arc::new(FixedSizeBinaryArray::new( new_len, new_values.into(), @@ -375,6 +374,7 @@ fn fixed_size_binary_substring( #[cfg(test)] mod tests { use super::*; + use arrow_buffer::BooleanBuffer; use arrow_buffer::Buffer; /// A helper macro to generate test cases. diff --git a/parquet/src/arrow/array_reader/primitive_array.rs b/parquet/src/arrow/array_reader/primitive_array.rs index dae42c4c7124..e1c944f60c42 100644 --- a/parquet/src/arrow/array_reader/primitive_array.rs +++ b/parquet/src/arrow/array_reader/primitive_array.rs @@ -163,7 +163,7 @@ where let nulls = self .record_reader .consume_bitmap_buffer() - .map(|b| NullBuffer::new(BooleanBuffer::new(b, 0, len))); + .and_then(|b| NullBuffer::from_unsliced_buffer(b, len)); let array: ArrayRef = match T::get_physical_type() { PhysicalType::BOOLEAN => Arc::new(BooleanArray::new( diff --git a/parquet/src/arrow/buffer/view_buffer.rs b/parquet/src/arrow/buffer/view_buffer.rs index 1cccfd0f1b20..a93674663f7b 100644 --- a/parquet/src/arrow/buffer/view_buffer.rs +++ b/parquet/src/arrow/buffer/view_buffer.rs @@ -17,7 +17,7 @@ use crate::arrow::record_reader::buffer::ValuesBuffer; use arrow_array::{ArrayRef, BinaryViewArray, StringViewArray}; -use arrow_buffer::{BooleanBuffer, Buffer, NullBuffer, ScalarBuffer}; 
+use arrow_buffer::{Buffer, NullBuffer, ScalarBuffer}; use arrow_schema::DataType as ArrowType; use std::sync::Arc; @@ -56,9 +56,7 @@ impl ViewBuffer { pub fn into_array(self, null_buffer: Option, data_type: &ArrowType) -> ArrayRef { let len = self.views.len(); let views = ScalarBuffer::from(self.views); - let nulls = null_buffer - .map(|b| NullBuffer::new(BooleanBuffer::new(b, 0, len))) - .filter(|n| n.null_count() != 0); + let nulls = null_buffer.and_then(|b| NullBuffer::from_unsliced_buffer(b, len)); match data_type { ArrowType::Utf8View => { // Safety: views were created correctly, and checked that the data is utf8 when building the buffer From 2bf6909305091c69edddb0f16c76184edd206141 Mon Sep 17 00:00:00 2001 From: Konstantin Tarasov <33369833+sdf-jkl@users.noreply.github.com> Date: Wed, 25 Feb 2026 16:56:53 -0500 Subject: [PATCH 04/80] Add list-like types support to VariantArray::try_new (#9457) # Which issue does this PR close? - Closes #9455. # Rationale for this change check issue # What changes are included in this PR? Added list types support to `VariantArray` data type checking # Are these changes tested? # Are there any user-facing changes? --- parquet-variant-compute/src/variant_array.rs | 110 +++++++++++++++++-- 1 file changed, 102 insertions(+), 8 deletions(-) diff --git a/parquet-variant-compute/src/variant_array.rs b/parquet-variant-compute/src/variant_array.rs index 250852d021bd..145de5edfb70 100644 --- a/parquet-variant-compute/src/variant_array.rs +++ b/parquet-variant-compute/src/variant_array.rs @@ -1181,16 +1181,23 @@ fn canonicalize_and_verify_data_type(data_type: &DataType) -> Result borrow!(), FixedSizeBinary(_) | FixedSizeList(..) => fail!(), - // We can _possibly_ allow (some of) these some day? 
- ListView(_) | LargeList(_) | LargeListView(_) => { - fail!() - } - - // Lists and struct are allowed, maps and unions are not + // List-like containers and struct are allowed, maps and unions are not List(field) => match canonicalize_and_verify_field(field)? { Cow::Borrowed(_) => borrow!(), Cow::Owned(new_field) => Cow::Owned(DataType::List(new_field)), }, + LargeList(field) => match canonicalize_and_verify_field(field)? { + Cow::Borrowed(_) => borrow!(), + Cow::Owned(new_field) => Cow::Owned(DataType::LargeList(new_field)), + }, + ListView(field) => match canonicalize_and_verify_field(field)? { + Cow::Borrowed(_) => borrow!(), + Cow::Owned(new_field) => Cow::Owned(DataType::ListView(new_field)), + }, + LargeListView(field) => match canonicalize_and_verify_field(field)? { + Cow::Borrowed(_) => borrow!(), + Cow::Owned(new_field) => Cow::Owned(DataType::LargeListView(new_field)), + }, // Struct is used by the internal layout, and can also represent a shredded variant object. Struct(fields) => { // Avoid allocation unless at least one field changes, to avoid unnecessary deep cloning @@ -1235,9 +1242,10 @@ mod test { use super::*; use arrow::array::{ - BinaryViewArray, Decimal32Array, Decimal64Array, Decimal128Array, Int32Array, - Time64MicrosecondArray, + BinaryViewArray, Decimal32Array, Decimal64Array, Decimal128Array, Int32Array, Int64Array, + LargeListArray, LargeListViewArray, ListArray, ListViewArray, Time64MicrosecondArray, }; + use arrow::buffer::{OffsetBuffer, ScalarBuffer}; use arrow_schema::{Field, Fields}; use parquet_variant::{EMPTY_VARIANT_METADATA_BYTES, ShortString}; @@ -1335,6 +1343,17 @@ mod test { Arc::new(Int32Array::from(vec![1])) } + fn make_variant_struct_with_typed_value(typed_value: ArrayRef) -> StructArray { + let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n( + EMPTY_VARIANT_METADATA_BYTES, + typed_value.len(), + )); + StructArrayBuilder::new() + .with_field("metadata", Arc::new(metadata), false) + 
.with_field("typed_value", typed_value, true) + .build() + } + #[test] fn all_null_shredding_state() { // Verify the shredding state is AllNull @@ -1420,6 +1439,81 @@ mod test { )); } + #[test] + fn canonicalize_and_verify_list_like_data_types() { + // `parquet/tests/variant_integration.rs` validates Parquet shredded-variant fixtures that + // use Parquet LIST encoding, but those fixtures do not cover Arrow-specific list container + // variants (`LargeList`, `ListView`, `LargeListView`) accepted by `VariantArray::try_new`. + let make_item_binary = || Arc::new(Field::new("item", DataType::Binary, true)); + let make_item_binary_view = || Arc::new(Field::new("item", DataType::BinaryView, true)); + + let cases = vec![ + ( + DataType::LargeList(make_item_binary()), + DataType::LargeList(make_item_binary_view()), + ), + ( + DataType::ListView(make_item_binary()), + DataType::ListView(make_item_binary_view()), + ), + ( + DataType::LargeListView(make_item_binary()), + DataType::LargeListView(make_item_binary_view()), + ), + ]; + + for (input, expected) in cases { + assert_eq!( + canonicalize_and_verify_data_type(&input).unwrap().as_ref(), + &expected + ); + } + } + + #[test] + fn variant_array_try_new_supports_list_like_typed_value() { + let item_field = Arc::new(Field::new("item", DataType::Int64, true)); + let values: ArrayRef = Arc::new(Int64Array::from(vec![Some(1), None, Some(3)])); + + let typed_values = vec![ + Arc::new(ListArray::new( + item_field.clone(), + OffsetBuffer::new(ScalarBuffer::from(vec![0, 2, 3])), + values.clone(), + None, + )) as ArrayRef, + Arc::new(LargeListArray::new( + item_field.clone(), + OffsetBuffer::new(ScalarBuffer::from(vec![0_i64, 2, 3])), + values.clone(), + None, + )) as ArrayRef, + Arc::new(ListViewArray::new( + item_field.clone(), + ScalarBuffer::from(vec![0, 2]), + ScalarBuffer::from(vec![2, 1]), + values.clone(), + None, + )) as ArrayRef, + Arc::new(LargeListViewArray::new( + item_field, + ScalarBuffer::from(vec![0_i64, 2]), + 
ScalarBuffer::from(vec![2_i64, 1]), + values, + None, + )) as ArrayRef, + ]; + + for typed_value in typed_values { + let input = make_variant_struct_with_typed_value(typed_value.clone()); + let variant_array = VariantArray::try_new(&input).unwrap(); + assert_eq!( + variant_array.typed_value_field().unwrap().data_type(), + typed_value.data_type(), + ); + } + } + #[test] fn test_variant_array_iterable() { let mut b = VariantArrayBuilder::new(6); From 183f8c1c5361ac5f026d6fbfa8e99a2920dcb652 Mon Sep 17 00:00:00 2001 From: Bruno Date: Fri, 27 Feb 2026 14:34:49 +0100 Subject: [PATCH 05/80] Add PrimitiveRunBuilder::with_data_type() to customize the values' DataType (#9473) This enables setting a timezone or precision & scale on parameterized DataType values. Note: I think the panic is unfortunate, and a try_with_data_type() would be sensible. # Which issue does this PR close? - Closes https://github.com/apache/arrow-rs/issues/8042. # Are these changes tested? Yes # Are there any user-facing changes? - Adds `PrimitiveRunBuilder::with_data_type`. --- .../src/builder/primitive_run_builder.rs | 54 ++++++++++++++++++- 1 file changed, 52 insertions(+), 2 deletions(-) diff --git a/arrow-array/src/builder/primitive_run_builder.rs b/arrow-array/src/builder/primitive_run_builder.rs index 52bdaa6f40e4..c1dc0d8d7d4b 100644 --- a/arrow-array/src/builder/primitive_run_builder.rs +++ b/arrow-array/src/builder/primitive_run_builder.rs @@ -108,6 +108,20 @@ where prev_run_end_index: 0, } } + + /// Overrides the data type of the values child array. + /// + /// By default, `V::DATA_TYPE` is used (via [`PrimitiveBuilder`]). This + /// allows setting the timezone of a Timestamp, the precision & scale of a + /// Decimal, etc. + /// + /// # Panics + /// + /// This method panics if `values_builder` rejects `data_type`. 
+ pub fn with_data_type(mut self, data_type: arrow_schema::DataType) -> Self { + self.values_builder = self.values_builder.with_data_type(data_type); + self + } } impl ArrayBuilder for PrimitiveRunBuilder @@ -259,10 +273,12 @@ where #[cfg(test)] mod tests { + use arrow_schema::DataType; + use crate::builder::PrimitiveRunBuilder; use crate::cast::AsArray; - use crate::types::{Int16Type, UInt32Type}; - use crate::{Array, UInt32Array}; + use crate::types::{Decimal128Type, Int16Type, TimestampMicrosecondType, UInt32Type}; + use crate::{Array, Decimal128Array, TimestampMicrosecondArray, UInt32Array}; #[test] fn test_primitive_ree_array_builder() { @@ -310,4 +326,38 @@ mod tests { &[1, 2, 5, 4, 6, 2] ); } + + #[test] + #[should_panic] + fn test_override_data_type_invalid() { + PrimitiveRunBuilder::::new().with_data_type(DataType::UInt64); + } + + #[test] + fn test_override_data_type() { + // Noop. + PrimitiveRunBuilder::::new().with_data_type(DataType::UInt32); + + // Setting scale & precision. + let mut builder = PrimitiveRunBuilder::::new() + .with_data_type(DataType::Decimal128(1, 2)); + builder.append_value(123); + let array = builder.finish(); + let array = array.downcast::().unwrap(); + let values = array.values(); + assert_eq!(values.precision(), 1); + assert_eq!(values.scale(), 2); + + // Setting timezone. + let mut builder = PrimitiveRunBuilder::::new() + .with_data_type(DataType::Timestamp( + arrow_schema::TimeUnit::Microsecond, + Some("Europe/Paris".into()), + )); + builder.append_value(1); + let array = builder.finish(); + let array = array.downcast::().unwrap(); + let values = array.values(); + assert_eq!(values.timezone(), Some("Europe/Paris")); + } } From ae934888bb87196d272340bc528e93dd516bc9e6 Mon Sep 17 00:00:00 2001 From: Mikhail Zabaluev Date: Fri, 27 Feb 2026 20:09:41 +0200 Subject: [PATCH 06/80] fix: resolution of complex type variants in Avro unions (#9328) # Which issue does this PR close? 
- Closes #9336 # Rationale for this change When an Avro reader schema has a union type that needs to be resolved against the type in the writer schema, resolution information other than primitive type promotions is not properly handled when creating the decoder. For example, when the reader schema has a nullable record field that has an added nested field on top of the fields defined in the writer schema, the record type resolution needs to be applied, using a projection with the default field value. # What changes are included in this PR? Extend the union resolution information in the decoder with variant data for enum remapping and record projection. The `Projector` data structure, together with the `Skipper` decoders, forms part of this information, which necessitated some refactoring. # Are these changes tested? TODO: - [x] Debug failing tests including a busy-loop failure mode. - [ ] Add more unit tests exercising the complex resolutions. # Are there any user-facing changes? No. --- arrow-avro/src/codec.rs | 415 ++++++++++++++++---- arrow-avro/src/reader/mod.rs | 361 +++++++++++++++-- arrow-avro/src/reader/record.rs | 661 ++++++++++++++++++++------------ arrow-avro/src/schema.rs | 28 +- 4 files changed, 1097 insertions(+), 368 deletions(-) diff --git a/arrow-avro/src/codec.rs b/arrow-avro/src/codec.rs index d54c6602dad6..d20a71425d3e 100644 --- a/arrow-avro/src/codec.rs +++ b/arrow-avro/src/codec.rs @@ -141,7 +141,7 @@ impl Display for Promotion { pub(crate) struct ResolvedUnion { /// For each writer branch index, the reader branch index and how to read it. /// `None` means the writer branch doesn't resolve against the reader. 
- pub(crate) writer_to_reader: Arc<[Option<(usize, Promotion)>]>, + pub(crate) writer_to_reader: Arc<[Option<(usize, ResolutionInfo)>]>, /// Whether the writer schema at this site is a union pub(crate) writer_is_union: bool, /// Whether the reader schema at this site is a union @@ -1748,9 +1748,21 @@ impl<'a> Maker<'a> { nullable_union_variants(writer_variants), nullable_union_variants(reader_variants), ) { - (Some((w_nb, w_nonnull)), Some((_r_nb, r_nonnull))) => { - let mut dt = self.make_data_type(w_nonnull, Some(r_nonnull), namespace)?; + (Some((w_nb, w_nonnull)), Some((r_nb, r_nonnull))) => { + let mut dt = self.resolve_type(w_nonnull, r_nonnull, namespace)?; + let mut writer_to_reader = vec![None, None]; + writer_to_reader[w_nb.non_null_index()] = Some(( + r_nb.non_null_index(), + dt.resolution + .take() + .unwrap_or(ResolutionInfo::Promotion(Promotion::Direct)), + )); dt.nullability = Some(w_nb); + dt.resolution = Some(ResolutionInfo::Union(ResolvedUnion { + writer_to_reader: Arc::from(writer_to_reader), + writer_is_union: true, + reader_is_union: true, + })); #[cfg(feature = "avro_custom_types")] Self::propagate_nullability_into_ree(&mut dt, w_nb); Ok(dt) @@ -1759,12 +1771,17 @@ impl<'a> Maker<'a> { } } (Schema::Union(writer_variants), reader_non_union) => { - let writer_to_reader: Vec> = writer_variants + let writer_to_reader: Vec> = writer_variants .iter() .map(|writer| { self.resolve_type(writer, reader_non_union, namespace) .ok() - .map(|tmp| (0usize, Self::coercion_from(&tmp))) + .map(|tmp| { + let resolution = tmp + .resolution + .unwrap_or(ResolutionInfo::Promotion(Promotion::Direct)); + (0usize, resolution) + }) }) .collect(); let mut dt = self.parse_type(reader_non_union, namespace)?; @@ -1780,54 +1797,44 @@ impl<'a> Maker<'a> { nullable_union_variants(reader_variants) { let mut dt = self.resolve_type(writer_non_union, non_null_branch, namespace)?; - let non_null_idx = match nullability { - Nullability::NullFirst => 1, - Nullability::NullSecond => 
0, - }; #[cfg(feature = "avro_custom_types")] Self::propagate_nullability_into_ree(&mut dt, nullability); dt.nullability = Some(nullability); - let promotion = Self::coercion_from(&dt); - dt.resolution = Some(ResolutionInfo::Union(ResolvedUnion { - writer_to_reader: Arc::from(vec![Some((non_null_idx, promotion))]), - writer_is_union: false, - reader_is_union: true, - })); + // Ensure resolution is set to a non-Union variant to suppress + // reading the union tag which is the default behavior. + if dt.resolution.is_none() { + dt.resolution = Some(ResolutionInfo::Promotion(Promotion::Direct)); + } Ok(dt) } else { - let mut best_match: Option<(usize, AvroDataType, Promotion)> = None; - for (i, variant) in reader_variants.iter().enumerate() { - if let Ok(resolved_dt) = - self.resolve_type(writer_non_union, variant, namespace) - { - let promotion = Self::coercion_from(&resolved_dt); - if promotion == Promotion::Direct { - best_match = Some((i, resolved_dt, promotion)); - break; - } else if best_match.is_none() { - best_match = Some((i, resolved_dt, promotion)); - } - } - } - let Some((match_idx, match_dt, promotion)) = best_match else { + let Some((match_idx, mut match_dt)) = + self.find_best_union_match(writer_non_union, reader_variants, namespace) + else { return Err(ArrowError::SchemaError( "Writer schema does not match any reader union branch".to_string(), )); }; - let mut children = Vec::with_capacity(reader_variants.len()); + // Steal the resolution info from the matching reader branch + // for the Union resolution, but preserve possible resolution + // information on its inner types. + // For other branches, resolution is irrelevant, + // so just parse them. 
+ let resolution = match_dt + .resolution + .take() + .unwrap_or(ResolutionInfo::Promotion(Promotion::Direct)); let mut match_dt = Some(match_dt); - for (i, variant) in reader_variants.iter().enumerate() { - if i == match_idx { - if let Some(mut dt) = match_dt.take() { - if matches!(dt.resolution, Some(ResolutionInfo::Promotion(_))) { - dt.resolution = None; - } - children.push(dt); + let children = reader_variants + .iter() + .enumerate() + .map(|(idx, variant)| { + if idx == match_idx { + Ok(match_dt.take().unwrap()) + } else { + self.parse_type(variant, namespace) } - } else { - children.push(self.parse_type(variant, namespace)?); - } - } + }) + .collect::, _>>()?; let union_fields = build_union_fields(&children)?; let mut dt = AvroDataType::new( Codec::Union(children.into(), union_fields, UnionMode::Dense), @@ -1835,7 +1842,7 @@ impl<'a> Maker<'a> { None, ); dt.resolution = Some(ResolutionInfo::Union(ResolvedUnion { - writer_to_reader: Arc::from(vec![Some((match_idx, promotion))]), + writer_to_reader: Arc::from(vec![Some((match_idx, resolution))]), writer_is_union: false, reader_is_union: true, })); @@ -1870,34 +1877,30 @@ impl<'a> Maker<'a> { } } - #[inline] - fn coercion_from(dt: &AvroDataType) -> Promotion { - match dt.resolution.as_ref() { - Some(ResolutionInfo::Promotion(promotion)) => *promotion, - _ => Promotion::Direct, - } - } - - fn find_best_promotion( + fn find_best_union_match( &mut self, writer: &Schema<'a>, reader_variants: &[Schema<'a>], namespace: Option<&'a str>, - ) -> Option<(usize, Promotion)> { - let mut first_promotion: Option<(usize, Promotion)> = None; + ) -> Option<(usize, AvroDataType)> { + let mut first_resolution = None; for (reader_index, reader) in reader_variants.iter().enumerate() { - if let Ok(tmp) = self.resolve_type(writer, reader, namespace) { - let promotion = Self::coercion_from(&tmp); - if promotion == Promotion::Direct { - // An exact match is best, return immediately. 
- return Some((reader_index, promotion)); - } else if first_promotion.is_none() { - // Store the first valid promotion but keep searching for a direct match. - first_promotion = Some((reader_index, promotion)); - } + if let Ok(dt) = self.resolve_type(writer, reader, namespace) { + match &dt.resolution { + None | Some(ResolutionInfo::Promotion(Promotion::Direct)) => { + // An exact match is best, return immediately. + return Some((reader_index, dt)); + } + Some(_) => { + if first_resolution.is_none() { + // Store the first valid promotion but keep searching for a direct match. + first_resolution = Some((reader_index, dt)); + } + } + }; } } - first_promotion + first_resolution } fn resolve_unions<'s>( @@ -1906,15 +1909,34 @@ impl<'a> Maker<'a> { reader_variants: &'s [Schema<'a>], namespace: Option<&'a str>, ) -> Result { + let mut resolved_reader_encodings = HashMap::new(); + let writer_to_reader: Vec> = writer_variants + .iter() + .map(|writer| { + self.find_best_union_match(writer, reader_variants, namespace) + .map(|(match_idx, mut match_dt)| { + let resolution = match_dt + .resolution + .take() + .unwrap_or(ResolutionInfo::Promotion(Promotion::Direct)); + // TODO: check for overlapping reader variants? + // They should not be possible in a valid schema. 
+ resolved_reader_encodings.insert(match_idx, match_dt); + (match_idx, resolution) + }) + }) + .collect(); let reader_encodings: Vec = reader_variants .iter() - .map(|reader_schema| self.parse_type(reader_schema, namespace)) + .enumerate() + .map(|(reader_idx, reader_schema)| { + if let Some(resolved) = resolved_reader_encodings.remove(&reader_idx) { + Ok(resolved) + } else { + self.parse_type(reader_schema, namespace) + } + }) .collect::>()?; - let mut writer_to_reader: Vec> = - Vec::with_capacity(writer_variants.len()); - for writer in writer_variants { - writer_to_reader.push(self.find_best_promotion(writer, reader_variants, namespace)); - } let union_fields = build_union_fields(&reader_encodings)?; let mut dt = AvroDataType::new( Codec::Union(reader_encodings.into(), union_fields, UnionMode::Dense), @@ -2179,7 +2201,14 @@ impl<'a> Maker<'a> { )?; let writer_ns = writer_record.namespace.or(namespace); let reader_ns = reader_record.namespace.or(namespace); - let reader_md = reader_record.attributes.field_metadata(); + let mut reader_md = reader_record.attributes.field_metadata(); + reader_md.insert( + AVRO_NAME_METADATA_KEY.to_string(), + reader_record.name.to_string(), + ); + if let Some(ns) = reader_ns { + reader_md.insert(AVRO_NAMESPACE_METADATA_KEY.to_string(), ns.to_string()); + } // Build writer lookup and ambiguous alias set. 
let (writer_lookup, ambiguous_writer_aliases) = Self::build_writer_lookup(writer_record); let mut writer_to_reader: Vec> = vec![None; writer_record.fields.len()]; @@ -2620,7 +2649,15 @@ mod tests { assert!(matches!(result.codec, Codec::Float64)); assert_eq!( result.resolution, - Some(ResolutionInfo::Promotion(Promotion::IntToDouble)) + Some(ResolutionInfo::Union(ResolvedUnion { + writer_to_reader: [ + None, + Some((0, ResolutionInfo::Promotion(Promotion::IntToDouble))) + ] + .into(), + writer_is_union: true, + reader_is_union: true, + })) ); assert_eq!(result.nullability, Some(Nullability::NullFirst)); } @@ -2642,7 +2679,10 @@ mod tests { assert!(resolved.writer_is_union && !resolved.reader_is_union); assert_eq!( resolved.writer_to_reader.as_ref(), - &[Some((0, Promotion::StringToBytes)), None] + &[ + Some((0, ResolutionInfo::Promotion(Promotion::StringToBytes))), + None + ] ); } @@ -2662,7 +2702,7 @@ mod tests { assert!(!resolved.writer_is_union && resolved.reader_is_union); assert_eq!( resolved.writer_to_reader.as_ref(), - &[Some((0, Promotion::Direct))] + &[Some((0, ResolutionInfo::Promotion(Promotion::Direct)))] ); } @@ -2682,7 +2722,200 @@ mod tests { }; assert_eq!( resolved.writer_to_reader.as_ref(), - &[Some((1, Promotion::IntToLong))] + &[Some((1, ResolutionInfo::Promotion(Promotion::IntToLong)))] + ); + } + + #[test] + fn test_resolve_writer_non_union_to_reader_union_preserves_inner_record_defaults() { + // Writer: record Inner{a: int} + // Reader: union [Inner{a: int, b: int default 42}, string] + // The matching child (Inner) should preserve DefaultValue(Int(42)) on field b. 
+ let writer = Schema::Complex(ComplexType::Record(Record { + name: "Inner", + namespace: None, + doc: None, + aliases: vec![], + fields: vec![AvroFieldSchema { + name: "a", + doc: None, + r#type: mk_primitive(PrimitiveType::Int), + default: None, + aliases: vec![], + }], + attributes: Attributes::default(), + })); + let reader = mk_union(vec![ + Schema::Complex(ComplexType::Record(Record { + name: "Inner", + namespace: None, + doc: None, + aliases: vec![], + fields: vec![ + AvroFieldSchema { + name: "a", + doc: None, + r#type: mk_primitive(PrimitiveType::Int), + default: None, + aliases: vec![], + }, + AvroFieldSchema { + name: "b", + doc: None, + r#type: mk_primitive(PrimitiveType::Int), + default: Some(Value::Number(serde_json::Number::from(42))), + aliases: vec![], + }, + ], + attributes: Attributes::default(), + })), + mk_primitive(PrimitiveType::String), + ]); + let mut maker = Maker::new(false, false); + let dt = maker + .make_data_type(&writer, Some(&reader), None) + .expect("resolution should succeed"); + // Verify the union resolution structure + let resolved = match dt.resolution.as_ref() { + Some(ResolutionInfo::Union(u)) => u, + other => panic!("expected union resolution info, got {other:?}"), + }; + assert!(!resolved.writer_is_union && resolved.reader_is_union); + assert_eq!( + resolved.writer_to_reader.len(), + 1, + "expected the non-union record to resolve to a union variant" + ); + let resolution = match resolved.writer_to_reader.first().unwrap() { + Some((0, resolution)) => resolution, + other => panic!("unexpected writer-to-reader table value {other:?}"), + }; + match resolution { + ResolutionInfo::Record(ResolvedRecord { + writer_to_reader, + default_fields, + skip_fields, + }) => { + assert_eq!(writer_to_reader.len(), 1); + assert_eq!(writer_to_reader[0], Some(0)); + assert_eq!(default_fields.len(), 1); + assert_eq!(default_fields[0], 1); + assert_eq!(skip_fields.len(), 1); + assert_eq!(skip_fields[0], None); + } + other => panic!("unexpected 
resolution {other:?}"), + } + // The matching child (Inner at index 0) should have field b with DefaultValue + let children = match dt.codec() { + Codec::Union(children, _, _) => children, + other => panic!("expected union codec, got {other:?}"), + }; + let inner_fields = match children[0].codec() { + Codec::Struct(f) => f, + other => panic!("expected struct codec for Inner, got {other:?}"), + }; + assert_eq!(inner_fields.len(), 2); + assert_eq!(inner_fields[1].name(), "b"); + assert_eq!( + inner_fields[1].data_type().resolution, + Some(ResolutionInfo::DefaultValue(AvroLiteral::Int(42))), + "field b should have DefaultValue(Int(42)) from schema resolution" + ); + } + + #[test] + fn test_resolve_writer_union_to_reader_union_preserves_inner_record_defaults() { + // Writer: record [string, Inner{a: int}] + // Reader: union [Inner{a: int, b: int default 42}, string] + // The matching child (Inner) should preserve DefaultValue(Int(42)) on field b. + let writer = mk_union(vec![ + mk_primitive(PrimitiveType::String), + Schema::Complex(ComplexType::Record(Record { + name: "Inner", + namespace: None, + doc: None, + aliases: vec![], + fields: vec![AvroFieldSchema { + name: "a", + doc: None, + r#type: mk_primitive(PrimitiveType::Int), + default: None, + aliases: vec![], + }], + attributes: Attributes::default(), + })), + ]); + let reader = mk_union(vec![ + Schema::Complex(ComplexType::Record(Record { + name: "Inner", + namespace: None, + doc: None, + aliases: vec![], + fields: vec![ + AvroFieldSchema { + name: "a", + doc: None, + r#type: mk_primitive(PrimitiveType::Int), + default: None, + aliases: vec![], + }, + AvroFieldSchema { + name: "b", + doc: None, + r#type: mk_primitive(PrimitiveType::Int), + default: Some(Value::Number(serde_json::Number::from(42))), + aliases: vec![], + }, + ], + attributes: Attributes::default(), + })), + mk_primitive(PrimitiveType::String), + ]); + let mut maker = Maker::new(false, false); + let dt = maker + .make_data_type(&writer, 
Some(&reader), None) + .expect("resolution should succeed"); + // Verify the union resolution structure + let resolved = match dt.resolution.as_ref() { + Some(ResolutionInfo::Union(u)) => u, + other => panic!("expected union resolution info, got {other:?}"), + }; + assert!(resolved.writer_is_union && resolved.reader_is_union); + assert_eq!(resolved.writer_to_reader.len(), 2); + let resolution = match resolved.writer_to_reader[1].as_ref() { + Some((0, resolution)) => resolution, + other => panic!("unexpected writer-to-reader table value {other:?}"), + }; + match resolution { + ResolutionInfo::Record(ResolvedRecord { + writer_to_reader, + default_fields, + skip_fields, + }) => { + assert_eq!(writer_to_reader.len(), 1); + assert_eq!(writer_to_reader[0], Some(0)); + assert_eq!(default_fields.len(), 1); + assert_eq!(default_fields[0], 1); + assert_eq!(skip_fields.len(), 1); + assert_eq!(skip_fields[0], None); + } + other => panic!("unexpected resolution {other:?}"), + } + // The matching child (Inner at index 0) should have field b with DefaultValue + let children = match dt.codec() { + Codec::Union(children, _, _) => children, + other => panic!("expected union codec, got {other:?}"), + }; + let inner_fields = match children[0].codec() { + Codec::Struct(f) => f, + other => panic!("expected struct codec for Inner, got {other:?}"), + }; + assert_eq!(inner_fields.len(), 2); + assert_eq!(inner_fields[1].name(), "b"); + assert_eq!( + inner_fields[1].data_type().resolution, + Some(ResolutionInfo::DefaultValue(AvroLiteral::Int(42))), + "field b should have DefaultValue(Int(42)) from schema resolution" ); } @@ -2700,7 +2933,18 @@ mod tests { let dt = maker.make_data_type(&writer, Some(&reader), None).unwrap(); assert!(matches!(dt.codec(), Codec::Utf8)); assert_eq!(dt.nullability, Some(Nullability::NullFirst)); - assert!(dt.resolution.is_none()); + assert_eq!( + dt.resolution, + Some(ResolutionInfo::Union(ResolvedUnion { + writer_to_reader: [ + None, + Some((0, 
ResolutionInfo::Promotion(Promotion::Direct))) + ] + .into(), + writer_is_union: true, + reader_is_union: true + })) + ); } #[test] @@ -2719,7 +2963,15 @@ mod tests { assert_eq!(dt.nullability, Some(Nullability::NullFirst)); assert_eq!( dt.resolution, - Some(ResolutionInfo::Promotion(Promotion::IntToDouble)) + Some(ResolutionInfo::Union(ResolvedUnion { + writer_to_reader: [ + None, + Some((0, ResolutionInfo::Promotion(Promotion::IntToDouble))) + ] + .into(), + writer_is_union: true, + reader_is_union: true + })) ); } @@ -3316,14 +3568,7 @@ mod tests { assert_eq!(inner.nullability(), Some(Nullability::NullFirst)); assert!(matches!(inner.codec(), Codec::Int32)); match inner.resolution.as_ref() { - Some(ResolutionInfo::Union(info)) => { - assert!(!info.writer_is_union, "writer should be non-union"); - assert!(info.reader_is_union, "reader should be union"); - assert_eq!( - info.writer_to_reader.as_ref(), - &[Some((1, Promotion::Direct))] - ); - } + Some(ResolutionInfo::Promotion(Promotion::Direct)) => {} other => panic!("expected Union resolution, got {other:?}"), } } else { diff --git a/arrow-avro/src/reader/mod.rs b/arrow-avro/src/reader/mod.rs index aa01f272bfeb..63b61b601e00 100644 --- a/arrow-avro/src/reader/mod.rs +++ b/arrow-avro/src/reader/mod.rs @@ -6866,6 +6866,264 @@ mod test { assert_eq!(int_values.value(1), 2); } + #[test] + fn test_nested_record_field_addition() { + let file = arrow_test_data("avro/nested_records.avro"); + + // Adds fields to the writer schema: + // * "ns2.record2" / "f1_4" + // - nullable + // - added last + // - the containing "f1" field is made nullable in the reader + // * "ns4.record4" / "f2_3" + // - non-nullable with an integer default value + // - resolution of a record nested in an array + // * "ns5.record5" / "f3_0" + // - non-nullable with a string default value + // - prepended before existing fields in the schema order + let reader_schema = AvroSchema::new( + r#" + { + "type": "record", + "name": "record1", + "namespace": 
"ns1", + "fields": [ + { + "name": "f1", + "type": [ + "null", + { + "type": "record", + "name": "record2", + "namespace": "ns2", + "fields": [ + { + "name": "f1_1", + "type": "string" + }, + { + "name": "f1_2", + "type": "int" + }, + { + "name": "f1_3", + "type": { + "type": "record", + "name": "record3", + "namespace": "ns3", + "fields": [ + { + "name": "f1_3_1", + "type": "double" + } + ] + } + }, + { + "name": "f1_4", + "type": ["null", "int"], + "default": null + } + ] + } + ] + }, + { + "name": "f2", + "type": { + "type": "array", + "items": { + "type": "record", + "name": "record4", + "namespace": "ns4", + "fields": [ + { + "name": "f2_1", + "type": "boolean" + }, + { + "name": "f2_2", + "type": "float" + }, + { + "name": "f2_3", + "type": ["null", "int"], + "default": 42 + } + ] + } + } + }, + { + "name": "f3", + "type": [ + "null", + { + "type": "record", + "name": "record5", + "namespace": "ns5", + "fields": [ + { + "name": "f3_0", + "type": "string", + "default": "lorem ipsum" + }, + { + "name": "f3_1", + "type": "string" + } + ] + } + ], + "default": null + }, + { + "name": "f4", + "type": { + "type": "array", + "items": [ + "null", + { + "type": "record", + "name": "record6", + "namespace": "ns6", + "fields": [ + { + "name": "f4_1", + "type": "long" + } + ] + } + ] + } + } + ] + } + "# + .to_string(), + ); + + let file = File::open(&file).unwrap(); + let mut reader = ReaderBuilder::new() + .with_reader_schema(reader_schema) + .build(BufReader::new(file)) + .expect("reader with evolved reader schema should be built successfully"); + + let batch = reader + .next() + .expect("should have at least one batch") + .expect("reading should succeed"); + + assert!(batch.num_rows() > 0); + + let schema = batch.schema(); + + let f1_field = schema.field_with_name("f1").expect("f1 field should exist"); + if let DataType::Struct(f1_fields) = f1_field.data_type() { + let (_, f1_4) = f1_fields + .find("f1_4") + .expect("f1_4 field should be present in record2"); + 
assert!(f1_4.is_nullable(), "f1_4 should be nullable"); + assert_eq!(f1_4.data_type(), &DataType::Int32, "f1_4 should be Int32"); + assert_eq!( + f1_4.metadata().get("avro.field.default"), + Some(&"null".to_string()), + "f1_4 should have null default value in metadata" + ); + } else { + panic!("f1 should be a struct"); + } + + let f2_field = schema.field_with_name("f2").expect("f2 field should exist"); + if let DataType::List(f2_items_field) = f2_field.data_type() { + if let DataType::Struct(f2_items_fields) = f2_items_field.data_type() { + let (_, f2_3) = f2_items_fields + .find("f2_3") + .expect("f2_3 field should be present in record4"); + assert!(f2_3.is_nullable(), "f2_3 should be nullable"); + assert_eq!(f2_3.data_type(), &DataType::Int32, "f2_3 should be Int32"); + assert_eq!( + f2_3.metadata().get("avro.field.default"), + Some(&"42".to_string()), + "f2_3 should have 42 default value in metadata" + ); + } else { + panic!("f2 array items should be a struct"); + } + } else { + panic!("f2 should be a list"); + } + + let f3_field = schema.field_with_name("f3").expect("f3 field should exist"); + assert!(f3_field.is_nullable(), "f3 should be nullable"); + if let DataType::Struct(f3_fields) = f3_field.data_type() { + let (_, f3_0) = f3_fields + .find("f3_0") + .expect("f3_0 field should be present in record5"); + assert!(!f3_0.is_nullable(), "f3_0 should be non-nullable"); + assert_eq!(f3_0.data_type(), &DataType::Utf8, "f3_0 should be a string"); + assert_eq!( + f3_0.metadata().get("avro.field.default"), + Some(&"\"lorem ipsum\"".to_string()), + "f3_0 should have \"lorem ipsum\" default value in metadata" + ); + } else { + panic!("f3 should be a struct"); + } + + // Verify the actual values in the columns match the expected defaults + let num_rows = batch.num_rows(); + + // Check f1_4 values (should all be null since default is null) + let f1_array = batch + .column_by_name("f1") + .expect("f1 column should exist") + .as_struct(); + let f1_4_array = f1_array + 
.column_by_name("f1_4") + .expect("f1_4 column should exist in f1 struct") + .as_primitive::(); + + assert_eq!(f1_4_array.null_count(), num_rows); + + let f2_array = batch + .column_by_name("f2") + .expect("f2 column should exist") + .as_list::(); + + for i in 0..num_rows { + assert!(!f2_array.is_null(i)); + let f2_value = f2_array.value(i); + let f2_record_array = f2_value.as_struct(); + let f2_3_array = f2_record_array + .column_by_name("f2_3") + .expect("f2_3 column should exist in f2 array items") + .as_primitive::(); + + for j in 0..f2_3_array.len() { + assert!(!f2_3_array.is_null(j)); + assert_eq!(f2_3_array.value(j), 42); + } + } + + let f3_array = batch + .column_by_name("f3") + .expect("f3 column should exist") + .as_struct(); + let f3_0_array = f3_array + .column_by_name("f3_0") + .expect("f3_0 column should exist in f3 struct") + .as_string::(); + + for i in 0..num_rows { + // Only check f3_0 when the parent f3 struct is not null + if !f3_array.is_null(i) { + assert!(!f3_0_array.is_null(i)); + assert_eq!(f3_0_array.value(i), "lorem ipsum"); + } + } + } + fn corrupt_first_block_payload_byte( mut bytes: Vec, field_offset: usize, @@ -8441,6 +8699,33 @@ mod test { ])), false, )); + let person_md = { + let mut m = HashMap::::new(); + m.insert(AVRO_NAME_METADATA_KEY.to_string(), "Person".to_string()); + m.insert( + AVRO_NAMESPACE_METADATA_KEY.to_string(), + "com.example".to_string(), + ); + m + }; + let maybe_auth_md = { + let mut m = HashMap::::new(); + m.insert(AVRO_NAME_METADATA_KEY.to_string(), "MaybeAuth".to_string()); + m.insert( + AVRO_NAMESPACE_METADATA_KEY.to_string(), + "org.apache.arrow.avrotests.v1.types".to_string(), + ); + m + }; + let address_md = { + let mut m = HashMap::::new(); + m.insert(AVRO_NAME_METADATA_KEY.to_string(), "Address".to_string()); + m.insert( + AVRO_NAMESPACE_METADATA_KEY.to_string(), + "org.apache.arrow.avrotests.v1.types".to_string(), + ); + m + }; let rec_a_md = { let mut m = HashMap::::new(); 
m.insert(AVRO_NAME_METADATA_KEY.to_string(), "RecA".to_string()); @@ -8576,11 +8861,18 @@ mod test { true, ), ]); - let kv_item_field = Arc::new(Field::new( - item_name, - DataType::Struct(kv_fields.clone()), - false, - )); + let kv_md = { + let mut m = HashMap::::new(); + m.insert(AVRO_NAME_METADATA_KEY.to_string(), "KV".to_string()); + m.insert( + AVRO_NAMESPACE_METADATA_KEY.to_string(), + "org.apache.arrow.avrotests.v1.types".to_string(), + ); + m + }; + let kv_item_field = Arc::new( + Field::new(item_name, DataType::Struct(kv_fields.clone()), false).with_metadata(kv_md), + ); let map_int_entries = Arc::new(Field::new( "entries", DataType::Struct(Fields::from(vec![ @@ -8652,14 +8944,17 @@ mod test { #[cfg(not(feature = "small_decimals"))] let dec10_dt = DataType::Decimal128(10, 2); let fields: Vec = vec![ - Arc::new(Field::new( - "person", - DataType::Struct(Fields::from(vec![ - Field::new("name", DataType::Utf8, false), - Field::new("age", DataType::Int32, false), - ])), - false, - )), + Arc::new( + Field::new( + "person", + DataType::Struct(Fields::from(vec![ + Field::new("name", DataType::Utf8, false), + Field::new("age", DataType::Int32, false), + ])), + false, + ) + .with_metadata(person_md), + ), Arc::new(Field::new("old_count", DataType::Int32, false)), Arc::new(Field::new( "union_map_or_array_int", @@ -8691,23 +8986,29 @@ mod test { DataType::Union(uf_union_big.clone(), UnionMode::Dense), false, )), - Arc::new(Field::new( - "maybe_auth", - DataType::Struct(Fields::from(vec![ - Field::new("user", DataType::Utf8, false), - Field::new("token", DataType::Binary, true), // [bytes,null] -> nullable bytes - ])), - false, - )), - Arc::new(Field::new( - "address", - DataType::Struct(Fields::from(vec![ - Field::new("street_name", DataType::Utf8, false), - Field::new("zip", DataType::Int32, false), - Field::new("country", DataType::Utf8, false), - ])), - false, - )), + Arc::new( + Field::new( + "maybe_auth", + DataType::Struct(Fields::from(vec![ + 
Field::new("user", DataType::Utf8, false), + Field::new("token", DataType::Binary, true), // [bytes,null] -> nullable bytes + ])), + false, + ) + .with_metadata(maybe_auth_md), + ), + Arc::new( + Field::new( + "address", + DataType::Struct(Fields::from(vec![ + Field::new("street_name", DataType::Utf8, false), + Field::new("zip", DataType::Int32, false), + Field::new("country", DataType::Utf8, false), + ])), + false, + ) + .with_metadata(address_md), + ), Arc::new(Field::new( "map_union", DataType::Map(map_entries_field.clone(), false), diff --git a/arrow-avro/src/reader/record.rs b/arrow-avro/src/reader/record.rs index 7701eeea725a..5e281d1fc6f6 100644 --- a/arrow-avro/src/reader/record.rs +++ b/arrow-avro/src/reader/record.rs @@ -18,7 +18,7 @@ //! Avro Decoder for Arrow types. use crate::codec::{ - AvroDataType, AvroField, AvroLiteral, Codec, Promotion, ResolutionInfo, ResolvedRecord, + AvroDataType, AvroLiteral, Codec, EnumMapping, Promotion, ResolutionInfo, ResolvedRecord, ResolvedUnion, }; use crate::errors::AvroError; @@ -38,22 +38,14 @@ use arrow_schema::{ }; #[cfg(feature = "avro_custom_types")] use arrow_select::take::{TakeOptions, take}; -use std::cmp::Ordering; -use std::sync::Arc; use strum_macros::AsRefStr; use uuid::Uuid; -const DEFAULT_CAPACITY: usize = 1024; +use std::cmp::Ordering; +use std::mem; +use std::sync::Arc; -/// Runtime plan for decoding reader-side `["null", T]` types. -#[derive(Clone, Copy, Debug)] -enum NullablePlan { - /// Writer actually wrote a union (branch tag present). - ReadTag, - /// Writer wrote a single (non-union) value resolved to the non-null branch - /// of the reader union; do NOT read a branch tag, but apply any promotion. - FromSingle { promotion: Promotion }, -} +const DEFAULT_CAPACITY: usize = 1024; /// Macro to decode a decimal payload for a given width and integer type. macro_rules! 
decode_decimal { @@ -121,13 +113,22 @@ impl RecordDecoder { // Build Arrow schema fields and per-child decoders let mut arrow_fields = Vec::with_capacity(reader_fields.len()); let mut encodings = Vec::with_capacity(reader_fields.len()); + let mut field_defaults = Vec::with_capacity(reader_fields.len()); for avro_field in reader_fields.iter() { arrow_fields.push(avro_field.field()); encodings.push(Decoder::try_new(avro_field.data_type())?); + + if let Some(ResolutionInfo::DefaultValue(lit)) = + avro_field.data_type().resolution.as_ref() + { + field_defaults.push(Some(lit.clone())); + } else { + field_defaults.push(None); + } } let projector = match data_type.resolution.as_ref() { Some(ResolutionInfo::Record(rec)) => { - Some(ProjectorBuilder::try_new(rec, reader_fields).build()?) + Some(ProjectorBuilder::try_new(rec, &field_defaults).build()?) } _ => None, }; @@ -179,12 +180,6 @@ impl RecordDecoder { } } -#[derive(Debug)] -struct EnumResolution { - mapping: Arc<[i32]>, - default_index: i32, -} - #[derive(Debug, AsRefStr)] enum Decoder { Null(usize), @@ -249,7 +244,12 @@ enum Decoder { /// String data encoded as UTF-8 bytes, but mapped to Arrow's StringViewArray StringView(OffsetBufferBuilder, Vec), Array(FieldRef, OffsetBufferBuilder, Box), - Record(Fields, Vec, Option), + Record( + Fields, + Vec, + Vec>, + Option, + ), Map( FieldRef, OffsetBufferBuilder, @@ -270,7 +270,7 @@ enum Decoder { #[cfg(feature = "avro_custom_types")] RunEndEncoded(u8, usize, Box), Union(UnionDecoder), - Nullable(Nullability, NullBufferBuilder, Box, NullablePlan), + Nullable(NullablePlan, NullBufferBuilder, Box), } impl Decoder { @@ -279,7 +279,7 @@ impl Decoder { if info.writer_is_union && !info.reader_is_union { let mut clone = data_type.clone(); clone.resolution = None; // Build target base decoder without Union resolution - let target = Box::new(Self::try_new_internal(&clone)?); + let target = Self::try_new_internal(&clone)?; let decoder = Self::Union( UnionDecoderBuilder::new() 
.with_resolved_union(info.clone()) @@ -295,7 +295,7 @@ impl Decoder { fn try_new_internal(data_type: &AvroDataType) -> Result { // Extract just the Promotion (if any) to simplify pattern matching let promotion = match data_type.resolution.as_ref() { - Some(ResolutionInfo::Promotion(p)) => Some(p), + Some(ResolutionInfo::Promotion(p)) => Some(*p), _ => None, }; let decoder = match (data_type.codec(), promotion) { @@ -466,10 +466,9 @@ impl Decoder { } (Codec::Enum(symbols), _) => { let res = match data_type.resolution.as_ref() { - Some(ResolutionInfo::EnumMapping(mapping)) => Some(EnumResolution { - mapping: mapping.mapping.clone(), - default_index: mapping.default_index, - }), + Some(ResolutionInfo::EnumMapping(mapping)) => { + Some(EnumResolution::new(mapping)) + } _ => None, }; Self::Enum(Vec::with_capacity(DEFAULT_CAPACITY), symbols.clone(), res) @@ -477,18 +476,27 @@ impl Decoder { (Codec::Struct(fields), _) => { let mut arrow_fields = Vec::with_capacity(fields.len()); let mut encodings = Vec::with_capacity(fields.len()); + let mut field_defaults = Vec::with_capacity(fields.len()); for avro_field in fields.iter() { let encoding = Self::try_new(avro_field.data_type())?; arrow_fields.push(avro_field.field()); encodings.push(encoding); + + if let Some(ResolutionInfo::DefaultValue(lit)) = + avro_field.data_type().resolution.as_ref() + { + field_defaults.push(Some(lit.clone())); + } else { + field_defaults.push(None); + } } let projector = if let Some(ResolutionInfo::Record(rec)) = data_type.resolution.as_ref() { - Some(ProjectorBuilder::try_new(rec, fields).build()?) + Some(ProjectorBuilder::try_new(rec, &field_defaults).build()?) 
} else { None }; - Self::Record(arrow_fields.into(), encodings, projector) + Self::Record(arrow_fields.into(), encodings, field_defaults, projector) } (Codec::Map(child), _) => { let val_field = child.field_with_name("value"); @@ -568,20 +576,49 @@ impl Decoder { }; Ok(match data_type.nullability() { Some(nullability) => { - // Default to reading a union branch tag unless the resolution proves otherwise. - let mut plan = NullablePlan::ReadTag; - if let Some(ResolutionInfo::Union(info)) = data_type.resolution.as_ref() { - if !info.writer_is_union && info.reader_is_union { - if let Some(Some((_reader_idx, promo))) = info.writer_to_reader.first() { - plan = NullablePlan::FromSingle { promotion: *promo }; + // Default to reading a union branch tag unless the resolution directs otherwise. + let plan = match &data_type.resolution { + None => NullablePlan::ReadTag { + nullability, + resolution: ResolutionPlan::Promotion(Promotion::Direct), + }, + Some(ResolutionInfo::Promotion(_)) => { + // Promotions should have been incorporated + // into the inner decoder. 
+ NullablePlan::FromSingle { + resolution: ResolutionPlan::Promotion(Promotion::Direct), } } - } + Some(ResolutionInfo::Union(info)) if !info.writer_is_union => { + let Some(Some((_, resolution))) = info.writer_to_reader.first() else { + return Err(AvroError::SchemaError( + "unexpected union resolution info for non-union writer and union reader type".into(), + )); + }; + let resolution = ResolutionPlan::try_new(&decoder, resolution)?; + NullablePlan::FromSingle { resolution } + } + Some(ResolutionInfo::Union(info)) => { + let Some((_, resolution)) = + info.writer_to_reader[nullability.non_null_index()].as_ref() + else { + return Err(AvroError::SchemaError( + "unexpected union resolution info for nullable writer type".into(), + )); + }; + NullablePlan::ReadTag { + nullability, + resolution: ResolutionPlan::try_new(&decoder, resolution)?, + } + } + Some(resolution) => NullablePlan::FromSingle { + resolution: ResolutionPlan::try_new(&decoder, resolution)?, + }, + }; Self::Nullable( - nullability, + plan, NullBufferBuilder::new(DEFAULT_CAPACITY), Box::new(decoder), - plan, ) } None => decoder, @@ -645,7 +682,7 @@ impl Decoder { Self::Array(_, offsets, _) => { offsets.push_length(0); } - Self::Record(_, e, _) => { + Self::Record(_, e, _, _) => { for encoding in e.iter_mut() { encoding.append_null()?; } @@ -670,7 +707,7 @@ impl Decoder { inner.append_null()?; } Self::Union(u) => u.append_null()?, - Self::Nullable(_, null_buffer, inner, _) => { + Self::Nullable(_, null_buffer, inner) => { null_buffer.append(false); inner.append_null()?; } @@ -681,7 +718,7 @@ impl Decoder { /// Append a single default literal into the decoder's buffers fn append_default(&mut self, lit: &AvroLiteral) -> Result<(), AvroError> { match self { - Self::Nullable(_, nb, inner, _) => { + Self::Nullable(_, nb, inner) => { if matches!(lit, AvroLiteral::Null) { nb.append(false); inner.append_null() @@ -1087,14 +1124,14 @@ impl Decoder { inner.append_default(lit) } Self::Union(u) => 
u.append_default(lit), - Self::Record(field_meta, decoders, projector) => match lit { + Self::Record(field_meta, decoders, field_defaults, _) => match lit { AvroLiteral::Map(entries) => { for (i, dec) in decoders.iter_mut().enumerate() { let name = field_meta[i].name(); if let Some(sub) = entries.get(name) { dec.append_default(sub)?; - } else if let Some(proj) = projector.as_ref() { - proj.project_default(dec, i)?; + } else if let Some(default_literal) = field_defaults[i].as_ref() { + dec.append_default(default_literal)?; } else { dec.append_null()?; } @@ -1103,8 +1140,8 @@ impl Decoder { } AvroLiteral::Null => { for (i, dec) in decoders.iter_mut().enumerate() { - if let Some(proj) = projector.as_ref() { - proj.project_default(dec, i)?; + if let Some(default_literal) = field_defaults[i].as_ref() { + dec.append_default(default_literal)?; } else { dec.append_null()?; } @@ -1246,12 +1283,12 @@ impl Decoder { let total_items = read_blocks(buf, |cursor| encoding.decode(cursor))?; off.push_length(total_items); } - Self::Record(_, encodings, None) => { + Self::Record(_, encodings, _, None) => { for encoding in encodings { encoding.decode(buf)?; } } - Self::Record(_, encodings, Some(proj)) => { + Self::Record(_, encodings, _, Some(proj)) => { proj.project_record(buf, encodings)?; } Self::Map(_, koff, moff, kdata, valdec) => { @@ -1286,18 +1323,8 @@ impl Decoder { } Self::Enum(indices, _, Some(res)) => { let raw = buf.get_int()?; - let resolved = usize::try_from(raw) - .ok() - .and_then(|idx| res.mapping.get(idx).copied()) - .filter(|&idx| idx >= 0) - .unwrap_or(res.default_index); - if resolved >= 0 { - indices.push(resolved); - } else { - return Err(AvroError::ParseError(format!( - "Enum symbol index {raw} not resolvable and no default provided", - ))); - } + let resolved = res.resolve(raw)?; + indices.push(resolved); } Self::Duration(builder) => { let b = buf.get_fixed(12)?; @@ -1313,26 +1340,31 @@ impl Decoder { inner.decode(buf)?; } Self::Union(u) => u.decode(buf)?, - 
Self::Nullable(order, nb, encoding, plan) => match *plan { - NullablePlan::FromSingle { promotion } => { - encoding.decode_with_promotion(buf, promotion)?; - nb.append(true); - } - NullablePlan::ReadTag => { - let branch = buf.read_vlq()?; - let is_not_null = match *order { - Nullability::NullFirst => branch != 0, - Nullability::NullSecond => branch == 0, - }; - if is_not_null { - // It is important to decode before appending to null buffer in case of decode error - encoding.decode(buf)?; - } else { - encoding.append_null()?; + Self::Nullable(plan, nb, encoding) => { + match plan { + NullablePlan::FromSingle { resolution } => { + encoding.decode_with_resolution(buf, resolution)?; + nb.append(true); + } + NullablePlan::ReadTag { + nullability, + resolution, + } => { + let branch = buf.read_vlq()?; + let is_not_null = match *nullability { + Nullability::NullFirst => branch != 0, + Nullability::NullSecond => branch == 0, + }; + if is_not_null { + // It is important to decode before appending to null buffer in case of decode error + encoding.decode_with_resolution(buf, resolution)?; + } else { + encoding.append_null()?; + } + nb.append(is_not_null); } - nb.append(is_not_null); } - }, + } } Ok(()) } @@ -1401,10 +1433,49 @@ impl Decoder { } } + fn decode_with_resolution<'d>( + &'d mut self, + buf: &mut AvroCursor<'_>, + resolution: &'d ResolutionPlan, + ) -> Result<(), AvroError> { + #[cfg(feature = "avro_custom_types")] + if let Self::RunEndEncoded(_, len, inner) = self { + *len += 1; + return inner.decode_with_resolution(buf, resolution); + } + + match resolution { + ResolutionPlan::Promotion(promotion) => { + let promotion = *promotion; + self.decode_with_promotion(buf, promotion) + } + ResolutionPlan::DefaultValue(lit) => self.append_default(lit), + ResolutionPlan::EnumMapping(res) => { + let Self::Enum(indices, _, _) = self else { + return Err(AvroError::SchemaError( + "enum mapping resolution provided for non-enum decoder".into(), + )); + }; + let raw = 
buf.get_int()?; + let resolved = res.resolve(raw)?; + indices.push(resolved); + Ok(()) + } + ResolutionPlan::Record(proj) => { + let Self::Record(_, encodings, _, _) = self else { + return Err(AvroError::SchemaError( + "record projection provided for non-record decoder".into(), + )); + }; + proj.project_record(buf, encodings) + } + } + } + /// Flush decoded records to an [`ArrayRef`] fn flush(&mut self, nulls: Option) -> Result { Ok(match self { - Self::Nullable(_, n, e, _) => e.flush(n.finish())?, + Self::Nullable(_, n, e) => e.flush(n.finish())?, Self::Null(size) => Arc::new(NullArray::new(std::mem::replace(size, 0))), Self::Boolean(b) => Arc::new(BooleanArray::new(b.finish(), nulls)), Self::Int32(values) => Arc::new(flush_primitive::(values, nulls)), @@ -1533,7 +1604,7 @@ impl Decoder { let offsets = flush_offsets(offsets); Arc::new(ListArray::try_new(field.clone(), offsets, values, nulls)?) } - Self::Record(fields, encodings, _) => { + Self::Record(fields, encodings, _, _) => { let arrays = encodings .iter_mut() .map(|x| x.flush(None)) @@ -1678,6 +1749,83 @@ impl Decoder { } } +/// Runtime plan for decoding reader-side `["null", T]` types. +#[derive(Debug)] +enum NullablePlan { + /// Writer actually wrote a union (branch tag present). + ReadTag { + nullability: Nullability, + resolution: ResolutionPlan, + }, + /// Writer wrote a single (non-union) value resolved to the non-null branch + /// of the reader union; do NOT read a branch tag, but apply any resolution. + FromSingle { resolution: ResolutionPlan }, +} + +/// Runtime plan for resolving writer-reader type differences. +#[derive(Debug)] +enum ResolutionPlan { + /// Indicates that the writer's type should be promoted to the reader's type. + Promotion(Promotion), + /// Provides a default value for the field missing in the writer type. + DefaultValue(AvroLiteral), + /// Provides mapping information for resolving enums. + EnumMapping(EnumResolution), + /// Provides projection information for record fields. 
+ Record(Projector), +} + +impl ResolutionPlan { + fn try_new(decoder: &Decoder, resolution: &ResolutionInfo) -> Result { + match (decoder, resolution) { + (_, ResolutionInfo::Promotion(p)) => Ok(ResolutionPlan::Promotion(*p)), + (_, ResolutionInfo::DefaultValue(lit)) => Ok(ResolutionPlan::DefaultValue(lit.clone())), + (_, ResolutionInfo::EnumMapping(m)) => { + Ok(ResolutionPlan::EnumMapping(EnumResolution::new(m))) + } + (Decoder::Record(_, _, field_defaults, _), ResolutionInfo::Record(r)) => Ok( + ResolutionPlan::Record(ProjectorBuilder::try_new(r, field_defaults).build()?), + ), + (_, ResolutionInfo::Record(_)) => Err(AvroError::SchemaError( + "record resolution on non-record decoder".into(), + )), + (_, ResolutionInfo::Union(_)) => Err(AvroError::SchemaError( + "union variant cannot be resolved to a union type".into(), + )), + } + } +} + +#[derive(Debug)] +struct EnumResolution { + mapping: Arc<[i32]>, + default_index: i32, +} + +impl EnumResolution { + fn new(mapping: &EnumMapping) -> Self { + EnumResolution { + mapping: mapping.mapping.clone(), + default_index: mapping.default_index, + } + } + + fn resolve(&self, index: i32) -> Result { + let resolved = usize::try_from(index) + .ok() + .and_then(|idx| self.mapping.get(idx).copied()) + .filter(|&idx| idx >= 0) + .unwrap_or(self.default_index); + if resolved >= 0 { + Ok(resolved) + } else { + Err(AvroError::ParseError(format!( + "Enum symbol index {index} not resolvable and no default provided", + ))) + } + } +} + // A lookup table for resolving fields between writer and reader schemas during record projection. #[derive(Debug)] struct DispatchLookupTable { @@ -1697,11 +1845,11 @@ struct DispatchLookupTable { // - `to_reader.len() == promotion.len()` and matches the reader field count. // - If `to_reader[r] == NO_SOURCE`, `promotion[r]` is ignored. to_reader: Box<[i8]>, - // For each reader field `r`, specifies the `Promotion` to apply to the writer's value. 
+ // For each reader field `r`, specifies the resolution to apply to the writer's value. // // This is used when a writer field's type can be promoted to a reader field's type // (e.g., `Int` to `Long`). It is ignored if `to_reader[r] == NO_SOURCE`. - promotion: Box<[Promotion]>, + resolution: Box<[ResolutionPlan]>, } // Sentinel used in `DispatchLookupTable::to_reader` to mark @@ -1710,64 +1858,94 @@ const NO_SOURCE: i8 = -1; impl DispatchLookupTable { fn from_writer_to_reader( - promotion_map: &[Option<(usize, Promotion)>], + reader_branches: &[Decoder], + resolution_map: &[Option<(usize, ResolutionInfo)>], ) -> Result { - let mut to_reader = Vec::with_capacity(promotion_map.len()); - let mut promotion = Vec::with_capacity(promotion_map.len()); - for map in promotion_map { - match *map { - Some((idx, promo)) => { + let mut to_reader = Vec::with_capacity(resolution_map.len()); + let mut resolution = Vec::with_capacity(resolution_map.len()); + for map in resolution_map { + match map { + Some((idx, res)) => { + let idx = *idx; let idx_i8 = i8::try_from(idx).map_err(|_| { AvroError::SchemaError(format!( "Reader branch index {idx} exceeds i8 range (max {})", i8::MAX )) })?; + let plan = ResolutionPlan::try_new(&reader_branches[idx], res)?; to_reader.push(idx_i8); - promotion.push(promo); + resolution.push(plan); } None => { to_reader.push(NO_SOURCE); - promotion.push(Promotion::Direct); + resolution.push(ResolutionPlan::DefaultValue(AvroLiteral::Null)); } } } Ok(Self { to_reader: to_reader.into_boxed_slice(), - promotion: promotion.into_boxed_slice(), + resolution: resolution.into_boxed_slice(), }) } - // Resolve a writer branch index to (reader_idx, promotion) + // Resolve a writer branch index to (reader_idx, resolution) #[inline] - fn resolve(&self, writer_index: usize) -> Option<(usize, Promotion)> { + fn resolve(&self, writer_index: usize) -> Option<(usize, &ResolutionPlan)> { let reader_index = *self.to_reader.get(writer_index)?; - (reader_index >= 0).then(|| 
(reader_index as usize, self.promotion[writer_index])) + (reader_index >= 0).then(|| (reader_index as usize, &self.resolution[writer_index])) } } #[derive(Debug)] struct UnionDecoder { fields: UnionFields, - type_ids: Vec, - offsets: Vec, - branches: Vec, - counts: Vec, - reader_type_codes: Vec, + branches: UnionDecoderBranches, default_emit_idx: usize, null_emit_idx: usize, plan: UnionReadPlan, } +#[derive(Debug, Default)] +struct UnionDecoderBranches { + decoders: Vec, + reader_type_codes: Vec, + type_ids: Vec, + offsets: Vec, + counts: Vec, +} + +impl UnionDecoderBranches { + fn new(decoders: Vec, reader_type_codes: Vec) -> Self { + let branch_len = decoders.len().max(reader_type_codes.len()); + Self { + decoders, + reader_type_codes, + type_ids: Vec::with_capacity(DEFAULT_CAPACITY), + offsets: Vec::with_capacity(DEFAULT_CAPACITY), + counts: vec![0; branch_len], + } + } + + fn emit_to(&mut self, reader_idx: usize) -> Result<&mut Decoder, AvroError> { + let branches_len = self.decoders.len(); + let Some(reader_branch) = self.decoders.get_mut(reader_idx) else { + return Err(AvroError::ParseError(format!( + "Union branch index {reader_idx} out of range ({branches_len} branches)" + ))); + }; + self.type_ids.push(self.reader_type_codes[reader_idx]); + self.offsets.push(self.counts[reader_idx]); + self.counts[reader_idx] += 1; + Ok(reader_branch) + } +} + impl Default for UnionDecoder { fn default() -> Self { Self { fields: UnionFields::empty(), - type_ids: Vec::new(), - offsets: Vec::new(), - branches: Vec::new(), - counts: Vec::new(), - reader_type_codes: Vec::new(), + branches: Default::default(), default_emit_idx: 0, null_emit_idx: 0, plan: UnionReadPlan::Passthrough, @@ -1782,7 +1960,7 @@ enum UnionReadPlan { }, FromSingle { reader_idx: usize, - promotion: Promotion, + resolution: ResolutionPlan, }, ToSingle { target: Box, @@ -1791,6 +1969,47 @@ enum UnionReadPlan { Passthrough, } +impl UnionReadPlan { + fn from_resolved( + reader_branches: &[Decoder], + 
resolved: Option, + ) -> Result { + let Some(info) = resolved else { + return Ok(Self::Passthrough); + }; + match (info.writer_is_union, info.reader_is_union) { + (true, true) => { + let lookup_table = + DispatchLookupTable::from_writer_to_reader(reader_branches, &info.writer_to_reader)?; + Ok(Self::ReaderUnion { lookup_table }) + } + (false, true) => { + let Some((idx, resolution)) = + info.writer_to_reader.first().and_then(Option::as_ref) + else { + return Err(AvroError::SchemaError( + "Writer type does not match any reader union branch".to_string(), + )); + }; + let reader_idx = *idx; + Ok(Self::FromSingle { + reader_idx, + resolution: ResolutionPlan::try_new(&reader_branches[reader_idx], resolution)?, + }) + } + (true, false) => Err(AvroError::InvalidArgument( + "UnionDecoder::try_new cannot build writer-union to single; use UnionDecoderBuilder with a target" + .to_string(), + )), + // (false, false) is invalid and should never be constructed by the resolver. + _ => Err(AvroError::SchemaError( + "ResolvedUnion constructed for non-union sides; resolver should return None" + .to_string(), + )), + } + } +} + impl UnionDecoder { fn try_new( fields: UnionFields, @@ -1801,7 +2020,6 @@ impl UnionDecoder { let null_branch = branches.iter().position(|b| matches!(b, Decoder::Null(_))); let default_emit_idx = 0; let null_emit_idx = null_branch.unwrap_or(default_emit_idx); - let branch_len = branches.len().max(reader_type_codes.len()); // Guard against impractically large unions that cannot be indexed by an Avro int let max_addr = (i32::MAX as usize) + 1; if branches.len() > max_addr { @@ -1812,26 +2030,23 @@ impl UnionDecoder { i32::MAX ))); } + let plan = UnionReadPlan::from_resolved(&branches, resolved)?; Ok(Self { fields, - type_ids: Vec::with_capacity(DEFAULT_CAPACITY), - offsets: Vec::with_capacity(DEFAULT_CAPACITY), - branches, - counts: vec![0; branch_len], - reader_type_codes, + branches: UnionDecoderBranches::new(branches, reader_type_codes), default_emit_idx, 
null_emit_idx, - plan: Self::plan_from_resolved(resolved)?, + plan, }) } - fn try_new_from_writer_union( - info: ResolvedUnion, - target: Box, - ) -> Result { + fn with_single_target(target: Decoder, info: ResolvedUnion) -> Result { // This constructor is only for writer-union to single-type resolution debug_assert!(info.writer_is_union && !info.reader_is_union); - let lookup_table = DispatchLookupTable::from_writer_to_reader(&info.writer_to_reader)?; + let mut reader_branches = [target]; + let lookup_table = + DispatchLookupTable::from_writer_to_reader(&reader_branches, &info.writer_to_reader)?; + let target = Box::new(mem::replace(&mut reader_branches[0], Decoder::Null(0))); Ok(Self { plan: UnionReadPlan::ToSingle { target, @@ -1841,41 +2056,6 @@ impl UnionDecoder { }) } - fn plan_from_resolved(resolved: Option) -> Result { - let Some(info) = resolved else { - return Ok(UnionReadPlan::Passthrough); - }; - match (info.writer_is_union, info.reader_is_union) { - (true, true) => { - let lookup_table = - DispatchLookupTable::from_writer_to_reader(&info.writer_to_reader)?; - Ok(UnionReadPlan::ReaderUnion { lookup_table }) - } - (false, true) => { - let Some(&(reader_idx, promotion)) = - info.writer_to_reader.first().and_then(Option::as_ref) - else { - return Err(AvroError::SchemaError( - "Writer type does not match any reader union branch".to_string(), - )); - }; - Ok(UnionReadPlan::FromSingle { - reader_idx, - promotion, - }) - } - (true, false) => Err(AvroError::InvalidArgument( - "UnionDecoder::try_new cannot build writer-union to single; use UnionDecoderBuilder with a target" - .to_string(), - )), - // (false, false) is invalid and should never be constructed by the resolver. - _ => Err(AvroError::SchemaError( - "ResolvedUnion constructed for non-union sides; resolver should return None" - .to_string(), - )), - } - } - #[inline] fn read_tag(buf: &mut AvroCursor<'_>) -> Result { // Avro unions are encoded by first writing the zero-based branch index. 
@@ -1896,20 +2076,6 @@ impl UnionDecoder { }) } - #[inline] - fn emit_to(&mut self, reader_idx: usize) -> Result<&mut Decoder, AvroError> { - let branches_len = self.branches.len(); - let Some(reader_branch) = self.branches.get_mut(reader_idx) else { - return Err(AvroError::ParseError(format!( - "Union branch index {reader_idx} out of range ({branches_len} branches)" - ))); - }; - self.type_ids.push(self.reader_type_codes[reader_idx]); - self.offsets.push(self.counts[reader_idx]); - self.counts[reader_idx] += 1; - Ok(reader_branch) - } - #[inline] fn on_decoder(&mut self, fallback_idx: usize, action: F) -> Result<(), AvroError> where @@ -1922,7 +2088,7 @@ impl UnionDecoder { UnionReadPlan::FromSingle { reader_idx, .. } => *reader_idx, _ => fallback_idx, }; - self.emit_to(reader_idx).and_then(action) + self.branches.emit_to(reader_idx).and_then(action) } fn append_null(&mut self) -> Result<(), AvroError> { @@ -1934,35 +2100,42 @@ impl UnionDecoder { } fn decode(&mut self, buf: &mut AvroCursor<'_>) -> Result<(), AvroError> { - let (reader_idx, promotion) = match &mut self.plan { - UnionReadPlan::Passthrough => (Self::read_tag(buf)?, Promotion::Direct), + match &mut self.plan { + UnionReadPlan::Passthrough => { + let reader_idx = Self::read_tag(buf)?; + let decoder = self.branches.emit_to(reader_idx)?; + decoder.decode(buf) + } UnionReadPlan::ReaderUnion { lookup_table } => { let idx = Self::read_tag(buf)?; - lookup_table.resolve(idx).ok_or_else(|| { - AvroError::ParseError(format!( + let Some((reader_idx, resolution)) = lookup_table.resolve(idx) else { + return Err(AvroError::ParseError(format!( "Union branch index {idx} not resolvable by reader schema" - )) - })? 
+ ))); + }; + let decoder = self.branches.emit_to(reader_idx)?; + decoder.decode_with_resolution(buf, resolution) } UnionReadPlan::FromSingle { reader_idx, - promotion, - } => (*reader_idx, *promotion), + resolution, + } => { + let decoder = self.branches.emit_to(*reader_idx)?; + decoder.decode_with_resolution(buf, resolution) + } UnionReadPlan::ToSingle { target, lookup_table, } => { let idx = Self::read_tag(buf)?; - return match lookup_table.resolve(idx) { - Some((_, promotion)) => target.decode_with_promotion(buf, promotion), - None => Err(AvroError::ParseError(format!( - "Writer union branch {idx} does not resolve to reader type" - ))), + let Some((_, resolution)) = lookup_table.resolve(idx) else { + return Err(AvroError::ParseError(format!( + "Writer union branch index {idx} not resolvable by reader schema" + ))); }; + target.decode_with_resolution(buf, resolution) } - }; - let decoder = self.emit_to(reader_idx)?; - decoder.decode_with_promotion(buf, promotion) + } } fn flush(&mut self, nulls: Option) -> Result { @@ -1976,13 +2149,20 @@ impl UnionDecoder { ); let children = self .branches + .decoders .iter_mut() .map(|d| d.flush(None)) .collect::, _>>()?; let arr = UnionArray::try_new( self.fields.clone(), - flush_values(&mut self.type_ids).into_iter().collect(), - Some(flush_values(&mut self.offsets).into_iter().collect()), + flush_values(&mut self.branches.type_ids) + .into_iter() + .collect(), + Some( + flush_values(&mut self.branches.offsets) + .into_iter() + .collect(), + ), children, ) .map_err(|e| AvroError::ParseError(e.to_string()))?; @@ -1995,7 +2175,7 @@ struct UnionDecoderBuilder { fields: Option, branches: Option>, resolved: Option, - target: Option>, + target: Option, } impl UnionDecoderBuilder { @@ -2018,7 +2198,7 @@ impl UnionDecoderBuilder { self } - fn with_target(mut self, target: Box) -> Self { + fn with_target(mut self, target: Decoder) -> Self { self.target = Some(target); self } @@ -2031,7 +2211,7 @@ impl UnionDecoderBuilder { 
(Some(info), None, None, Some(target)) if info.writer_is_union && !info.reader_is_union => { - UnionDecoder::try_new_from_writer_union(info, target) + UnionDecoder::with_single_target(target, info) } _ => Err(AvroError::InvalidArgument( "Invalid UnionDecoderBuilder configuration: expected either \ @@ -2238,42 +2418,31 @@ fn values_equal_at(arr: &dyn Array, i: usize, j: usize) -> bool { struct Projector { writer_to_reader: Arc<[Option]>, skip_decoders: Vec>, - field_defaults: Vec>, default_injections: Arc<[(usize, AvroLiteral)]>, } #[derive(Debug)] struct ProjectorBuilder<'a> { rec: &'a ResolvedRecord, - reader_fields: Arc<[AvroField]>, + field_defaults: &'a [Option], } impl<'a> ProjectorBuilder<'a> { #[inline] - fn try_new(rec: &'a ResolvedRecord, reader_fields: &Arc<[AvroField]>) -> Self { + fn try_new(rec: &'a ResolvedRecord, field_defaults: &'a [Option]) -> Self { Self { rec, - reader_fields: reader_fields.clone(), + field_defaults, } } #[inline] fn build(self) -> Result { - let reader_fields = self.reader_fields; - let mut field_defaults: Vec> = Vec::with_capacity(reader_fields.len()); - for avro_field in reader_fields.as_ref() { - if let Some(ResolutionInfo::DefaultValue(lit)) = - avro_field.data_type().resolution.as_ref() - { - field_defaults.push(Some(lit.clone())); - } else { - field_defaults.push(None); - } - } let mut default_injections: Vec<(usize, AvroLiteral)> = Vec::with_capacity(self.rec.default_fields.len()); for &idx in self.rec.default_fields.as_ref() { - let lit = field_defaults + let lit = self + .field_defaults .get(idx) .and_then(|lit| lit.clone()) .unwrap_or(AvroLiteral::Null); @@ -2291,31 +2460,15 @@ impl<'a> ProjectorBuilder<'a> { Ok(Projector { writer_to_reader: self.rec.writer_to_reader.clone(), skip_decoders, - field_defaults, default_injections: default_injections.into(), }) } } impl Projector { - #[inline] - fn project_default(&self, decoder: &mut Decoder, index: usize) -> Result<(), AvroError> { - // SAFETY: `index` is obtained by 
listing the reader's record fields (i.e., from - // `decoders.iter_mut().enumerate()`), and `field_defaults` was built in - // `ProjectorBuilder::build` to have exactly one element per reader field. - // Therefore, `index < self.field_defaults.len()` always holds here, so - // `self.field_defaults[index]` cannot panic. We only take an immutable reference - // via `.as_ref()`, and `self` is borrowed immutably. - if let Some(default_literal) = self.field_defaults[index].as_ref() { - decoder.append_default(default_literal) - } else { - decoder.append_null() - } - } - #[inline] fn project_record( - &mut self, + &self, buf: &mut AvroCursor<'_>, encodings: &mut [Decoder], ) -> Result<(), AvroError> { @@ -2327,10 +2480,10 @@ impl Projector { for (i, (mapping, skipper_opt)) in self .writer_to_reader .iter() - .zip(self.skip_decoders.iter_mut()) + .zip(self.skip_decoders.iter()) .enumerate() { - match (mapping, skipper_opt.as_mut()) { + match (mapping, skipper_opt.as_ref()) { (Some(reader_index), _) => encodings[*reader_index].decode(buf)?, (None, Some(skipper)) => skipper.skip(buf)?, (None, None) => { @@ -2459,7 +2612,7 @@ impl Skipper { Ok(base) } - fn skip(&mut self, buf: &mut AvroCursor<'_>) -> Result<(), AvroError> { + fn skip(&self, buf: &mut AvroCursor<'_>) -> Result<(), AvroError> { match self { Self::Null => Ok(()), Self::Boolean => { @@ -2522,7 +2675,7 @@ impl Skipper { Ok(()) } Self::Struct(fields) => { - for f in fields.iter_mut() { + for f in fields.iter() { f.skip(buf)? 
} Ok(()) @@ -2541,7 +2694,7 @@ impl Skipper { (usize::BITS as usize) )) })?; - let Some(encoding) = encodings.get_mut(idx) else { + let Some(encoding) = encodings.get(idx) else { return Err(AvroError::ParseError(format!( "Union branch index {idx} out of range for skipper ({} branches)", encodings.len() @@ -3488,10 +3641,12 @@ mod tests { let dt = avro_from_codec(Codec::Decimal(4, Some(1), None)); let inner = Decoder::try_new(&dt).unwrap(); let mut decoder = Decoder::Nullable( - Nullability::NullSecond, + NullablePlan::ReadTag { + nullability: Nullability::NullSecond, + resolution: ResolutionPlan::Promotion(Promotion::Direct), + }, NullBufferBuilder::new(DEFAULT_CAPACITY), Box::new(inner), - NullablePlan::ReadTag, ); let mut data = Vec::new(); data.extend_from_slice(&encode_avro_int(0)); @@ -3531,10 +3686,12 @@ mod tests { let dt = avro_from_codec(Codec::Decimal(6, Some(2), Some(16))); let inner = Decoder::try_new(&dt).unwrap(); let mut decoder = Decoder::Nullable( - Nullability::NullSecond, + NullablePlan::ReadTag { + nullability: Nullability::NullSecond, + resolution: ResolutionPlan::Promotion(Promotion::Direct), + }, NullBufferBuilder::new(DEFAULT_CAPACITY), Box::new(inner), - NullablePlan::ReadTag, ); let row1 = [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, @@ -3992,10 +4149,10 @@ mod tests { Decoder::Record( fields, encodings, + vec![None; reader_fields.len()], Some(Projector { writer_to_reader: Arc::from(writer_to_reader), skip_decoders, - field_defaults: vec![None; reader_fields.len()], default_injections: Arc::from(Vec::<(usize, AvroLiteral)>::new()), }), ) @@ -4374,10 +4531,9 @@ mod tests { let projector = Projector { writer_to_reader: Arc::from(vec![None; writer_to_reader_len]), skip_decoders, - field_defaults, default_injections: Arc::from(default_injections), }; - Decoder::Record(fields, encodings, Some(projector)) + Decoder::Record(fields, encodings, field_defaults, Some(projector)) } #[cfg(feature = 
"avro_custom_types")] @@ -4631,10 +4787,12 @@ mod tests { fn test_default_append_nullable_int32_null_and_value() { let inner = Decoder::Int32(Vec::with_capacity(DEFAULT_CAPACITY)); let mut dec = Decoder::Nullable( - Nullability::NullFirst, + NullablePlan::ReadTag { + nullability: Nullability::NullFirst, + resolution: ResolutionPlan::Promotion(Promotion::Direct), + }, NullBufferBuilder::new(DEFAULT_CAPACITY), Box::new(inner), - NullablePlan::ReadTag, ); dec.append_default(&AvroLiteral::Null).unwrap(); dec.append_default(&AvroLiteral::Int(11)).unwrap(); @@ -4885,29 +5043,33 @@ mod tests { field_refs.push(Arc::new(ArrowField::new(*name, dt.clone(), *nullable))); } let enc_a = Decoder::Nullable( - Nullability::NullSecond, + NullablePlan::ReadTag { + nullability: Nullability::NullSecond, + resolution: ResolutionPlan::Promotion(Promotion::Direct), + }, NullBufferBuilder::new(DEFAULT_CAPACITY), Box::new(Decoder::Int32(Vec::with_capacity(DEFAULT_CAPACITY))), - NullablePlan::ReadTag, ); let enc_b = Decoder::Nullable( - Nullability::NullSecond, + NullablePlan::ReadTag { + nullability: Nullability::NullSecond, + resolution: ResolutionPlan::Promotion(Promotion::Direct), + }, NullBufferBuilder::new(DEFAULT_CAPACITY), Box::new(Decoder::String( OffsetBufferBuilder::new(DEFAULT_CAPACITY), Vec::with_capacity(DEFAULT_CAPACITY), )), - NullablePlan::ReadTag, ); encoders.push(enc_a); encoders.push(enc_b); + let field_defaults = vec![None, None]; // no defaults -> append_null let projector = Projector { writer_to_reader: Arc::from(vec![]), skip_decoders: vec![], - field_defaults: vec![None, None], // no defaults -> append_null default_injections: Arc::from(Vec::<(usize, AvroLiteral)>::new()), }; - let mut rec = Decoder::Record(field_refs.into(), encoders, Some(projector)); + let mut rec = Decoder::Record(field_refs.into(), encoders, field_defaults, Some(projector)); let mut map: IndexMap = IndexMap::new(); map.insert("a".to_string(), AvroLiteral::Int(9)); 
rec.append_default(&AvroLiteral::Map(map)).unwrap(); @@ -5034,7 +5196,7 @@ mod tests { Codec::DurationSeconds, ] { let dt = make_avro_dt(codec.clone(), None); - let mut s = Skipper::from_avro(&dt)?; + let s = Skipper::from_avro(&dt)?; for &v in &values { let bytes = encode_avro_long(v); let mut cursor = AvroCursor::new(&bytes); @@ -5055,7 +5217,7 @@ mod tests { #[test] fn skipper_nullable_custom_duration_respects_null_first() -> Result<(), AvroError> { let dt = make_avro_dt(Codec::DurationNanos, Some(Nullability::NullFirst)); - let mut s = Skipper::from_avro(&dt)?; + let s = Skipper::from_avro(&dt)?; match &s { Skipper::Nullable(Nullability::NullFirst, inner) => match **inner { Skipper::Int64 => {} @@ -5084,7 +5246,7 @@ mod tests { #[test] fn skipper_nullable_custom_duration_respects_null_second() -> Result<(), AvroError> { let dt = make_avro_dt(Codec::DurationMicros, Some(Nullability::NullSecond)); - let mut s = Skipper::from_avro(&dt)?; + let s = Skipper::from_avro(&dt)?; match &s { Skipper::Nullable(Nullability::NullSecond, inner) => match **inner { Skipper::Int64 => {} @@ -5115,7 +5277,7 @@ mod tests { #[test] fn skipper_interval_is_fixed12_and_skips_12_bytes() -> Result<(), AvroError> { let dt = make_avro_dt(Codec::Interval, None); - let mut s = Skipper::from_avro(&dt)?; + let s = Skipper::from_avro(&dt)?; match s { Skipper::DurationFixed12 => {} other => panic!("expected DurationFixed12, got {:?}", other), @@ -5227,12 +5389,11 @@ mod tests { Box::new(inner_values), ); let mut dec = Decoder::Nullable( - Nullability::NullSecond, - NullBufferBuilder::new(DEFAULT_CAPACITY), - Box::new(ree), NullablePlan::FromSingle { - promotion: Promotion::IntToDouble, + resolution: ResolutionPlan::Promotion(Promotion::IntToDouble), }, + NullBufferBuilder::new(DEFAULT_CAPACITY), + Box::new(ree), ); for v in [1, 1, 2, 2, 2] { let bytes = encode_avro_int(v); diff --git a/arrow-avro/src/schema.rs b/arrow-avro/src/schema.rs index 90c0d5a1648d..1b0c2e26f773 100644 --- 
a/arrow-avro/src/schema.rs +++ b/arrow-avro/src/schema.rs @@ -78,6 +78,16 @@ pub(crate) enum Nullability { NullSecond, } +impl Nullability { + /// Returns the index of the non-null variant in the union. + pub(crate) fn non_null_index(&self) -> usize { + match self { + Nullability::NullFirst => 1, + Nullability::NullSecond => 0, + } + } +} + /// Either a [`PrimitiveType`] or a reference to a previously defined named type /// /// @@ -3331,7 +3341,11 @@ mod tests { false, )])), false, - ); + ) + .with_metadata(HashMap::from_iter([( + "avro.name".to_owned(), + "R".to_owned(), + )])); assert_eq!(resolved.field(), expected); } @@ -3393,7 +3407,11 @@ mod tests { false, )])), false, - ); + ) + .with_metadata(HashMap::from_iter([( + "avro.name".to_owned(), + "R".to_owned(), + )])); assert_eq!(resolved.field(), expected); } @@ -3430,7 +3448,11 @@ mod tests { )])), ])), false, - ); + ) + .with_metadata(HashMap::from_iter([( + "avro.name".to_owned(), + "R".to_owned(), + )])); assert_eq!(resolved.field(), expected); } From a20753c70c74258831df149e6fb222b6ec501098 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 27 Feb 2026 14:48:28 -0500 Subject: [PATCH 07/80] Update planned release schedule in README.md (#9466) - part of https://github.com/apache/arrow-rs/issues/8466 Update release schedule based on historical reality --- README.md | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 27e0ca13c179..70f2f158e2f4 100644 --- a/README.md +++ b/README.md @@ -91,14 +91,10 @@ Planned Release Schedule | Approximate Date | Version | Notes | | ---------------- | ---------- | --------------------------------------- | -| December 2025 | [`57.2.0`] | Minor, NO breaking API changes | -| January 2026 | [`58.0.0`] | Major, potentially breaking API changes | -| February 2026 | [`58.1.0`] | Minor, NO breaking API changes | -| March 2026 | [`58.2.0`] | Minor, NO breaking API changes | -| April 2026 | [`59.0.0`] | Major, potentially 
breaking API changes | - -[`57.2.0`]: https://github.com/apache/arrow-rs/milestone/5 -[`58.0.0`]: https://github.com/apache/arrow-rs/milestone/6 +| March 2026 | [`58.1.0`] | Minor, NO breaking API changes | +| April 2026 | [`58.2.0`] | Minor, NO breaking API changes | +| May 2026 | [`59.0.0`] | Major, potentially breaking API changes | + [`58.1.0`]: https://github.com/apache/arrow-rs/issues/9108 [`58.2.0`]: https://github.com/apache/arrow-rs/issues/9109 [`59.0.0`]: https://github.com/apache/arrow-rs/issues/9110 From a7acf3d7396d763c0ae2ebba6190358ce574ee5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jochen=20G=C3=B6rtler?= Date: Mon, 2 Mar 2026 14:11:37 +0100 Subject: [PATCH 08/80] Convert `prettyprint` tests in `arrow-cast` to `insta` inline snapshots (#9472) # Rationale for this change The motivation for this PR is to improve the testing infrastructure as a precursor to the following PR: - #9221 @Jefffrey seemed to be in favor of using `insta` for more tests: https://github.com/apache/arrow-rs/pull/9221#discussion_r2735246111 # What changes are included in this PR? This PR makes no logic changes; it is a straightforward translation of the current tests. More test cases, especially around escape sequences, can be added in follow-up PRs. # Are these changes tested? Yes, to review we still need to manually confirm that no test cases changed accidentally. # Are there any user-facing changes? No. 
--- Cargo.toml | 2 + arrow-cast/Cargo.toml | 1 + arrow-cast/src/base64.rs | 2 +- arrow-cast/src/pretty.rs | 1019 +++++++++++++++++--------------------- arrow-schema/Cargo.toml | 2 +- parquet/Cargo.toml | 2 +- 6 files changed, 460 insertions(+), 568 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 8b51c01acab6..1a02830b0b9f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -112,6 +112,8 @@ simdutf8 = { version = "0.1.5", default-features = false } criterion = { version = "0.8.0", default-features = false } +insta = { version = "1.46.3", default-features = false } + # release inherited profile keeping debug information and symbols # for mem/cpu profiling [profile.profiling] diff --git a/arrow-cast/Cargo.toml b/arrow-cast/Cargo.toml index 536bc101a816..81649353d182 100644 --- a/arrow-cast/Cargo.toml +++ b/arrow-cast/Cargo.toml @@ -58,6 +58,7 @@ ryu = "1.0.16" [dev-dependencies] criterion = { workspace = true, default-features = false } half = { version = "2.1", default-features = false } +insta = { workspace = true } rand = "0.9" [[bench]] diff --git a/arrow-cast/src/base64.rs b/arrow-cast/src/base64.rs index 5637bdc689d9..6a8da0141dea 100644 --- a/arrow-cast/src/base64.rs +++ b/arrow-cast/src/base64.rs @@ -106,7 +106,7 @@ mod tests { let data: BinaryArray = (0..len) .map(|_| { let len = rng.random_range(0..16); - Some((0..len).map(|_| rng.random()).collect::>()) + Some((0..len).map(|_| rng.random::()).collect::>()) }) .collect(); diff --git a/arrow-cast/src/pretty.rs b/arrow-cast/src/pretty.rs index e7c199dbed97..e63147cd09c1 100644 --- a/arrow-cast/src/pretty.rs +++ b/arrow-cast/src/pretty.rs @@ -318,20 +318,16 @@ mod tests { let table = pretty_format_batches(&[batch]).unwrap().to_string(); - let expected = vec![ - "+---+-----+", - "| a | b |", - "+---+-----+", - "| a | 1 |", - "| b | |", - "| | 10 |", - "| d | 100 |", - "+---+-----+", - ]; - - let actual: Vec<&str> = table.lines().collect(); - - assert_eq!(expected, actual, "Actual result:\n{table}"); + 
insta::assert_snapshot!(table, @" + +---+-----+ + | a | b | + +---+-----+ + | a | 1 | + | b | | + | | 10 | + | d | 100 | + +---+-----+ + "); } #[test] @@ -348,14 +344,19 @@ mod tests { let table = pretty_format_columns("a", &columns).unwrap().to_string(); - let expected = vec![ - "+---+", "| a |", "+---+", "| a |", "| b |", "| |", "| d |", "| e |", "| |", - "| g |", "+---+", - ]; - - let actual: Vec<&str> = table.lines().collect(); - - assert_eq!(expected, actual, "Actual result:\n{table}"); + insta::assert_snapshot!(table, @" + +---+ + | a | + +---+ + | a | + | b | + | | + | d | + | e | + | | + | g | + +---+ + "); } #[test] @@ -378,20 +379,16 @@ mod tests { let table = pretty_format_batches(&[batch]).unwrap().to_string(); - let expected = vec![ - "+---+---+---+", - "| a | b | c |", - "+---+---+---+", - "| | | |", - "| | | |", - "| | | |", - "| | | |", - "+---+---+---+", - ]; - - let actual: Vec<&str> = table.lines().collect(); - - assert_eq!(expected, actual, "Actual result:\n{table:#?}"); + insta::assert_snapshot!(table, @" + +---+---+---+ + | a | b | c | + +---+---+---+ + | | | | + | | | | + | | | | + | | | | + +---+---+---+ + "); } #[test] @@ -411,19 +408,15 @@ mod tests { let table = pretty_format_batches(&[batch]).unwrap().to_string(); - let expected = vec![ - "+-------+", - "| d1 |", - "+-------+", - "| one |", - "| |", - "| three |", - "+-------+", - ]; - - let actual: Vec<&str> = table.lines().collect(); - - assert_eq!(expected, actual, "Actual result:\n{table}"); + insta::assert_snapshot!(table, @" + +-------+ + | d1 | + +-------+ + | one | + | | + | three | + +-------+ + "); } #[test] @@ -447,19 +440,16 @@ mod tests { let batch = RecordBatch::try_new(schema, vec![array]).unwrap(); let table = pretty_format_batches(&[batch]).unwrap().to_string(); - let expected = vec![ - "+-----------+", - "| d1 |", - "+-----------+", - "| [1, 2, 3] |", - "| |", - "| [7, 8, 9] |", - "+-----------+", - ]; - - let actual: Vec<&str> = table.lines().collect(); - 
assert_eq!(expected, actual, "Actual result:\n{table}"); + insta::assert_snapshot!(table, @" + +-----------+ + | d1 | + +-----------+ + | [1, 2, 3] | + | | + | [7, 8, 9] | + +-----------+ + "); } #[test] @@ -482,22 +472,19 @@ mod tests { let array: ArrayRef = Arc::new(builder.finish()); let batch = RecordBatch::try_new(schema, vec![array]).unwrap(); let table = pretty_format_batches(&[batch]).unwrap().to_string(); - let expected = vec![ - "+-----------------------+", - "| d1 |", - "+-----------------------+", - "| hello |", - "| |", - "| longer than 12 bytes |", - "| another than 12 bytes |", - "| |", - "| small |", - "+-----------------------+", - ]; - let actual: Vec<&str> = table.lines().collect(); - - assert_eq!(expected, actual, "Actual result:\n{table:#?}"); + insta::assert_snapshot!(table, @" + +-----------------------+ + | d1 | + +-----------------------+ + | hello | + | | + | longer than 12 bytes | + | another than 12 bytes | + | | + | small | + +-----------------------+ + "); } #[test] @@ -520,22 +507,19 @@ mod tests { let array: ArrayRef = Arc::new(builder.finish()); let batch = RecordBatch::try_new(schema, vec![array]).unwrap(); let table = pretty_format_batches(&[batch]).unwrap().to_string(); - let expected = vec![ - "+--------------------------------------------+", - "| d1 |", - "+--------------------------------------------+", - "| 68656c6c6f |", - "| |", - "| 6c6f6e676572207468616e203132206279746573 |", - "| 616e6f74686572207468616e203132206279746573 |", - "| |", - "| 736d616c6c |", - "+--------------------------------------------+", - ]; - let actual: Vec<&str> = table.lines().collect(); - - assert_eq!(expected, actual, "Actual result:\n\n{table:#?}"); + insta::assert_snapshot!(table, @" + +--------------------------------------------+ + | d1 | + +--------------------------------------------+ + | 68656c6c6f | + | | + | 6c6f6e676572207468616e203132206279746573 | + | 616e6f74686572207468616e203132206279746573 | + | | + | 736d616c6c | + 
+--------------------------------------------+ + "); } #[test] @@ -554,47 +538,34 @@ mod tests { let batch = RecordBatch::try_new(schema, vec![array]).unwrap(); let table = pretty_format_batches(&[batch]).unwrap().to_string(); - let expected = vec![ - "+--------+", - "| d1 |", - "+--------+", - "| 010203 |", - "| |", - "| 070809 |", - "+--------+", - ]; - let actual: Vec<&str> = table.lines().collect(); - - assert_eq!(expected, actual, "Actual result:\n{table}"); + insta::assert_snapshot!(table, @" + +--------+ + | d1 | + +--------+ + | 010203 | + | | + | 070809 | + +--------+ + "); } - /// Generate an array with type $ARRAYTYPE with a numeric value of - /// $VALUE, and compare $EXPECTED_RESULT to the output of - /// formatting that array with `pretty_format_batches` - macro_rules! check_datetime { - ($ARRAYTYPE:ident, $VALUE:expr, $EXPECTED_RESULT:expr) => { - let mut builder = $ARRAYTYPE::builder(10); - builder.append_value($VALUE); - builder.append_null(); - let array = builder.finish(); - - let schema = Arc::new(Schema::new(vec![Field::new( - "f", - array.data_type().clone(), - true, - )])); - let batch = RecordBatch::try_new(schema, vec![Arc::new(array)]).unwrap(); - - let table = pretty_format_batches(&[batch]) - .expect("formatting batches") - .to_string(); - - let expected = $EXPECTED_RESULT; - let actual: Vec<&str> = table.lines().collect(); - - assert_eq!(expected, actual, "Actual result:\n\n{actual:#?}\n\n"); - }; + /// Generate an array of [`ArrowPrimitiveType`] with a numeric `value`, + /// then format it with `pretty_format_batches`. 
+ fn format_primitive_batch(value: T::Native) -> String { + let mut builder = PrimitiveBuilder::::with_capacity(10); + builder.append_value(value); + builder.append_null(); + let array = builder.finish(); + let schema = Arc::new(Schema::new(vec![Field::new( + "f", + array.data_type().clone(), + true, + )])); + let batch = RecordBatch::try_new(schema, vec![Arc::new(array)]).unwrap(); + pretty_format_batches(&[batch]) + .expect("formatting batches") + .to_string() } fn timestamp_batch(timezone: &str, value: T::Native) -> RecordBatch { @@ -617,158 +588,151 @@ mod tests { let batch = timestamp_batch::("+08:00", 11111111); let table = pretty_format_batches(&[batch]).unwrap().to_string(); - let expected = vec![ - "+---------------------------+", - "| f |", - "+---------------------------+", - "| 1970-05-09T22:25:11+08:00 |", - "| |", - "+---------------------------+", - ]; - let actual: Vec<&str> = table.lines().collect(); - assert_eq!(expected, actual, "Actual result:\n\n{actual:#?}\n\n"); + insta::assert_snapshot!(table, @" + +---------------------------+ + | f | + +---------------------------+ + | 1970-05-09T22:25:11+08:00 | + | | + +---------------------------+ + "); } #[test] fn test_pretty_format_timestamp_second() { - let expected = vec![ - "+---------------------+", - "| f |", - "+---------------------+", - "| 1970-05-09T14:25:11 |", - "| |", - "+---------------------+", - ]; - check_datetime!(TimestampSecondArray, 11111111, expected); + let table = format_primitive_batch::(11111111); + insta::assert_snapshot!(table, @" + +---------------------+ + | f | + +---------------------+ + | 1970-05-09T14:25:11 | + | | + +---------------------+ + "); } #[test] fn test_pretty_format_timestamp_millisecond() { - let expected = vec![ - "+-------------------------+", - "| f |", - "+-------------------------+", - "| 1970-01-01T03:05:11.111 |", - "| |", - "+-------------------------+", - ]; - check_datetime!(TimestampMillisecondArray, 11111111, expected); + let table = 
format_primitive_batch::(11111111); + insta::assert_snapshot!(table, @" + +-------------------------+ + | f | + +-------------------------+ + | 1970-01-01T03:05:11.111 | + | | + +-------------------------+ + "); } #[test] fn test_pretty_format_timestamp_microsecond() { - let expected = vec![ - "+----------------------------+", - "| f |", - "+----------------------------+", - "| 1970-01-01T00:00:11.111111 |", - "| |", - "+----------------------------+", - ]; - check_datetime!(TimestampMicrosecondArray, 11111111, expected); + let table = format_primitive_batch::(11111111); + insta::assert_snapshot!(table, @" + +----------------------------+ + | f | + +----------------------------+ + | 1970-01-01T00:00:11.111111 | + | | + +----------------------------+ + "); } #[test] fn test_pretty_format_timestamp_nanosecond() { - let expected = vec![ - "+-------------------------------+", - "| f |", - "+-------------------------------+", - "| 1970-01-01T00:00:00.011111111 |", - "| |", - "+-------------------------------+", - ]; - check_datetime!(TimestampNanosecondArray, 11111111, expected); + let table = format_primitive_batch::(11111111); + insta::assert_snapshot!(table, @" + +-------------------------------+ + | f | + +-------------------------------+ + | 1970-01-01T00:00:00.011111111 | + | | + +-------------------------------+ + "); } #[test] fn test_pretty_format_date_32() { - let expected = vec![ - "+------------+", - "| f |", - "+------------+", - "| 1973-05-19 |", - "| |", - "+------------+", - ]; - check_datetime!(Date32Array, 1234, expected); + let table = format_primitive_batch::(1234); + insta::assert_snapshot!(table, @" + +------------+ + | f | + +------------+ + | 1973-05-19 | + | | + +------------+ + "); } #[test] fn test_pretty_format_date_64() { - let expected = vec![ - "+---------------------+", - "| f |", - "+---------------------+", - "| 2005-03-18T01:58:20 |", - "| |", - "+---------------------+", - ]; - check_datetime!(Date64Array, 1111111100000, expected); + 
let table = format_primitive_batch::(1111111100000); + insta::assert_snapshot!(table, @" + +---------------------+ + | f | + +---------------------+ + | 2005-03-18T01:58:20 | + | | + +---------------------+ + "); } #[test] fn test_pretty_format_time_32_second() { - let expected = vec![ - "+----------+", - "| f |", - "+----------+", - "| 00:18:31 |", - "| |", - "+----------+", - ]; - check_datetime!(Time32SecondArray, 1111, expected); + let table = format_primitive_batch::(1111); + insta::assert_snapshot!(table, @" + +----------+ + | f | + +----------+ + | 00:18:31 | + | | + +----------+ + "); } #[test] fn test_pretty_format_time_32_millisecond() { - let expected = vec![ - "+--------------+", - "| f |", - "+--------------+", - "| 03:05:11.111 |", - "| |", - "+--------------+", - ]; - check_datetime!(Time32MillisecondArray, 11111111, expected); + let table = format_primitive_batch::(11111111); + insta::assert_snapshot!(table, @" + +--------------+ + | f | + +--------------+ + | 03:05:11.111 | + | | + +--------------+ + "); } #[test] fn test_pretty_format_time_64_microsecond() { - let expected = vec![ - "+-----------------+", - "| f |", - "+-----------------+", - "| 00:00:11.111111 |", - "| |", - "+-----------------+", - ]; - check_datetime!(Time64MicrosecondArray, 11111111, expected); + let table = format_primitive_batch::(11111111); + insta::assert_snapshot!(table, @" + +-----------------+ + | f | + +-----------------+ + | 00:00:11.111111 | + | | + +-----------------+ + "); } #[test] fn test_pretty_format_time_64_nanosecond() { - let expected = vec![ - "+--------------------+", - "| f |", - "+--------------------+", - "| 00:00:00.011111111 |", - "| |", - "+--------------------+", - ]; - check_datetime!(Time64NanosecondArray, 11111111, expected); + let table = format_primitive_batch::(11111111); + insta::assert_snapshot!(table, @" + +--------------------+ + | f | + +--------------------+ + | 00:00:00.011111111 | + | | + +--------------------+ + "); } #[test] fn 
test_int_display() { let array = Arc::new(Int32Array::from(vec![6, 3])) as ArrayRef; - let actual_one = array_value_to_string(&array, 0).unwrap(); - let expected_one = "6"; - - let actual_two = array_value_to_string(&array, 1).unwrap(); - let expected_two = "3"; - assert_eq!(actual_one, expected_one); - assert_eq!(actual_two, expected_two); + insta::assert_snapshot!(array_value_to_string(&array, 0).unwrap(), @"6"); + insta::assert_snapshot!(array_value_to_string(&array, 1).unwrap(), @"3"); } #[test] @@ -794,19 +758,16 @@ mod tests { let table = pretty_format_batches(&[batch]).unwrap().to_string(); - let expected = vec![ - "+-------+", - "| f |", - "+-------+", - "| 1.01 |", - "| |", - "| 2.00 |", - "| 30.40 |", - "+-------+", - ]; - - let actual: Vec<&str> = table.lines().collect(); - assert_eq!(expected, actual, "Actual result:\n{table}"); + insta::assert_snapshot!(table, @" + +-------+ + | f | + +-------+ + | 1.01 | + | | + | 2.00 | + | 30.40 | + +-------+ + "); } #[test] @@ -831,13 +792,17 @@ mod tests { let batch = RecordBatch::try_new(schema, vec![dm]).unwrap(); let table = pretty_format_batches(&[batch]).unwrap().to_string(); - let expected = vec![ - "+------+", "| f |", "+------+", "| 101 |", "| |", "| 200 |", "| 3040 |", - "+------+", - ]; - let actual: Vec<&str> = table.lines().collect(); - assert_eq!(expected, actual, "Actual result:\n{table}"); + insta::assert_snapshot!(table, @" + +------+ + | f | + +------+ + | 101 | + | | + | 200 | + | 3040 | + +------+ + "); } #[test] @@ -881,18 +846,16 @@ mod tests { RecordBatch::try_new(Arc::new(schema), vec![Arc::new(c1), Arc::new(c2)]).unwrap(); let table = pretty_format_batches(&[batch]).unwrap().to_string(); - let expected = vec![ - "+--------------------------+----+", - "| c1 | c2 |", - "+--------------------------+----+", - "| {c11: 1, c12: {c121: e}} | a |", - "| {c11: , c12: {c121: f}} | b |", - "| {c11: 5, c12: {c121: g}} | c |", - "+--------------------------+----+", - ]; - let actual: Vec<&str> = 
table.lines().collect(); - assert_eq!(expected, actual, "Actual result:\n{table}"); + insta::assert_snapshot!(table, @" + +--------------------------+----+ + | c1 | c2 | + +--------------------------+----+ + | {c11: 1, c12: {c121: e}} | a | + | {c11: , c12: {c121: f}} | b | + | {c11: 5, c12: {c121: g}} | c | + +--------------------------+----+ + "); } #[test] @@ -916,19 +879,17 @@ mod tests { let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(union)]).unwrap(); let table = pretty_format_batches(&[batch]).unwrap().to_string(); - let actual: Vec<&str> = table.lines().collect(); - let expected = vec![ - "+------------+", - "| Teamsters |", - "+------------+", - "| {a=1} |", - "| {b=3.2234} |", - "| {b=} |", - "| {a=} |", - "+------------+", - ]; - assert_eq!(expected, actual); + insta::assert_snapshot!(table, @" + +------------+ + | Teamsters | + +------------+ + | {a=1} | + | {b=3.2234} | + | {b=} | + | {a=} | + +------------+ + "); } #[test] @@ -952,19 +913,17 @@ mod tests { let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(union)]).unwrap(); let table = pretty_format_batches(&[batch]).unwrap().to_string(); - let actual: Vec<&str> = table.lines().collect(); - let expected = vec![ - "+------------+", - "| Teamsters |", - "+------------+", - "| {a=1} |", - "| {b=3.2234} |", - "| {b=} |", - "| {a=} |", - "+------------+", - ]; - assert_eq!(expected, actual); + insta::assert_snapshot!(table, @" + +------------+ + | Teamsters | + +------------+ + | {a=1} | + | {b=3.2234} | + | {b=} | + | {a=} | + +------------+ + "); } #[test] @@ -1012,19 +971,18 @@ mod tests { let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(outer)]).unwrap(); let table = pretty_format_batches(&[batch]).unwrap().to_string(); - let actual: Vec<&str> = table.lines().collect(); - let expected = vec![ - "+-----------------------------+", - "| Teamsters |", - "+-----------------------------+", - "| {European Union={b=1}} |", - "| {European Union={c=3.2234}} |", - 
"| {a=} |", - "| {a=1234} |", - "| {European Union={c=}} |", - "+-----------------------------+", - ]; - assert_eq!(expected, actual); + + insta::assert_snapshot!(table, @" + +-----------------------------+ + | Teamsters | + +-----------------------------+ + | {European Union={b=1}} | + | {European Union={c=3.2234}} | + | {a=} | + | {a=1234} | + | {European Union={c=}} | + +-----------------------------+ + "); } #[test] @@ -1055,21 +1013,18 @@ mod tests { ) .unwrap(); - let mut buf = String::new(); - write!(&mut buf, "{}", pretty_format_batches(&[batch]).unwrap()).unwrap(); - - let s = [ - "+---+-----+", - "| a | b |", - "+---+-----+", - "| a | 1 |", - "| b | |", - "| | 10 |", - "| d | 100 |", - "+---+-----+", - ]; - let expected = s.join("\n"); - assert_eq!(expected, buf); + let table = pretty_format_batches(&[batch]).unwrap().to_string(); + + insta::assert_snapshot!(table, @" + +---+-----+ + | a | b | + +---+-----+ + | a | 1 | + | b | | + | | 10 | + | d | 100 | + +---+-----+ + "); } #[test] @@ -1091,12 +1046,15 @@ mod tests { let table = pretty_format_batches(&[batch]).unwrap().to_string(); - let expected = vec![ - "+------+", "| f16 |", "+------+", "| NaN |", "| 4 |", "| -inf |", "+------+", - ]; - - let actual: Vec<&str> = table.lines().collect(); - assert_eq!(expected, actual, "Actual result:\n{table}"); + insta::assert_snapshot!(table, @" + +------+ + | f16 | + +------+ + | NaN | + | 4 | + | -inf | + +------+ + "); } #[test] @@ -1121,23 +1079,19 @@ mod tests { let table = pretty_format_batches(&[batch]).unwrap().to_string(); - let expected = vec![ - "+------------------+", - "| IntervalDayTime |", - "+------------------+", - "| -1 days -10 mins |", - "| -1.001 secs |", - "| -0.001 secs |", - "| 0.001 secs |", - "| 0.010 secs |", - "| 0.100 secs |", - "| 0 secs |", - "+------------------+", - ]; - - let actual: Vec<&str> = table.lines().collect(); - - assert_eq!(expected, actual, "Actual result:\n{table}"); + insta::assert_snapshot!(table, @" + 
+------------------+ + | IntervalDayTime | + +------------------+ + | -1 days -10 mins | + | -1.001 secs | + | -0.001 secs | + | 0.001 secs | + | 0.010 secs | + | 0.100 secs | + | 0 secs | + +------------------+ + "); } #[test] @@ -1169,30 +1123,26 @@ mod tests { let table = pretty_format_batches(&[batch]).unwrap().to_string(); - let expected = vec![ - "+--------------------------+", - "| IntervalMonthDayNano |", - "+--------------------------+", - "| -1 mons -1 days -10 mins |", - "| -1.000000001 secs |", - "| -0.000000001 secs |", - "| 0.000000001 secs |", - "| 0.000000010 secs |", - "| 0.000000100 secs |", - "| 0.000001000 secs |", - "| 0.000010000 secs |", - "| 0.000100000 secs |", - "| 0.001000000 secs |", - "| 0.010000000 secs |", - "| 0.100000000 secs |", - "| 1.000000000 secs |", - "| 0 secs |", - "+--------------------------+", - ]; - - let actual: Vec<&str> = table.lines().collect(); - - assert_eq!(expected, actual, "Actual result:\n{table}"); + insta::assert_snapshot!(table, @" + +--------------------------+ + | IntervalMonthDayNano | + +--------------------------+ + | -1 mons -1 days -10 mins | + | -1.000000001 secs | + | -0.000000001 secs | + | 0.000000001 secs | + | 0.000000010 secs | + | 0.000000100 secs | + | 0.000001000 secs | + | 0.000010000 secs | + | 0.000100000 secs | + | 0.001000000 secs | + | 0.010000000 secs | + | 0.100000000 secs | + | 1.000000000 secs | + | 0 secs | + +--------------------------+ + "); } #[test] @@ -1218,40 +1168,34 @@ mod tests { .unwrap() .to_string(); - let expected_column = vec![ - "+----------------+", - "| my_column_name |", - "+----------------+", - "| 1 |", - "| 2 |", - "| null |", - "| 3 |", - "| 4 |", - "+----------------+", - ]; - - let actual: Vec<&str> = column.lines().collect(); - assert_eq!(expected_column, actual, "Actual result:\n{column}"); - - let batch = pretty_format_batches_with_options(&[batch], &options) + insta::assert_snapshot!(column, @" + +----------------+ + | my_column_name | + 
+----------------+ + | 1 | + | 2 | + | null | + | 3 | + | 4 | + +----------------+ + "); + + let table = pretty_format_batches_with_options(&[batch], &options) .unwrap() .to_string(); - let expected_table = vec![ - "+---------------+----------------+", - "| my_int32_name | my_string_name |", - "| Int32 | Utf8 |", - "+---------------+----------------+", - "| 1 | foo |", - "| 2 | bar |", - "| null | null |", - "| 3 | baz |", - "| 4 | null |", - "+---------------+----------------+", - ]; - - let actual: Vec<&str> = batch.lines().collect(); - assert_eq!(expected_table, actual, "Actual result:\n{batch}"); + insta::assert_snapshot!(table, @" + +---------------+----------------+ + | my_int32_name | my_string_name | + | Int32 | Utf8 | + +---------------+----------------+ + | 1 | foo | + | 2 | bar | + | null | null | + | 3 | baz | + | 4 | null | + +---------------+----------------+ + "); } #[test] @@ -1268,20 +1212,16 @@ mod tests { .unwrap() .to_string(); - // Expected output - let expected_pretty = vec![ - "+------------------------------+", - "| pretty |", - "+------------------------------+", - "| |", - "| |", - "| 0 days 1 hours 1 mins 1 secs |", - "| null |", - "+------------------------------+", - ]; - - let actual: Vec<&str> = pretty.lines().collect(); - assert_eq!(expected_pretty, actual, "Actual result:\n{pretty}"); + insta::assert_snapshot!(pretty, @" + +------------------------------+ + | pretty | + +------------------------------+ + | | + | | + | 0 days 1 hours 1 mins 1 secs | + | null | + +------------------------------+ + "); // ISO8601 formatting let opts_iso = FormatOptions::default() @@ -1291,20 +1231,16 @@ mod tests { .unwrap() .to_string(); - // Expected output - let expected_iso = vec![ - "+-----------+", - "| iso |", - "+-----------+", - "| |", - "| |", - "| PT3661S |", - "| null |", - "+-----------+", - ]; - - let actual: Vec<&str> = iso.lines().collect(); - assert_eq!(expected_iso, actual, "Actual result:\n{iso}"); + insta::assert_snapshot!(iso, @" + 
+-----------+ + | iso | + +-----------+ + | | + | | + | PT3661S | + | null | + +-----------+ + "); } // @@ -1408,26 +1344,20 @@ mod tests { ) .unwrap(); - let mut buf = String::new(); - write!( - &mut buf, - "{}", - pretty_format_batches_with_options(&[batch], &options).unwrap() - ) - .unwrap(); + let table = pretty_format_batches_with_options(&[batch], &options) + .unwrap() + .to_string(); - let s = [ - "+--------+", - "| income |", - "+--------+", - "| 1 € |", - "| |", - "| 10 € |", - "| 100 € |", - "+--------+", - ]; - let expected = s.join("\n"); - assert_eq!(expected, buf); + insta::assert_snapshot!(table, @" + +--------+ + | income | + +--------+ + | 1 € | + | | + | 10 € | + | 100 € | + +--------+ + "); } #[test] @@ -1466,24 +1396,18 @@ mod tests { // define data. let batch = RecordBatch::try_new(schema, vec![Arc::new(outer_list)]).unwrap(); - let mut buf = String::new(); - write!( - &mut buf, - "{}", - pretty_format_batches_with_options(&[batch], &options).unwrap() - ) - .unwrap(); + let table = pretty_format_batches_with_options(&[batch], &options) + .unwrap() + .to_string(); - let s = [ - "+----------------------------------+", - "| income |", - "+----------------------------------+", - "| [[1 €], ] |", - "| [[2 €, 8 €], [50 €, 25 €, 25 €]] |", - "+----------------------------------+", - ]; - let expected = s.join("\n"); - assert_eq!(expected, buf); + insta::assert_snapshot!(table, @" + +----------------------------------+ + | income | + +----------------------------------+ + | [[1 €], ] | + | [[2 €, 8 €], [50 €, 25 €, 25 €]] | + +----------------------------------+ + "); } #[test] @@ -1530,25 +1454,19 @@ mod tests { // define data. 
let batch = RecordBatch::try_new(schema, vec![Arc::new(nested_data.finish())]).unwrap(); - let mut buf = String::new(); - write!( - &mut buf, - "{}", - pretty_format_batches_with_options(&[batch], &options).unwrap() - ) - .unwrap(); + let table = pretty_format_batches_with_options(&[batch], &options) + .unwrap() + .to_string(); - let s = [ - "+---------------------------------+", - "| income |", - "+---------------------------------+", - "| {name: Gimli, income: 10 €} |", - "| {name: Legolas, income: } |", - "| {name: Aragorn, income: 30 €} |", - "+---------------------------------+", - ]; - let expected = s.join("\n"); - assert_eq!(expected, buf); + insta::assert_snapshot!(table, @" + +---------------------------------+ + | income | + +---------------------------------+ + | {name: Gimli, income: 10 €} | + | {name: Legolas, income: } | + | {name: Aragorn, income: 30 €} | + +---------------------------------+ + "); } #[test] @@ -1585,23 +1503,17 @@ mod tests { )])); let batch = RecordBatch::try_new(schema, vec![Arc::new(array)]).unwrap(); - let mut buf = String::new(); - write!( - &mut buf, - "{}", - pretty_format_batches_with_options(&[batch], &options).unwrap() - ) - .unwrap(); + let table = pretty_format_batches_with_options(&[batch], &options) + .unwrap() + .to_string(); - let s = [ - "+-----------------------------------------------+", - "| income |", - "+-----------------------------------------------+", - "| {Gimli: 10 €, Legolas: , Aragorn: 30 €} |", - "+-----------------------------------------------+", - ]; - let expected = s.join("\n"); - assert_eq!(expected, buf); + insta::assert_snapshot!(table, @" + +-----------------------------------------------+ + | income | + +-----------------------------------------------+ + | {Gimli: 10 €, Legolas: , Aragorn: 30 €} | + +-----------------------------------------------+ + "); } #[test] @@ -1635,23 +1547,17 @@ mod tests { // define data. 
let batch = RecordBatch::try_new(schema, vec![Arc::new(array)]).unwrap(); - let mut buf = String::new(); - write!( - &mut buf, - "{}", - pretty_format_batches_with_options(&[batch], &options).unwrap() - ) - .unwrap(); + let table = pretty_format_batches_with_options(&[batch], &options) + .unwrap() + .to_string(); - let s = [ - "+--------------+", - "| income |", - "+--------------+", - "| {income=1 €} |", - "+--------------+", - ]; - let expected = s.join("\n"); - assert_eq!(expected, buf); + insta::assert_snapshot!(table, @" + +--------------+ + | income | + +--------------+ + | {income=1 €} | + +--------------+ + "); } #[test] @@ -1678,37 +1584,30 @@ mod tests { ) .unwrap(); - let mut buf = String::new(); - write!( - &mut buf, - "{}", - create_table( - // No metadata compared to test_format_batches_with_custom_formatters - Some(Arc::new(Schema::new(vec![Field::new( - "income", - DataType::Int32, - true - ),]))), - &[batch], - &options, - ) - .unwrap() + let table = create_table( + // No metadata compared to test_format_batches_with_custom_formatters + Some(Arc::new(Schema::new(vec![Field::new( + "income", + DataType::Int32, + true, + )]))), + &[batch], + &options, ) - .unwrap(); + .unwrap() + .to_string(); // No € formatting as in test_format_batches_with_custom_formatters - let s = [ - "+--------------+", - "| income |", - "+--------------+", - "| 1 (32-Bit) |", - "| |", - "| 10 (32-Bit) |", - "| 100 (32-Bit) |", - "+--------------+", - ]; - let expected = s.join("\n"); - assert_eq!(expected, buf); + insta::assert_snapshot!(table, @" + +--------------+ + | income | + +--------------+ + | 1 (32-Bit) | + | | + | 10 (32-Bit) | + | 100 (32-Bit) | + +--------------+ + "); } #[test] @@ -1721,31 +1620,24 @@ mod tests { Some(100), ])); - let mut buf = String::new(); - write!( - &mut buf, - "{}", - pretty_format_columns_with_options( - "income", - &[array], - &FormatOptions::default().with_formatter_factory(Some(&TestFormatters {})) - ) - .unwrap() + let table = 
pretty_format_columns_with_options( + "income", + &[array], + &FormatOptions::default().with_formatter_factory(Some(&TestFormatters {})), ) - .unwrap(); + .unwrap() + .to_string(); - let s = [ - "+--------------+", - "| income |", - "+--------------+", - "| 1 (32-Bit) |", - "| |", - "| 10 (32-Bit) |", - "| 100 (32-Bit) |", - "+--------------+", - ]; - let expected = s.join("\n"); - assert_eq!(expected, buf); + insta::assert_snapshot!(table, @" + +--------------+ + | income | + +--------------+ + | 1 (32-Bit) | + | | + | 10 (32-Bit) | + | 100 (32-Bit) | + +--------------+ + "); } #[test] @@ -1771,9 +1663,6 @@ mod tests { let error = pretty_format_batches_with_schema(schema_a, &[batch]) .err() .unwrap(); - assert_eq!( - &error.to_string(), - "Invalid argument error: Expected the same number of columns in a record batch (1) as the number of fields (2) in the schema" - ); + insta::assert_snapshot!(error, @"Invalid argument error: Expected the same number of columns in a record batch (1) as the number of fields (2) in the schema"); } } diff --git a/arrow-schema/Cargo.toml b/arrow-schema/Cargo.toml index fb6461a9e9ae..2991e2aa46b6 100644 --- a/arrow-schema/Cargo.toml +++ b/arrow-schema/Cargo.toml @@ -54,7 +54,7 @@ all-features = true [dev-dependencies] criterion = { workspace = true, default-features = false } -insta = "1.43.1" +insta = { workspace = true, default-features = true } postcard = { version = "1.0.10", default-features = false, features = ["use-std"] } [[bench]] diff --git a/parquet/Cargo.toml b/parquet/Cargo.toml index d1ada01c3773..75ab432cceb8 100644 --- a/parquet/Cargo.toml +++ b/parquet/Cargo.toml @@ -83,7 +83,7 @@ base64 = { version = "0.22", default-features = false, features = ["std"] } criterion = { workspace = true, default-features = false, features = ["async_futures"] } snap = { version = "1.0", default-features = false } tempfile = { version = "3.0", default-features = false } -insta = "1.43.1" +insta = { workspace = true, default-features = true 
} brotli = { version = "8.0", default-features = false, features = ["std"] } flate2 = { version = "1.0", default-features = false, features = ["rust_backend"] } lz4_flex = { version = "0.12", default-features = false, features = ["std", "frame"] } From 9ec9f578fc7e1fa38534e3cf4859822c50001be5 Mon Sep 17 00:00:00 2001 From: Yan Tingwang Date: Tue, 3 Mar 2026 04:24:31 +0800 Subject: [PATCH 09/80] Deprecate ArrowTimestampType::make_value in favor of from_naive_datetime (#9491) Mark ArrowTimestampType::make_value as deprecated and migrate internal callers to the newer from_naive_datetime API. # Which issue does this PR close? - Closes #9490 . # Rationale for this change Follow-up from PR #9345. # What changes are included in this PR? Mark ArrowTimestampType::make_value as deprecated and migrate internal callers to the newer from_naive_datetime API. # Are these changes tested? YES. # Are there any user-facing changes? Migration Path: Users should replace: ```rust // Old TimestampSecondType::make_value(naive) ``` With: ```rust // New TimestampSecondType::from_naive_datetime(naive, None) ``` --- arrow-arith/src/numeric.rs | 5 ++++- arrow-array/src/types.rs | 21 +++++++------------ arrow-cast/src/cast/mod.rs | 2 +- arrow-cast/src/cast/string.rs | 4 ++-- arrow/tests/arithmetic.rs | 2 +- .../src/type_conversion.rs | 16 +++++++------- 6 files changed, 24 insertions(+), 26 deletions(-) diff --git a/arrow-arith/src/numeric.rs b/arrow-arith/src/numeric.rs index a57ba67544b7..f5a844ffd280 100644 --- a/arrow-arith/src/numeric.rs +++ b/arrow-arith/src/numeric.rs @@ -1320,7 +1320,10 @@ mod tests { "1960-01-30T04:23:20Z", ] .into_iter() - .map(|x| T::make_value(DateTime::parse_from_rfc3339(x).unwrap().naive_utc()).unwrap()) + .map(|x| { + T::from_naive_datetime(DateTime::parse_from_rfc3339(x).unwrap().naive_utc(), None) + .unwrap() + }) .collect(); let a = PrimitiveArray::::new(values, None); diff --git a/arrow-array/src/types.rs b/arrow-array/src/types.rs index 
ff1caaacaecc..267011d8af80 100644 --- a/arrow-array/src/types.rs +++ b/arrow-array/src/types.rs @@ -324,6 +324,7 @@ pub trait ArrowTimestampType: ArrowTemporalType { /// Creates a ArrowTimestampType::Native from the provided [`NaiveDateTime`] /// /// See [`DataType::Timestamp`] for more information on timezone handling + #[deprecated(since = "58.1.0", note = "Use from_naive_datetime instead")] fn make_value(naive: NaiveDateTime) -> Option; /// Creates a timestamp value from a [`DateTime`] in any timezone. @@ -350,7 +351,7 @@ pub trait ArrowTimestampType: ArrowTemporalType { chrono::offset::LocalResult::Ambiguous(dt1, _) => Self::from_datetime(dt1), chrono::offset::LocalResult::None => None, }, - None => Self::make_value(naive), + None => Self::from_datetime(naive.and_utc()), } } } @@ -416,8 +417,7 @@ fn add_year_months( let months = IntervalYearMonthType::to_months(delta); let res = as_datetime_with_timezone::(timestamp, tz)?; let res = add_months_datetime(res, months)?; - let res = res.naive_utc(); - T::make_value(res) + T::from_naive_datetime(res.naive_utc(), None) } fn add_day_time( @@ -429,8 +429,7 @@ fn add_day_time( let res = as_datetime_with_timezone::(timestamp, tz)?; let res = add_days_datetime(res, days)?; let res = res.checked_add_signed(Duration::try_milliseconds(ms as i64)?)?; - let res = res.naive_utc(); - T::make_value(res) + T::from_naive_datetime(res.naive_utc(), None) } fn add_month_day_nano( @@ -443,8 +442,7 @@ fn add_month_day_nano( let res = add_months_datetime(res, months)?; let res = add_days_datetime(res, days)?; let res = res.checked_add_signed(Duration::nanoseconds(nanos))?; - let res = res.naive_utc(); - T::make_value(res) + T::from_naive_datetime(res.naive_utc(), None) } fn subtract_year_months( @@ -455,8 +453,7 @@ fn subtract_year_months( let months = IntervalYearMonthType::to_months(delta); let res = as_datetime_with_timezone::(timestamp, tz)?; let res = sub_months_datetime(res, months)?; - let res = res.naive_utc(); - 
T::make_value(res) + T::from_naive_datetime(res.naive_utc(), None) } fn subtract_day_time( @@ -468,8 +465,7 @@ fn subtract_day_time( let res = as_datetime_with_timezone::(timestamp, tz)?; let res = sub_days_datetime(res, days)?; let res = res.checked_sub_signed(Duration::try_milliseconds(ms as i64)?)?; - let res = res.naive_utc(); - T::make_value(res) + T::from_naive_datetime(res.naive_utc(), None) } fn subtract_month_day_nano( @@ -482,8 +478,7 @@ fn subtract_month_day_nano( let res = sub_months_datetime(res, months)?; let res = sub_days_datetime(res, days)?; let res = res.checked_sub_signed(Duration::nanoseconds(nanos))?; - let res = res.naive_utc(); - T::make_value(res) + T::from_naive_datetime(res.naive_utc(), None) } impl TimestampSecondType { diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs index 67efb5742485..9f1eba1057fd 100644 --- a/arrow-cast/src/cast/mod.rs +++ b/arrow-cast/src/cast/mod.rs @@ -2507,7 +2507,7 @@ fn adjust_timestamp_to_timezone( let adjust = |o| { let local = as_datetime::(o)?; let offset = to_tz.offset_from_local_datetime(&local).single()?; - T::make_value(local - offset.fix()) + T::from_naive_datetime(local - offset.fix(), None) }; let adjusted = if cast_options.safe { array.unary_opt::<_, Int64Type>(adjust) diff --git a/arrow-cast/src/cast/string.rs b/arrow-cast/src/cast/string.rs index 77696ae0d8cc..68fce85cb436 100644 --- a/arrow-cast/src/cast/string.rs +++ b/arrow-cast/src/cast/string.rs @@ -168,7 +168,7 @@ fn cast_string_to_timestamp_impl< let iter = iter.map(|v| { v.and_then(|v| { let naive = string_to_datetime(tz, v).ok()?.naive_utc(); - T::make_value(naive) + T::from_naive_datetime(naive, None) }) }); // Benefit: @@ -182,7 +182,7 @@ fn cast_string_to_timestamp_impl< .map(|v| { v.map(|v| { let naive = string_to_datetime(tz, v)?.naive_utc(); - T::make_value(naive).ok_or_else(|| match T::UNIT { + T::from_naive_datetime(naive, None).ok_or_else(|| match T::UNIT { TimeUnit::Nanosecond => 
ArrowError::CastError(format!( "Overflow converting {naive} to Nanosecond. The dates that can be represented as nanoseconds have to be between 1677-09-21T00:12:44.0 and 2262-04-11T23:47:16.854775804" )), diff --git a/arrow/tests/arithmetic.rs b/arrow/tests/arithmetic.rs index cc6a97e123f8..5d024f715a1e 100644 --- a/arrow/tests/arithmetic.rs +++ b/arrow/tests/arithmetic.rs @@ -76,7 +76,7 @@ fn test_timestamp_with_timezone_impl(tz_str: &str) { .naive_utc(), ] .into_iter() - .map(|x| T::make_value(x).unwrap()) + .map(|x| T::from_naive_datetime(x, None).unwrap()) .collect(); let a = PrimitiveArray::::new(values, None).with_timezone(tz_str); diff --git a/parquet-variant-compute/src/type_conversion.rs b/parquet-variant-compute/src/type_conversion.rs index 6a0a743c9029..42bac5727aa5 100644 --- a/parquet-variant-compute/src/type_conversion.rs +++ b/parquet-variant-compute/src/type_conversion.rs @@ -109,7 +109,7 @@ impl_timestamp_from_variant!( if timestamp.nanosecond() != 0 { None } else { - Self::make_value(timestamp) + Self::from_naive_datetime(timestamp, None) } } ); @@ -122,7 +122,7 @@ impl_timestamp_from_variant!( if timestamp.nanosecond() != 0 { None } else { - Self::make_value(timestamp.naive_utc()) + Self::from_naive_datetime(timestamp.naive_utc(), None) } } ); @@ -135,7 +135,7 @@ impl_timestamp_from_variant!( if timestamp.nanosecond() % 1_000_000 != 0 { None } else { - Self::make_value(timestamp) + Self::from_naive_datetime(timestamp, None) } } ); @@ -148,7 +148,7 @@ impl_timestamp_from_variant!( if timestamp.nanosecond() % 1_000_000 != 0 { None } else { - Self::make_value(timestamp.naive_utc()) + Self::from_naive_datetime(timestamp.naive_utc(), None) } } ); @@ -156,25 +156,25 @@ impl_timestamp_from_variant!( datatypes::TimestampMicrosecondType, as_timestamp_ntz_micros, ntz = true, - Self::make_value, + |timestamp| Self::from_naive_datetime(timestamp, None), ); impl_timestamp_from_variant!( datatypes::TimestampMicrosecondType, as_timestamp_micros, ntz = false, - 
|timestamp| Self::make_value(timestamp.naive_utc()) + |timestamp| Self::from_naive_datetime(timestamp.naive_utc(), None) ); impl_timestamp_from_variant!( datatypes::TimestampNanosecondType, as_timestamp_ntz_nanos, ntz = true, - Self::make_value + |timestamp| Self::from_naive_datetime(timestamp, None) ); impl_timestamp_from_variant!( datatypes::TimestampNanosecondType, as_timestamp_nanos, ntz = false, - |timestamp| Self::make_value(timestamp.naive_utc()) + |timestamp| Self::from_naive_datetime(timestamp.naive_utc(), None) ); /// Returns the unscaled integer representation for Arrow decimal type `O` From 4d8e8baed0a712f875d7ee83536be2c983261631 Mon Sep 17 00:00:00 2001 From: Yan Tingwang Date: Tue, 3 Mar 2026 05:48:19 +0800 Subject: [PATCH 10/80] chore: remove duplicate macro `partially_shredded_variant_array_gen` (#9498) # Which issue does this PR close? - Closes #9492 . # What changes are included in this PR? See title. # Are these changes tested? YES # Are there any user-facing changes? NO --- parquet-variant-compute/src/variant_get.rs | 48 +--------------------- 1 file changed, 2 insertions(+), 46 deletions(-) diff --git a/parquet-variant-compute/src/variant_get.rs b/parquet-variant-compute/src/variant_get.rs index f9985084cc49..e02518057be1 100644 --- a/parquet-variant-compute/src/variant_get.rs +++ b/parquet-variant-compute/src/variant_get.rs @@ -466,6 +466,8 @@ mod test { macro_rules! partially_shredded_variant_array_gen { ($func_name:ident, $typed_value_array_gen: expr) => { fn $func_name() -> ArrayRef { + // At the time of writing, the `VariantArrayBuilder` does not support shredding. + // so we must construct the array manually. see https://github.com/apache/arrow-rs/issues/7895 let (metadata, string_value) = { let mut builder = parquet_variant::VariantBuilder::new(); builder.append_value("n/a"); @@ -1674,52 +1676,6 @@ mod test { }; } - macro_rules! 
partially_shredded_variant_array_gen { - ($func:ident, $typed_array_gen: expr) => { - fn $func() -> ArrayRef { - // At the time of writing, the `VariantArrayBuilder` does not support shredding. - // so we must construct the array manually. see https://github.com/apache/arrow-rs/issues/7895 - let (metadata, string_value) = { - let mut builder = parquet_variant::VariantBuilder::new(); - builder.append_value("n/a"); - builder.finish() - }; - - let nulls = NullBuffer::from(vec![ - true, // row 0 non null - false, // row 1 is null - true, // row 2 non null - true, // row 3 non null - ]); - - // metadata is the same for all rows - let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 4)); - - // See https://docs.google.com/document/d/1pw0AWoMQY3SjD7R4LgbPvMjG_xSCtXp3rZHkVp9jpZ4/edit?disco=AAABml8WQrY - // about why row1 is an empty but non null, value. - let values = BinaryViewArray::from(vec![ - None, // row 0 is shredded, so no value - Some(b"" as &[u8]), // row 1 is null, so empty value (why?) - Some(&string_value), // copy the string value "N/A" - None, // row 3 is shredded, so no value - ]); - - let typed_value = $typed_array_gen(); - - let struct_array = StructArrayBuilder::new() - .with_field("metadata", Arc::new(metadata), false) - .with_field("typed_value", Arc::new(typed_value), true) - .with_field("value", Arc::new(values), true) - .with_nulls(nulls) - .build(); - - ArrayRef::from( - VariantArray::try_new(&struct_array).expect("should create variant array"), - ) - } - }; - } - numeric_partially_shredded_variant_array_fn!( partially_shredded_int8_variant_array, Int8Array, From d99043e3c3a30f283cc2b3332770f8e65e8d9d8e Mon Sep 17 00:00:00 2001 From: Congxian Qiu Date: Tue, 3 Mar 2026 05:49:08 +0800 Subject: [PATCH 11/80] [Variant] Enahcne bracket access for VariantPath (#9479) # Which issue does this PR close? - Closes #9478 . # What changes are included in this PR? 
- Fix the typo - Enhance the bracket access for the variant path # Are these changes tested? - Add some tests to cover the logic # Are there any user-facing changes? No --- parquet-variant/src/path.rs | 33 +++++++++++++++++++++++++++++---- parquet-variant/src/utils.rs | 25 ++++++++++++++++++++----- 2 files changed, 49 insertions(+), 9 deletions(-) diff --git a/parquet-variant/src/path.rs b/parquet-variant/src/path.rs index fe10d0451d54..8e68d9efadf2 100644 --- a/parquet-variant/src/path.rs +++ b/parquet-variant/src/path.rs @@ -75,14 +75,15 @@ use std::{borrow::Cow, ops::Deref}; /// assert_eq!(path[1], VariantPathElement::field("bar")); /// ``` /// -/// # Example: Accessing filed with bracket +/// # Example: Accessing field with bracket /// ``` /// # use parquet_variant::{VariantPath, VariantPathElement}; -/// let path = VariantPath::try_from("a[b.c].d[2]").unwrap(); +/// let path = VariantPath::try_from("a['b.c'].d[2]['3']").unwrap(); /// let expected = VariantPath::from_iter([VariantPathElement::field("a"), /// VariantPathElement::field("b.c"), /// VariantPathElement::field("d"), -/// VariantPathElement::index(2)]); +/// VariantPathElement::index(2), +/// VariantPathElement::field("3")]); /// assert_eq!(path, expected) #[derive(Debug, Clone, PartialEq, Default)] pub struct VariantPath<'a>(Vec>); @@ -287,11 +288,22 @@ mod tests { assert_eq!(path, expected); // invalid index will be treated as field - let path = VariantPath::try_from("foo.bar[abc]").unwrap(); + let path = VariantPath::try_from("foo.bar['abc'][\"def\"]").unwrap(); let expected = VariantPath::from_iter([ VariantPathElement::field("foo"), VariantPathElement::field("bar"), VariantPathElement::field("abc"), + VariantPathElement::field("def"), + ]); + assert_eq!(path, expected); + + // a number quoted with `'` is treated as field, not index + let path = VariantPath::try_from("foo['0'].bar[\"1\"]").unwrap(); + let expected = VariantPath::from_iter([ + VariantPathElement::field("foo"), + 
VariantPathElement::field("0"), + VariantPathElement::field("bar"), + VariantPathElement::field("1"), ]); assert_eq!(path, expected); } @@ -321,5 +333,18 @@ mod tests { // No '[' before ']' let err = VariantPath::try_from("foo.bar]baz").unwrap_err(); assert_eq!(err.to_string(), "Parser error: Unexpected ']' at byte 7"); + + // Invalid number(without quote) parse + let err = VariantPath::try_from("foo.bar[123abc]").unwrap_err(); + assert_eq!( + err.to_string(), + "Parser error: Invalid token in bracket request: `123abc`. Expected a quoted string or a number(e.g., `['field']` or `[123]`)" + ); + + let err = VariantPath::try_from("foo.bar[abc]").unwrap_err(); + assert_eq!( + err.to_string(), + "Parser error: Invalid token in bracket request: `abc`. Expected a quoted string or a number(e.g., `['field']` or `[123]`)" + ); } } diff --git a/parquet-variant/src/utils.rs b/parquet-variant/src/utils.rs index 0984a601b213..85d79ed8aea0 100644 --- a/parquet-variant/src/utils.rs +++ b/parquet-variant/src/utils.rs @@ -170,9 +170,10 @@ pub(crate) fn fits_precision(n: impl Into) -> bool { /// - `"foo"` -> single field `foo` /// - `"foo.bar"` -> nested fields `foo`, `bar` /// - `"[1]"` -> array index 1 +/// - `"['1']"` or `"["1"]"`-> field `1` /// - `"foo[1].bar"` -> field `foo`, index 1, field `bar` -/// - `"[a.b]"` -> field `a.b` (dot is literal inside bracket) -/// - `"[a\\]b]"` -> field `a]b` (escaped `]` +/// - `"['a.b']"` -> field `a.b` (dot is literal inside bracket) +/// - `"['a\]b']"` -> field `a]b` (escaped `]` /// - etc. 
/// /// # Errors @@ -267,9 +268,23 @@ fn parse_in_bracket(s: &str, i: usize) -> Result<(VariantPathElement<'_>, usize) } }; - let element = match unescaped.parse() { - Ok(idx) => VariantPathElement::index(idx), - Err(_) => VariantPathElement::field(unescaped), + let element = if let Some(inner) = unescaped + .strip_prefix('\'') + .and_then(|s| s.strip_suffix('\'')) + .or_else(|| { + unescaped + .strip_prefix('"') + .and_then(|s| s.strip_suffix('"')) + }) { + // Quoted field name, e.g., ['field'] or ['123'] or ["123"] + VariantPathElement::field(inner.to_string()) + } else { + let Ok(idx) = unescaped.parse() else { + return Err(ArrowError::ParseError(format!( + "Invalid token in bracket request: `{unescaped}`. Expected a quoted string or a number(e.g., `['field']` or `[123]`)" + ))); + }; + VariantPathElement::index(idx) }; Ok((element, end + 1)) From 73a516e3bc9d3850f16b66d6cb65d01e6b080c97 Mon Sep 17 00:00:00 2001 From: Liam Bao Date: Mon, 2 Mar 2026 16:49:56 -0500 Subject: [PATCH 12/80] Move `ListLikeArray` to arrow-array to be shared with json writer and parquet unshredding (#9437) # Which issue does this PR close? - Part of #9340. # Rationale for this change Json writers for ListLike types (List/ListView/FixedSizeList) are pretty similar apart from the element range representation. We already had a good way to abstract this kind of encoder in parquet variant unshredding. Given this, it would be good to move this `ListLikeArray` trait to arrow-array to be shared with json/parquet # What changes are included in this PR? Move `ListLikeArray` trait from parquet-variant-compute to arrow-array # Are these changes tested? Covered by existing tests # Are there any user-facing changes? 
New pub trait in arrow-array --- .../src/array/fixed_size_list_array.rs | 12 +++++ arrow-array/src/array/list_array.rs | 13 +++++ arrow-array/src/array/list_view_array.rs | 12 +++++ arrow-array/src/array/mod.rs | 15 ++++++ .../src/arrow_to_variant.rs | 53 +------------------ parquet-variant-compute/src/shred_variant.rs | 4 +- .../src/unshred_variant.rs | 4 +- 7 files changed, 58 insertions(+), 55 deletions(-) diff --git a/arrow-array/src/array/fixed_size_list_array.rs b/arrow-array/src/array/fixed_size_list_array.rs index ce75855c6815..a3db33d61b56 100644 --- a/arrow-array/src/array/fixed_size_list_array.rs +++ b/arrow-array/src/array/fixed_size_list_array.rs @@ -530,6 +530,18 @@ unsafe impl Array for FixedSizeListArray { } } +impl super::ListLikeArray for FixedSizeListArray { + fn values(&self) -> &ArrayRef { + self.values() + } + + fn element_range(&self, index: usize) -> std::ops::Range { + let value_length = self.value_length().as_usize(); + let offset = index * value_length; + offset..(offset + value_length) + } +} + impl ArrayAccessor for FixedSizeListArray { type Item = ArrayRef; diff --git a/arrow-array/src/array/list_array.rs b/arrow-array/src/array/list_array.rs index e4c603e0d921..d9613c6809ac 100644 --- a/arrow-array/src/array/list_array.rs +++ b/arrow-array/src/array/list_array.rs @@ -622,6 +622,19 @@ unsafe impl Array for GenericListArray } } +impl super::ListLikeArray for GenericListArray { + fn values(&self) -> &ArrayRef { + self.values() + } + + fn element_range(&self, index: usize) -> std::ops::Range { + let offsets = self.offsets(); + let start = offsets[index].as_usize(); + let end = offsets[index + 1].as_usize(); + start..end + } +} + impl ArrayAccessor for &GenericListArray { type Item = ArrayRef; diff --git a/arrow-array/src/array/list_view_array.rs b/arrow-array/src/array/list_view_array.rs index b8d427d829c8..eda3be11ac39 100644 --- a/arrow-array/src/array/list_view_array.rs +++ b/arrow-array/src/array/list_view_array.rs @@ -488,6 +488,18 
@@ unsafe impl Array for GenericListViewArray super::ListLikeArray for GenericListViewArray { + fn values(&self) -> &ArrayRef { + self.values() + } + + fn element_range(&self, index: usize) -> std::ops::Range { + let offset = self.value_offsets()[index].as_usize(); + let size = self.value_sizes()[index].as_usize(); + offset..(offset + size) + } +} + impl std::fmt::Debug for GenericListViewArray { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { let prefix = OffsetSize::PREFIX; diff --git a/arrow-array/src/array/mod.rs b/arrow-array/src/array/mod.rs index 0d8125a2a1db..ca3a02577f47 100644 --- a/arrow-array/src/array/mod.rs +++ b/arrow-array/src/array/mod.rs @@ -667,6 +667,21 @@ impl<'a> BinaryArrayType<'a> for &'a FixedSizeBinaryArray { } } +/// A trait for Arrow list-like arrays, abstracting over +/// [`GenericListArray`], [`GenericListViewArray`], and [`FixedSizeListArray`]. +/// +/// This trait provides a uniform interface for accessing the child values and +/// computing the element range for a given index, regardless of the underlying +/// list layout (offsets, offsets+sizes, or fixed-size). +pub trait ListLikeArray: Array { + /// Returns the child values array. + fn values(&self) -> &ArrayRef; + + /// Returns the start and end indices into the values array for the list + /// element at `index`. + fn element_range(&self, index: usize) -> std::ops::Range; +} + impl PartialEq for dyn Array + '_ { fn eq(&self, other: &Self) -> bool { self.to_data().eq(&other.to_data()) diff --git a/parquet-variant-compute/src/arrow_to_variant.rs b/parquet-variant-compute/src/arrow_to_variant.rs index be241a9a4e00..03a84109ffa0 100644 --- a/parquet-variant-compute/src/arrow_to_variant.rs +++ b/parquet-variant-compute/src/arrow_to_variant.rs @@ -16,8 +16,8 @@ // under the License. 
use arrow::array::{ - Array, ArrayRef, AsArray, FixedSizeListArray, GenericBinaryArray, GenericListArray, - GenericListViewArray, GenericStringArray, OffsetSizeTrait, PrimitiveArray, + Array, AsArray, FixedSizeListArray, GenericBinaryArray, GenericListArray, GenericListViewArray, + GenericStringArray, ListLikeArray, OffsetSizeTrait, PrimitiveArray, }; use arrow::compute::{CastOptions, kernels::cast}; use arrow::datatypes::{ @@ -32,7 +32,6 @@ use parquet_variant::{ VariantDecimal16, VariantDecimalType, }; use std::collections::HashMap; -use std::ops::Range; // ============================================================================ // Row-oriented builders for efficient Arrow-to-Variant conversion @@ -552,54 +551,6 @@ impl<'a, L: ListLikeArray> ListArrowToVariantBuilder<'a, L> { } } -/// Trait for list-like arrays that can provide element ranges -pub(crate) trait ListLikeArray: Array { - /// Get the values array - fn values(&self) -> &ArrayRef; - - /// Get the start and end indices for a list element - fn element_range(&self, index: usize) -> Range; -} - -impl ListLikeArray for GenericListArray { - fn values(&self) -> &ArrayRef { - self.values() - } - - fn element_range(&self, index: usize) -> Range { - let offsets = self.offsets(); - let start = offsets[index].as_usize(); - let end = offsets[index + 1].as_usize(); - start..end - } -} - -impl ListLikeArray for GenericListViewArray { - fn values(&self) -> &ArrayRef { - self.values() - } - - fn element_range(&self, index: usize) -> Range { - let offsets = self.value_offsets(); - let sizes = self.value_sizes(); - let offset = offsets[index].as_usize(); - let size = sizes[index].as_usize(); - offset..(offset + size) - } -} - -impl ListLikeArray for FixedSizeListArray { - fn values(&self) -> &ArrayRef { - self.values() - } - - fn element_range(&self, index: usize) -> Range { - let value_length = self.value_length().as_usize(); - let offset = index * value_length; - offset..(offset + value_length) - } -} - /// Struct 
builder for StructArray pub(crate) struct StructArrowToVariantBuilder<'a> { struct_array: &'a arrow::array::StructArray, diff --git a/parquet-variant-compute/src/shred_variant.rs b/parquet-variant-compute/src/shred_variant.rs index c60c602baa37..6fa3a930fc37 100644 --- a/parquet-variant-compute/src/shred_variant.rs +++ b/parquet-variant-compute/src/shred_variant.rs @@ -652,10 +652,10 @@ impl VariantSchemaNode { mod tests { use super::*; use crate::VariantArrayBuilder; - use crate::arrow_to_variant::ListLikeArray; use arrow::array::{ Array, BinaryViewArray, FixedSizeBinaryArray, Float64Array, GenericListArray, - GenericListViewArray, Int64Array, ListArray, OffsetSizeTrait, PrimitiveArray, StringArray, + GenericListViewArray, Int64Array, ListArray, ListLikeArray, OffsetSizeTrait, + PrimitiveArray, StringArray, }; use arrow::datatypes::{ ArrowPrimitiveType, DataType, Field, Fields, Int64Type, TimeUnit, UnionFields, UnionMode, diff --git a/parquet-variant-compute/src/unshred_variant.rs b/parquet-variant-compute/src/unshred_variant.rs index 37363fd9d085..3600662915a5 100644 --- a/parquet-variant-compute/src/unshred_variant.rs +++ b/parquet-variant-compute/src/unshred_variant.rs @@ -17,11 +17,11 @@ //! Module for unshredding VariantArray by folding typed_value columns back into the value column. 
-use crate::arrow_to_variant::ListLikeArray; use crate::{BorrowedShreddingState, VariantArray, VariantValueArrayBuilder}; use arrow::array::{ Array, AsArray as _, BinaryViewArray, BooleanArray, FixedSizeBinaryArray, FixedSizeListArray, - GenericListArray, GenericListViewArray, PrimitiveArray, StringArray, StructArray, + GenericListArray, GenericListViewArray, ListLikeArray, PrimitiveArray, StringArray, + StructArray, }; use arrow::buffer::NullBuffer; use arrow::datatypes::{ From 01d34a8bee7fae52afd167469ef9e75ff9533309 Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Mon, 2 Mar 2026 22:50:41 +0100 Subject: [PATCH 13/80] Add `append_value_n` to GenericByteBuilder (#9426) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Which issue does this PR close? - Closes #9425. # Rationale for this change I noticed that this method is available on PrimitiveTypeBuilder, but missing on the GenericByteBuilder, which make sense since the gain is less, but after benchmarking, it shows a solid 10%. Mostly because the more efficient allocation of the null-mask. 
``` ┌───────────────────┬────────────────┬───────────────────┬─────────┐ │ Benchmark │ append_value_n │ append_value loop │ Speedup │ ├───────────────────┼────────────────┼───────────────────┼─────────┤ │ n=100/len=5 │ 371 ns │ 408 ns │ 10% │ ├───────────────────┼────────────────┼───────────────────┼─────────┤ │ n=100/len=30 │ 456 ns │ 507 ns │ 10% │ ├───────────────────┼────────────────┼───────────────────┼─────────┤ │ n=100/len=1024 │ 1.81 µs │ 1.95 µs │ 8% │ ├───────────────────┼────────────────┼───────────────────┼─────────┤ │ n=1000/len=5 │ 2.39 µs │ 2.87 µs │ 17% │ ├───────────────────┼────────────────┼───────────────────┼─────────┤ │ n=1000/len=30 │ 3.41 µs │ 3.89 µs │ 12% │ ├───────────────────┼────────────────┼───────────────────┼─────────┤ │ n=1000/len=1024 │ 12.3 µs │ 14.4 µs │ 15% │ ├───────────────────┼────────────────┼───────────────────┼─────────┤ │ n=10000/len=5 │ 23.8 µs │ 29.3 µs │ 19% │ ├───────────────────┼────────────────┼───────────────────┼─────────┤ │ n=10000/len=30 │ 33.7 µs │ 39.0 µs │ 14% │ ├───────────────────┼────────────────┼───────────────────┼─────────┤ │ n=10000/len=1024 │ 115.9 µs │ 135.0 µs │ 14% │ ├───────────────────┼────────────────┼───────────────────┼─────────┤ │ n=100000/len=5 │ 227.5 µs │ 278.6 µs │ 18% │ ├───────────────────┼────────────────┼───────────────────┼─────────┤ │ n=100000/len=30 │ 328.1 µs │ 377.9 µs │ 13% │ ├───────────────────┼────────────────┼───────────────────┼─────────┤ │ n=100000/len=1024 │ 1.16 ms │ 1.34 ms │ 14% │ └───────────────────┴────────────────┴───────────────────┴─────────┘ ``` I think this is still worthwhile to be added. Let me know what the community thinks! # What changes are included in this PR? A new public API. # Are these changes tested? Yes! # Are there any user-facing changes? A new public API. 
--- .../src/builder/generic_bytes_builder.rs | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/arrow-array/src/builder/generic_bytes_builder.rs b/arrow-array/src/builder/generic_bytes_builder.rs index 7ed4bc5826c0..0a83ff989d4d 100644 --- a/arrow-array/src/builder/generic_bytes_builder.rs +++ b/arrow-array/src/builder/generic_bytes_builder.rs @@ -110,6 +110,21 @@ impl GenericByteBuilder { self.offsets_builder.push(self.next_offset()); } + /// Appends a value of type `T` into the builder `n` times. + /// + /// See [`Self::append_value`] for more panic information. + #[inline] + pub fn append_value_n(&mut self, value: impl AsRef, n: usize) { + let bytes: &[u8] = value.as_ref().as_ref(); + self.value_builder.reserve(bytes.len() * n); + self.offsets_builder.reserve(n); + for _ in 0..n { + self.value_builder.extend_from_slice(bytes); + self.offsets_builder.push(self.next_offset()); + } + self.null_buffer_builder.append_n_non_nulls(n); + } + /// Append an `Option` value into the builder. /// /// - A `None` value will append a null value. @@ -939,4 +954,21 @@ mod tests { assert!(matches!(result, Err(ArrowError::OffsetOverflowError(_)))); } + + #[test] + fn test_append_value_n() { + let mut builder = GenericStringBuilder::::new(); + builder.append_value("hello"); + builder.append_value_n("world", 3); + builder.append_null(); + let array = builder.finish(); + + assert_eq!(5, array.len()); + assert_eq!(1, array.null_count()); + assert_eq!("hello", array.value(0)); + assert_eq!("world", array.value(1)); + assert_eq!("world", array.value(2)); + assert_eq!("world", array.value(3)); + assert!(array.is_null(4)); + } } From bee4595c13665b9dfbd2da3dd0232423a4f2b3c9 Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Mon, 2 Mar 2026 22:51:03 +0100 Subject: [PATCH 14/80] Add `append_nulls` to `MapBuilder` (#9432) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Which issue does this PR close? 
Closes #9431 # Rationale for this change It would be nice to add `append_nulls` to MapBuilder, similar to `append_nulls` on `GenericListBuilder`. Appending the nulls at once, instead of using a loop has some nice performance implications: ``` Benchmark results (1,000,000 nulls): ┌─────────────────────────┬─────────┐ │ Method │ Time │ ├─────────────────────────┼─────────┤ │ append(false) in a loop │ 2.36 ms │ ├─────────────────────────┼─────────┤ │ append_nulls(N) │ 50 µs │ └─────────────────────────┴─────────┘ ``` # What changes are included in this PR? A new public API. # Are these changes tested? With some fresh unit tests. # Are there any user-facing changes? A nice and convenient new public API --- arrow-array/src/builder/map_builder.rs | 63 ++++++++++++++++++++++++-- 1 file changed, 59 insertions(+), 4 deletions(-) diff --git a/arrow-array/src/builder/map_builder.rs b/arrow-array/src/builder/map_builder.rs index b70d4b73880b..5ff1625b4992 100644 --- a/arrow-array/src/builder/map_builder.rs +++ b/arrow-array/src/builder/map_builder.rs @@ -154,11 +154,9 @@ impl MapBuilder { (&mut self.key_builder, &mut self.value_builder) } - /// Finish the current map array slot - /// - /// Returns an error if the key and values builders are in an inconsistent state. + /// Validates that key and value builders have equal lengths. #[inline] - pub fn append(&mut self, is_valid: bool) -> Result<(), ArrowError> { + fn validate_equal_lengths(&self) -> Result<(), ArrowError> { if self.key_builder.len() != self.value_builder.len() { return Err(ArrowError::InvalidArgumentError(format!( "Cannot append to a map builder when its keys and values have unequal lengths of {} and {}", @@ -166,11 +164,32 @@ impl MapBuilder { self.value_builder.len() ))); } + Ok(()) + } + + /// Finish the current map array slot + /// + /// Returns an error if the key and values builders are in an inconsistent state. 
+ #[inline] + pub fn append(&mut self, is_valid: bool) -> Result<(), ArrowError> { + self.validate_equal_lengths()?; self.offsets_builder.push(self.key_builder.len() as i32); self.null_buffer_builder.append(is_valid); Ok(()) } + /// Append `n` nulls to this [`MapBuilder`] + /// + /// Returns an error if the key and values builders are in an inconsistent state. + #[inline] + pub fn append_nulls(&mut self, n: usize) -> Result<(), ArrowError> { + self.validate_equal_lengths()?; + let offset = self.key_builder.len() as i32; + self.offsets_builder.extend(std::iter::repeat_n(offset, n)); + self.null_buffer_builder.append_n_nulls(n); + Ok(()) + } + /// Builds the [`MapArray`] pub fn finish(&mut self) -> MapArray { let len = self.len(); @@ -436,6 +455,42 @@ mod tests { ); } + #[test] + fn test_append_nulls() { + let mut builder = MapBuilder::new(None, Int32Builder::new(), Int32Builder::new()); + + builder.keys().append_value(1); + builder.values().append_value(100); + builder.append(true).unwrap(); + + builder.append_nulls(3).unwrap(); + + builder.keys().append_value(2); + builder.values().append_value(200); + builder.append(true).unwrap(); + + let map = builder.finish(); + assert_eq!(map.len(), 5); + assert_eq!(map.null_count(), 3); + assert!(map.is_valid(0)); + assert!(map.is_null(1)); + assert!(map.is_null(2)); + assert!(map.is_null(3)); + assert!(map.is_valid(4)); + assert_eq!(map.value_offsets(), &[0, 1, 1, 1, 1, 2]); + } + + #[test] + fn test_append_nulls_inconsistent_state() { + let mut builder = MapBuilder::new(None, Int32Builder::new(), Int32Builder::new()); + // Add a key without a matching value + builder.keys().append_value(1); + + let result = builder.append_nulls(2); + assert!(result.is_err()); + assert!(result.unwrap_err().to_string().contains("unequal lengths")); + } + #[test] #[should_panic(expected = "Keys field must not be nullable")] fn test_with_nullable_keys_field() { From e4b68e6f82e41d3f06182e39723183c28e47afa4 Mon Sep 17 00:00:00 2001 From: Fokko 
Driesprong Date: Mon, 2 Mar 2026 22:51:19 +0100 Subject: [PATCH 15/80] Add `append_non_nulls` to `StructBuilder` (#9430) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Which issue does this PR close? - Closes #9429 I'm doing some performance optimization, and noticed that we have a loop adding one value to the null mask at a time. Instead, I'd suggest adding `append_non_nulls` to do this at once. ``` append_non_nulls(n) vs append(true) in a loop (with bitmap allocated) ┌───────────┬───────────────────┬─────────────────────┬─────────┐ │ n │ append(true) loop │ append_non_nulls(n) │ speedup │ ├───────────┼───────────────────┼─────────────────────┼─────────┤ │ 100 │ 251 ns │ 73 ns │ ~3x │ ├───────────┼───────────────────┼─────────────────────┼─────────┤ │ 1,000 │ 2.0 µs │ 94 ns │ ~21x │ ├───────────┼───────────────────┼─────────────────────┼─────────┤ │ 10,000 │ 19.3 µs │ 119 ns │ ~162x │ ├───────────┼───────────────────┼─────────────────────┼─────────┤ │ 100,000 │ 191 µs │ 348 ns │ ~549x │ ├───────────┼───────────────────┼─────────────────────┼─────────┤ │ 1,000,000 │ 1.90 ms │ 3.5 µs │ ~543x │ └───────────┴───────────────────┴─────────────────────┴─────────┘ ``` # Rationale for this change It adds a new public API in favor of performance improvements. # What changes are included in this PR? A new public API # Are these changes tested? Yes, with new unit-tests. # Are there any user-facing changes? Just a new convenient API. --- arrow-array/src/builder/struct_builder.rs | 62 +++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/arrow-array/src/builder/struct_builder.rs b/arrow-array/src/builder/struct_builder.rs index ad58e008572f..795593c98a8a 100644 --- a/arrow-array/src/builder/struct_builder.rs +++ b/arrow-array/src/builder/struct_builder.rs @@ -213,6 +213,12 @@ impl StructBuilder { self.null_buffer_builder.append(is_valid); } + /// Appends `n` non-null entries into the builder. 
+ #[inline] + pub fn append_non_nulls(&mut self, n: usize) { + self.null_buffer_builder.append_n_non_nulls(n); + } + /// Appends a null element to the struct. #[inline] pub fn append_null(&mut self) { @@ -727,4 +733,60 @@ mod tests { assert!(a1.is_valid(0)); assert!(a1.is_null(1)); } + + #[test] + fn test_append_non_nulls() { + let int_builder = Int32Builder::new(); + let fields = vec![Field::new("f1", DataType::Int32, false)]; + let field_builders = vec![Box::new(int_builder) as Box]; + + let mut builder = StructBuilder::new(fields, field_builders); + builder + .field_builder::(0) + .unwrap() + .append_slice(&[1, 2, 3, 4, 5]); + builder.append_non_nulls(5); + + let arr = builder.finish(); + assert_eq!(arr.len(), 5); + assert_eq!(arr.null_count(), 0); + for i in 0..5 { + assert!(arr.is_valid(i)); + } + } + + #[test] + fn test_append_non_nulls_with_nulls() { + let mut builder = StructBuilder::new(Fields::empty(), vec![]); + builder.append_null(); + builder.append_non_nulls(3); + builder.append_nulls(2); + builder.append_non_nulls(1); + + let arr = builder.finish(); + assert_eq!(arr.len(), 7); + assert_eq!(arr.null_count(), 3); + assert!(arr.is_null(0)); + assert!(arr.is_valid(1)); + assert!(arr.is_valid(2)); + assert!(arr.is_valid(3)); + assert!(arr.is_null(4)); + assert!(arr.is_null(5)); + assert!(arr.is_valid(6)); + } + + #[test] + fn test_append_non_nulls_zero() { + let mut builder = StructBuilder::new(Fields::empty(), vec![]); + builder.append_non_nulls(0); + assert_eq!(builder.len(), 0); + + builder.append(true); + builder.append_non_nulls(0); + assert_eq!(builder.len(), 1); + + let arr = builder.finish(); + assert_eq!(arr.len(), 1); + assert_eq!(arr.null_count(), 0); + } } From 5025e6825971c7618532515b572026c61f8589b8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 3 Mar 2026 19:22:37 -0700 Subject: [PATCH 16/80] Update strum_macros requirement from 0.27 to 0.28 (#9471) Updates the requirements on 
[strum_macros](https://github.com/Peternator7/strum) to permit the latest version.
Changelog

Sourced from strum_macros's changelog.

0.28.0

  • #461: Allow any kind of passthrough attributes on EnumDiscriminants.

    • Previously only list-style attributes (e.g. #[strum_discriminants(derive(...))]) were supported. Now path-only (e.g. #[strum_discriminants(non_exhaustive)]) and name/value (e.g. #[strum_discriminants(doc = "foo")]) attributes are also supported.
  • #462: Add missing #[automatically_derived] to generated impls not covered by #444.

  • #466: Bump MSRV to 1.71, required to keep up with updated syn and windows-sys dependencies. This is a breaking change if you're on an old version of rust.

  • #469: Use absolute paths in generated proc macro code to avoid potential name conflicts.

  • #465: Upgrade phf dependency to v0.13.

  • #473: Fix cargo fmt / clippy issues and add GitHub Actions CI.

  • #477: strum::ParseError now implements core::fmt::Display instead of std::fmt::Display to make it #[no_std] compatible. Note the Error trait wasn't available in core until 1.81 so strum::ParseError still only implements that in std.

  • #476: Breaking Change - EnumString now implements From<&str> (infallible) instead of TryFrom<&str> when the enum has a #[strum(default)] variant. This more accurately reflects that parsing cannot fail in that case. If you need the old TryFrom behavior, you can opt back in using parse_error_ty and parse_error_fn:

    #[derive(EnumString)]
    #[strum(parse_error_ty = strum::ParseError, parse_error_fn =
    make_error)]
    pub enum Color {
        Red,
        #[strum(default)]
        Other(String),
    }
    

    fn make_error(x: &str) -> strum::ParseError { strum::ParseError::VariantNotFound }

  • #431: Fix bug where EnumString ignored the parse_err_ty attribute when the enum had a #[strum(default)] variant.

  • #474: EnumDiscriminants will now copy default over from the original enum to the Discriminant enum.

    #[derive(Debug, Default, EnumDiscriminants)]
    #[strum_discriminants(derive(Default))] // <- Remove this in 0.28.
    enum MyEnum {
        #[default] // <- Will be the #[default] on the MyEnumDiscriminant
        #[strum_discriminants(default)] // <- Remove this in 0.28
        Variant0,
        Variant1 { a: NonDefault },
    }
    

... (truncated)

Commits
  • 7376771 Peternator7/0.28 (#475)
  • 26e63cd Display exists in core (#477)
  • 9334c72 Make TryFrom and FromStr infallible if there's a default (#476)
  • 0ccbbf8 Honor parse_err_ty attribute when the enum has a default variant (#431)
  • 2c9e5a9 Automatically add Default implementation to EnumDiscriminant if it exists on ...
  • e241243 Fix existing cargo fmt + clippy issues and add GH actions (#473)
  • 639b67f feat: allow any kind of passthrough attributes on EnumDiscriminants (#461)
  • 0ea1e2d docs: Fix typo (#463)
  • 36c051b Upgrade phf to v0.13 (#465)
  • 9328b38 Use absolute paths in proc macro (#469)
  • Additional commits viewable in compare view

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- arrow-avro/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow-avro/Cargo.toml b/arrow-avro/Cargo.toml index b7cd7eeb1984..93eb825f9a7b 100644 --- a/arrow-avro/Cargo.toml +++ b/arrow-avro/Cargo.toml @@ -70,7 +70,7 @@ zstd = { version = "0.13", default-features = false, optional = true } bzip2 = { version = "0.6.0", optional = true } xz = { package = "liblzma", version = "0.4", default-features = false, optional = true } crc = { version = "3.0", optional = true } -strum_macros = "0.27" +strum_macros = "0.28" uuid = "1.17" indexmap = "2.10" rand = "0.9" From 8c89814ef12be9603eee6aa6edeacedef0a6c5a3 Mon Sep 17 00:00:00 2001 From: Mikhail Zabaluev Date: Thu, 5 Mar 2026 01:56:08 +0200 Subject: [PATCH 17/80] refactor: simplify dynamic state for Avro record projection (#9419) # Rationale for this change The inner loop in `Projector::project_record` gives the optimizer somewhat complicated dynamic data to branch through. The sparse arrays in `Projector` are redundantly coded: `None` in the index positions of `writer_to_reader` must match `Some` in `skip_decoders` and vice versa. # What changes are included in this PR? Refactor record projection state with a single array of directive-like enums corresponding to each writer schema field. # Are these changes tested? Added a benchmark for record projection (the benchmark code is partially shared with #9397). Somewhat counterintuitively for me, it does not show improvement on a more complex case with a mix of projected fields, but does improve the simpler one-field projection cases. Passes the existing tests. 
--- arrow-avro/benches/project_record.rs | 65 ++++++++++++++-- arrow-avro/src/codec.rs | 74 ++++++++++-------- arrow-avro/src/reader/record.rs | 108 ++++++++++++--------------- 3 files changed, 149 insertions(+), 98 deletions(-) diff --git a/arrow-avro/benches/project_record.rs b/arrow-avro/benches/project_record.rs index 9bddfea93bb8..91bece6d7e21 100644 --- a/arrow-avro/benches/project_record.rs +++ b/arrow-avro/benches/project_record.rs @@ -121,7 +121,22 @@ fn gen_double(mut rng: impl Rng, sc: &ApacheSchema, n: usize, prefix: &[u8]) -> ) } -const READER_SCHEMA: &str = r#" +fn gen_mixed(mut rng: impl Rng, sc: &ApacheSchema, n: usize, prefix: &[u8]) -> Vec { + encode_records_with_prefix( + sc, + prefix, + (0..n).map(|i| { + Value::Record(vec![ + ("f1".into(), Value::Int(rng.random())), + ("f2".into(), Value::Long(rng.random())), + ("f3".into(), Value::String(format!("name-{i}"))), + ("f4".into(), Value::Double(rng.random())), + ]) + }), + ) +} + +const SKIP_READER_SCHEMA: &str = r#" { "type":"record", "name":"table", @@ -175,11 +190,42 @@ const DOUBLE_SCHEMA: &str = r#" } "#; -fn new_decoder(schema_json: &'static str, batch_size: usize) -> Decoder { +const MIX_SCHEMA: &str = r#" + { + "type":"record", + "name":"Mix", + "fields": [ + { "name": "f1", "type": "int" }, + { "name": "f2", "type": "long" }, + { "name": "f3", "type": "string" }, + { "name": "f4", "type": "double" } + ] + } + "#; + +// Project the record type writen to MIX_SCHEMA: +// skip "f2" and "f4", add "f5" with a default +const PROJECT_READER_SCHEMA: &str = r#" + { + "type":"record", + "name":"Mix", + "fields": [ + { "name": "f1", "type": "int" }, + { "name": "f3", "type": "string" }, + { "name": "f5", "type": "long", "default": 0 } + ] + } + "#; + +fn new_decoder( + schema_json: &'static str, + reader_schema_json: &'static str, + batch_size: usize, +) -> Decoder { let schema = AvroSchema::new(schema_json.to_owned()); let mut store = SchemaStore::new(); store.register(schema).unwrap(); - let 
reader_schema = AvroSchema::new(READER_SCHEMA.to_owned()); + let reader_schema = AvroSchema::new(reader_schema_json.to_owned()); ReaderBuilder::new() .with_writer_schema_store(store) .with_batch_size(batch_size) @@ -215,19 +261,24 @@ fn bench_with_decoder( fn criterion_benches(c: &mut Criterion) { let data = gen_avro_data_with(INT_SCHEMA, NUM_ROWS, gen_int); bench_with_decoder(c, "skip_int", &data, NUM_ROWS, || { - new_decoder(INT_SCHEMA, BATCH_SIZE) + new_decoder(INT_SCHEMA, SKIP_READER_SCHEMA, BATCH_SIZE) }); let data = gen_avro_data_with(LONG_SCHEMA, NUM_ROWS, gen_long); bench_with_decoder(c, "skip_long", &data, NUM_ROWS, || { - new_decoder(LONG_SCHEMA, BATCH_SIZE) + new_decoder(LONG_SCHEMA, SKIP_READER_SCHEMA, BATCH_SIZE) }); let data = gen_avro_data_with(FLOAT_SCHEMA, NUM_ROWS, gen_float); bench_with_decoder(c, "skip_float", &data, NUM_ROWS, || { - new_decoder(FLOAT_SCHEMA, BATCH_SIZE) + new_decoder(FLOAT_SCHEMA, SKIP_READER_SCHEMA, BATCH_SIZE) }); let data = gen_avro_data_with(DOUBLE_SCHEMA, NUM_ROWS, gen_double); bench_with_decoder(c, "skip_double", &data, NUM_ROWS, || { - new_decoder(DOUBLE_SCHEMA, BATCH_SIZE) + new_decoder(DOUBLE_SCHEMA, SKIP_READER_SCHEMA, BATCH_SIZE) + }); + + let data = gen_avro_data_with(MIX_SCHEMA, NUM_ROWS, gen_mixed); + bench_with_decoder(c, "project_primitives", &data, NUM_ROWS, || { + new_decoder(MIX_SCHEMA, PROJECT_READER_SCHEMA, BATCH_SIZE) }); } diff --git a/arrow-avro/src/codec.rs b/arrow-avro/src/codec.rs index d20a71425d3e..fc2a914d3514 100644 --- a/arrow-avro/src/codec.rs +++ b/arrow-avro/src/codec.rs @@ -84,14 +84,20 @@ pub(crate) enum AvroLiteral { /// Contains the necessary information to resolve a writer's record against a reader's record schema. #[derive(Debug, Clone, PartialEq)] pub(crate) struct ResolvedRecord { - /// Maps a writer's field index to the corresponding reader's field index. - /// `None` if the writer's field is not present in the reader's schema. 
- pub(crate) writer_to_reader: Arc<[Option]>, + /// Maps a writer's field index to the field's resolution against the reader's schema. + pub(crate) writer_fields: Arc<[ResolvedField]>, /// A list of indices in the reader's schema for fields that have a default value. pub(crate) default_fields: Arc<[usize]>, +} + +/// Resolution information for record fields in the writer schema. +#[derive(Debug, Clone, PartialEq)] +pub(crate) enum ResolvedField { + /// Resolves to a field indexed in the reader schema. + ToReader(usize), /// For fields present in the writer's schema but not the reader's, this stores their data type. /// This is needed to correctly skip over these fields during deserialization. - pub(crate) skip_fields: Arc<[Option]>, + Skip(AvroDataType), } /// Defines the type of promotion to be applied during schema resolution. @@ -2281,24 +2287,27 @@ impl<'a> Maker<'a> { data_type: dt, }); } - // Build skip_fields in writer order; pre-size and push. - let mut skip_fields: Vec> = - Vec::with_capacity(writer_record.fields.len()); - for (writer_index, writer_field) in writer_record.fields.iter().enumerate() { - if writer_to_reader[writer_index].is_some() { - skip_fields.push(None); - } else { - skip_fields.push(Some(self.parse_type(&writer_field.r#type, writer_ns)?)); - } - } + // Build writer field map. 
+ let writer_fields = writer_record + .fields + .iter() + .enumerate() + .map(|(writer_index, writer_field)| { + if let Some(reader_index) = writer_to_reader[writer_index] { + Ok(ResolvedField::ToReader(reader_index)) + } else { + let dt = self.parse_type(&writer_field.r#type, writer_ns)?; + Ok(ResolvedField::Skip(dt)) + } + }) + .collect::>()?; let resolved = AvroDataType::new_with_resolution( Codec::Struct(Arc::from(reader_fields)), reader_md, None, Some(ResolutionInfo::Record(ResolvedRecord { - writer_to_reader: Arc::from(writer_to_reader), + writer_fields, default_fields: Arc::from(default_fields), - skip_fields: Arc::from(skip_fields), })), ); // Register a resolved record by reader name+namespace for potential named type refs. @@ -2792,16 +2801,13 @@ mod tests { }; match resolution { ResolutionInfo::Record(ResolvedRecord { - writer_to_reader, + writer_fields, default_fields, - skip_fields, }) => { - assert_eq!(writer_to_reader.len(), 1); - assert_eq!(writer_to_reader[0], Some(0)); + assert_eq!(writer_fields.len(), 1); + assert_eq!(writer_fields[0], ResolvedField::ToReader(0)); assert_eq!(default_fields.len(), 1); assert_eq!(default_fields[0], 1); - assert_eq!(skip_fields.len(), 1); - assert_eq!(skip_fields[0], None); } other => panic!("unexpected resolution {other:?}"), } @@ -2888,16 +2894,13 @@ mod tests { }; match resolution { ResolutionInfo::Record(ResolvedRecord { - writer_to_reader, + writer_fields, default_fields, - skip_fields, }) => { - assert_eq!(writer_to_reader.len(), 1); - assert_eq!(writer_to_reader[0], Some(0)); + assert_eq!(writer_fields.len(), 1); + assert_eq!(writer_fields[0], ResolvedField::ToReader(0)); assert_eq!(default_fields.len(), 1); assert_eq!(default_fields[0], 1); - assert_eq!(skip_fields.len(), 1); - assert_eq!(skip_fields[0], None); } other => panic!("unexpected resolution {other:?}"), } @@ -3714,11 +3717,18 @@ mod tests { Some(ResolutionInfo::Record(ref r)) => r.clone(), other => panic!("expected record resolution, got 
{other:?}"), }; - assert_eq!(rec.writer_to_reader.as_ref(), &[Some(1), None, Some(0)]); + assert!(matches!( + &rec.writer_fields[..], + &[ + ResolvedField::ToReader(1), + ResolvedField::Skip(_), + ResolvedField::ToReader(0), + ] + )); assert_eq!(rec.default_fields.as_ref(), &[2usize, 3usize]); - assert!(rec.skip_fields[0].is_none()); - assert!(rec.skip_fields[2].is_none()); - let skip1 = rec.skip_fields[1].as_ref().expect("skip field present"); + let ResolvedField::Skip(skip1) = &rec.writer_fields[1] else { + panic!("should skip field 1") + }; assert!(matches!(skip1.codec(), Codec::Utf8)); let name_md = &fields[2].data_type().metadata; assert_eq!( diff --git a/arrow-avro/src/reader/record.rs b/arrow-avro/src/reader/record.rs index 5e281d1fc6f6..605d29697392 100644 --- a/arrow-avro/src/reader/record.rs +++ b/arrow-avro/src/reader/record.rs @@ -18,8 +18,8 @@ //! Avro Decoder for Arrow types. use crate::codec::{ - AvroDataType, AvroLiteral, Codec, EnumMapping, Promotion, ResolutionInfo, ResolvedRecord, - ResolvedUnion, + AvroDataType, AvroLiteral, Codec, EnumMapping, Promotion, ResolutionInfo, ResolvedField, + ResolvedRecord, ResolvedUnion, }; use crate::errors::AvroError; use crate::reader::cursor::AvroCursor; @@ -2416,11 +2416,16 @@ fn values_equal_at(arr: &dyn Array, i: usize, j: usize) -> bool { #[derive(Debug)] struct Projector { - writer_to_reader: Arc<[Option]>, - skip_decoders: Vec>, + writer_projections: Vec, default_injections: Arc<[(usize, AvroLiteral)]>, } +#[derive(Debug)] +enum FieldProjection { + ToReader(usize), + Skip(Skipper), +} + #[derive(Debug)] struct ProjectorBuilder<'a> { rec: &'a ResolvedRecord, @@ -2448,18 +2453,20 @@ impl<'a> ProjectorBuilder<'a> { .unwrap_or(AvroLiteral::Null); default_injections.push((idx, lit)); } - let mut skip_decoders: Vec> = - Vec::with_capacity(self.rec.skip_fields.len()); - for datatype in self.rec.skip_fields.as_ref() { - let skipper = match datatype { - Some(datatype) => Some(Skipper::from_avro(datatype)?), - None 
=> None, - }; - skip_decoders.push(skipper); - } + let writer_projections = self + .rec + .writer_fields + .iter() + .map(|field| match field { + ResolvedField::ToReader(index) => Ok(FieldProjection::ToReader(*index)), + ResolvedField::Skip(datatype) => { + let skipper = Skipper::from_avro(datatype)?; + Ok(FieldProjection::Skip(skipper)) + } + }) + .collect::>()?; Ok(Projector { - writer_to_reader: self.rec.writer_to_reader.clone(), - skip_decoders, + writer_projections, default_injections: default_injections.into(), }) } @@ -2472,25 +2479,10 @@ impl Projector { buf: &mut AvroCursor<'_>, encodings: &mut [Decoder], ) -> Result<(), AvroError> { - debug_assert_eq!( - self.writer_to_reader.len(), - self.skip_decoders.len(), - "internal invariant: mapping and skipper lists must have equal length" - ); - for (i, (mapping, skipper_opt)) in self - .writer_to_reader - .iter() - .zip(self.skip_decoders.iter()) - .enumerate() - { - match (mapping, skipper_opt.as_ref()) { - (Some(reader_index), _) => encodings[*reader_index].decode(buf)?, - (None, Some(skipper)) => skipper.skip(buf)?, - (None, None) => { - return Err(AvroError::SchemaError(format!( - "No skipper available for writer-only field at index {i}", - ))); - } + for field_proj in self.writer_projections.iter() { + match field_proj { + FieldProjection::ToReader(index) => encodings[*index].decode(buf)?, + FieldProjection::Skip(skipper) => skipper.skip(buf)?, } } for (reader_index, lit) in self.default_injections.as_ref() { @@ -4128,8 +4120,7 @@ mod tests { fn make_record_resolved_decoder( reader_fields: &[(&str, DataType, bool)], - writer_to_reader: Vec>, - skip_decoders: Vec>, + writer_projections: Vec, ) -> Decoder { let mut field_refs: Vec = Vec::with_capacity(reader_fields.len()); let mut encodings: Vec = Vec::with_capacity(reader_fields.len()); @@ -4151,8 +4142,7 @@ mod tests { encodings, vec![None; reader_fields.len()], Some(Projector { - writer_to_reader: Arc::from(writer_to_reader), - skip_decoders, + 
writer_projections, default_injections: Arc::from(Vec::<(usize, AvroLiteral)>::new()), }), ) @@ -4162,8 +4152,10 @@ mod tests { fn test_skip_writer_trailing_field_int32() { let mut dec = make_record_resolved_decoder( &[("id", arrow_schema::DataType::Int32, false)], - vec![Some(0), None], - vec![None, Some(super::Skipper::Int32)], + vec![ + FieldProjection::ToReader(0), + FieldProjection::Skip(super::Skipper::Int32), + ], ); let mut data = Vec::new(); data.extend_from_slice(&encode_avro_int(7)); @@ -4190,8 +4182,11 @@ mod tests { ("id", DataType::Int32, false), ("score", DataType::Int64, false), ], - vec![Some(0), None, Some(1)], - vec![None, Some(Skipper::String), None], + vec![ + FieldProjection::ToReader(0), + FieldProjection::Skip(Skipper::String), + FieldProjection::ToReader(1), + ], ); let mut data = Vec::new(); data.extend_from_slice(&encode_avro_int(42)); @@ -4222,8 +4217,10 @@ mod tests { fn test_skip_writer_array_with_negative_block_count_fast() { let mut dec = make_record_resolved_decoder( &[("id", DataType::Int32, false)], - vec![None, Some(0)], - vec![Some(super::Skipper::List(Box::new(Skipper::Int32))), None], + vec![ + FieldProjection::Skip(super::Skipper::List(Box::new(Skipper::Int32))), + FieldProjection::ToReader(0), + ], ); let mut array_payload = Vec::new(); array_payload.extend_from_slice(&encode_avro_int(1)); @@ -4254,8 +4251,10 @@ mod tests { fn test_skip_writer_map_with_negative_block_count_fast() { let mut dec = make_record_resolved_decoder( &[("id", DataType::Int32, false)], - vec![None, Some(0)], - vec![Some(Skipper::Map(Box::new(Skipper::Int32))), None], + vec![ + FieldProjection::Skip(Skipper::Map(Box::new(Skipper::Int32))), + FieldProjection::ToReader(0), + ], ); let mut entries = Vec::new(); entries.extend_from_slice(&encode_avro_bytes(b"k1")); @@ -4287,13 +4286,12 @@ mod tests { fn test_skip_writer_nullable_field_union_nullfirst() { let mut dec = make_record_resolved_decoder( &[("id", DataType::Int32, false)], - vec![None, Some(0)], 
vec![ - Some(super::Skipper::Nullable( + FieldProjection::Skip(super::Skipper::Nullable( Nullability::NullFirst, Box::new(super::Skipper::Int32), )), - None, + FieldProjection::ToReader(0), ], ); let mut row1 = Vec::new(); @@ -4503,7 +4501,6 @@ mod tests { reader_fields: &[(&str, DataType, bool)], field_defaults: Vec>, default_injections: Vec<(usize, AvroLiteral)>, - writer_to_reader_len: usize, ) -> Decoder { assert_eq!( field_defaults.len(), @@ -4526,11 +4523,8 @@ mod tests { encodings.push(enc); } let fields: Fields = field_refs.into(); - let skip_decoders: Vec> = - (0..writer_to_reader_len).map(|_| None::).collect(); let projector = Projector { - writer_to_reader: Arc::from(vec![None; writer_to_reader_len]), - skip_decoders, + writer_projections: vec![], default_injections: Arc::from(default_injections), }; Decoder::Record(fields, encodings, field_defaults, Some(projector)) @@ -4979,7 +4973,6 @@ mod tests { &[("a", DataType::Int32, false), ("b", DataType::Utf8, false)], field_defaults, vec![], - 0, ); let mut map: IndexMap = IndexMap::new(); map.insert("a".to_string(), AvroLiteral::Int(7)); @@ -5012,7 +5005,6 @@ mod tests { &[("a", DataType::Int32, false), ("b", DataType::Utf8, false)], field_defaults, vec![], - 0, ); rec.append_default(&AvroLiteral::Null).unwrap(); let arr = rec.flush(None).unwrap(); @@ -5065,8 +5057,7 @@ mod tests { encoders.push(enc_b); let field_defaults = vec![None, None]; // no defaults -> append_null let projector = Projector { - writer_to_reader: Arc::from(vec![]), - skip_decoders: vec![], + writer_projections: vec![], default_injections: Arc::from(Vec::<(usize, AvroLiteral)>::new()), }; let mut rec = Decoder::Record(field_refs.into(), encoders, field_defaults, Some(projector)); @@ -5106,7 +5097,6 @@ mod tests { ], defaults, injections, - 0, ); rec.decode(&mut AvroCursor::new(&[])).unwrap(); let arr = rec.flush(None).unwrap(); From 5ba451531efd2e98de38f6a8443aad605b6b5cc5 Mon Sep 17 00:00:00 2001 From: Bruno Date: Thu, 5 Mar 2026 
04:44:44 +0100 Subject: [PATCH 18/80] Simplify downcast_...!() macro definitions (#9454) 1. Reduce some quantifiers from `*` to `?` when 2+ occurrences would generate invalid Rust code. `$(if $pred:expr)*` 2. Clean up 4-armed recursive macros: * put the base case first * explain the fixups * fix all at once, going directly to the base case, instead of possibly multiple hops The initial motivation was getting rust-analyzer to stop choking on such macro usage where the left-hand side was a tuple and the right-hand-side an expr. --- arrow-array/src/cast.rs | 107 ++++++++++++++++++++++------------------ 1 file changed, 58 insertions(+), 49 deletions(-) diff --git a/arrow-array/src/cast.rs b/arrow-array/src/cast.rs index de590ff87c77..d6cc242e0267 100644 --- a/arrow-array/src/cast.rs +++ b/arrow-array/src/cast.rs @@ -74,7 +74,7 @@ macro_rules! repeat_pat { /// [`DataType`]: arrow_schema::DataType #[macro_export] macro_rules! downcast_integer { - ($($data_type:expr),+ => ($m:path $(, $args:tt)*), $($p:pat $(if $pred:expr)* => $fallback:expr $(,)*)*) => { + ($($data_type:expr),+ => ($m:path $(, $args:tt)*), $($p:pat $(if $pred:expr)? => $fallback:expr $(,)?)*) => { match ($($data_type),+) { $crate::repeat_pat!($crate::cast::__private::DataType::Int8, $($data_type),+) => { $m!($crate::types::Int8Type $(, $args)*) @@ -100,7 +100,7 @@ macro_rules! downcast_integer { $crate::repeat_pat!($crate::cast::__private::DataType::UInt64, $($data_type),+) => { $m!($crate::types::UInt64Type $(, $args)*) } - $($p $(if $pred)* => $fallback,)* + $($p $(if $pred)? => $fallback,)* } }; } @@ -138,21 +138,24 @@ macro_rules! downcast_integer { /// [`DataType`]: arrow_schema::DataType #[macro_export] macro_rules!
downcast_integer_array { - ($values:ident => $e:expr, $($p:pat $(if $pred:expr)* => $fallback:expr $(,)*)*) => { - $crate::downcast_integer_array!($values => {$e} $($p $(if $pred)* => $fallback)*) - }; - (($($values:ident),+) => $e:expr, $($p:pat $(if $pred:expr)* => $fallback:expr $(,)*)*) => { - $crate::downcast_integer_array!($($values),+ => {$e} $($p $(if $pred)* => $fallback)*) - }; - ($($values:ident),+ => $e:block $($p:pat $(if $pred:expr)* => $fallback:expr $(,)*)*) => { - $crate::downcast_integer_array!(($($values),+) => $e $($p $(if $pred)* => $fallback)*) - }; - (($($values:ident),+) => $e:block $($p:pat $(if $pred:expr)* => $fallback:expr $(,)*)*) => { + ($($values:ident),+ => $e:block $($p:pat $(if $pred:expr)? => $fallback:expr $(,)?)*) => { $crate::downcast_integer!{ $($values.data_type()),+ => ($crate::downcast_primitive_array_helper, $($values),+, $e), - $($p $(if $pred)* => $fallback,)* + $($p $(if $pred)? => $fallback,)* } }; + // Turn $e into a block. + ($values:ident => $e:expr, $($p:pat $(if $pred:expr)? => $fallback:expr $(,)?)*) => { + $crate::downcast_integer_array!($values => {$e} $($p $(if $pred)? => $fallback,)*) + }; + // Remove $values parentheses. + (($($values:ident),+) => $e:block $($p:pat $(if $pred:expr)? => $fallback:expr $(,)?)*) => { + $crate::downcast_integer_array!($($values),+ => $e $($p $(if $pred)? => $fallback,)*) + }; + // Turn $e into a block & remove $values parentheses. + (($($values:ident),+) => $e:expr, $($p:pat $(if $pred:expr)? => $fallback:expr $(,)?)*) => { + $crate::downcast_integer_array!($($values),+ => {$e} $($p $(if $pred)? => $fallback,)*) + }; } /// Given one or more expressions evaluating to an integer [`DataType`] invokes the provided macro @@ -189,7 +192,7 @@ macro_rules! downcast_integer_array { /// [`DataType`]: arrow_schema::DataType #[macro_export] macro_rules! 
downcast_run_end_index { - ($($data_type:expr),+ => ($m:path $(, $args:tt)*), $($p:pat $(if $pred:expr)* => $fallback:expr $(,)*)*) => { + ($($data_type:expr),+ => ($m:path $(, $args:tt)*), $($p:pat $(if $pred:expr)? => $fallback:expr $(,)?)*) => { match ($($data_type),+) { $crate::repeat_pat!($crate::cast::__private::DataType::Int16, $($data_type),+) => { $m!($crate::types::Int16Type $(, $args)*) @@ -200,7 +203,7 @@ macro_rules! downcast_run_end_index { $crate::repeat_pat!($crate::cast::__private::DataType::Int64, $($data_type),+) => { $m!($crate::types::Int64Type $(, $args)*) } - $($p $(if $pred)* => $fallback,)* + $($p $(if $pred)? => $fallback,)* } }; } @@ -234,7 +237,7 @@ macro_rules! downcast_run_end_index { /// [`DataType`]: arrow_schema::DataType #[macro_export] macro_rules! downcast_temporal { - ($($data_type:expr),+ => ($m:path $(, $args:tt)*), $($p:pat $(if $pred:expr)* => $fallback:expr $(,)*)*) => { + ($($data_type:expr),+ => ($m:path $(, $args:tt)*), $($p:pat $(if $pred:expr)? => $fallback:expr $(,)?)*) => { match ($($data_type),+) { $crate::repeat_pat!($crate::cast::__private::DataType::Time32($crate::cast::__private::TimeUnit::Second), $($data_type),+) => { $m!($crate::types::Time32SecondType $(, $args)*) @@ -266,7 +269,7 @@ macro_rules! downcast_temporal { $crate::repeat_pat!($crate::cast::__private::DataType::Timestamp($crate::cast::__private::TimeUnit::Nanosecond, _), $($data_type),+) => { $m!($crate::types::TimestampNanosecondType $(, $args)*) } - $($p $(if $pred)* => $fallback,)* + $($p $(if $pred)? => $fallback,)* } }; } @@ -304,21 +307,24 @@ macro_rules! downcast_temporal { /// [`DataType`]: arrow_schema::DataType #[macro_export] macro_rules! 
downcast_temporal_array { - ($values:ident => $e:expr, $($p:pat $(if $pred:expr)* => $fallback:expr $(,)*)*) => { - $crate::downcast_temporal_array!($values => {$e} $($p $(if $pred)* => $fallback)*) - }; - (($($values:ident),+) => $e:expr, $($p:pat $(if $pred:expr)* => $fallback:expr $(,)*)*) => { - $crate::downcast_temporal_array!($($values),+ => {$e} $($p $(if $pred)* => $fallback)*) - }; - ($($values:ident),+ => $e:block $($p:pat $(if $pred:expr)* => $fallback:expr $(,)*)*) => { - $crate::downcast_temporal_array!(($($values),+) => $e $($p $(if $pred)* => $fallback)*) - }; - (($($values:ident),+) => $e:block $($p:pat $(if $pred:expr)* => $fallback:expr $(,)*)*) => { + ($($values:ident),+ => $e:block $($p:pat $(if $pred:expr)? => $fallback:expr $(,)?)*) => { $crate::downcast_temporal!{ $($values.data_type()),+ => ($crate::downcast_primitive_array_helper, $($values),+, $e), - $($p $(if $pred)* => $fallback,)* + $($p $(if $pred)? => $fallback,)* } }; + // Turn $e into a block. + ($values:ident => $e:expr, $($p:pat $(if $pred:expr)? => $fallback:expr $(,)?)*) => { + $crate::downcast_temporal_array!($values => {$e} $($p $(if $pred)? => $fallback,)*) + }; + // Remove $values parentheses. + (($($values:ident),+) => $e:block $($p:pat $(if $pred:expr)? => $fallback:expr $(,)?)*) => { + $crate::downcast_temporal_array!($($values),+ => $e $($p $(if $pred)? => $fallback,)*) + }; + // Turn $e into a block & remove $values parentheses. + (($($values:ident),+) => $e:expr, $($p:pat $(if $pred:expr)? => $fallback:expr $(,)?)*) => { + $crate::downcast_temporal_array!($($values),+ => {$e} $($p $(if $pred)? => $fallback,)*) + }; } /// Given one or more expressions evaluating to primitive [`DataType`] invokes the provided macro @@ -353,7 +359,7 @@ macro_rules! downcast_temporal_array { /// [`DataType`]: arrow_schema::DataType #[macro_export] macro_rules! 
downcast_primitive { - ($($data_type:expr),+ => ($m:path $(, $args:tt)*), $($p:pat $(if $pred:expr)* => $fallback:expr $(,)*)*) => { + ($($data_type:expr),+ => ($m:path $(, $args:tt)*), $($p:pat $(if $pred:expr)? => $fallback:expr $(,)?)*) => { $crate::downcast_integer! { $($data_type),+ => ($m $(, $args)*), $crate::repeat_pat!($crate::cast::__private::DataType::Float16, $($data_type),+) => { @@ -401,7 +407,7 @@ macro_rules! downcast_primitive { _ => { $crate::downcast_temporal! { $($data_type),+ => ($m $(, $args)*), - $($p $(if $pred)* => $fallback,)* + $($p $(if $pred)? => $fallback,)* } } } @@ -450,21 +456,24 @@ macro_rules! downcast_primitive_array_helper { /// [`DataType`]: arrow_schema::DataType #[macro_export] macro_rules! downcast_primitive_array { - ($values:ident => $e:expr, $($p:pat $(if $pred:expr)* => $fallback:expr $(,)*)*) => { - $crate::downcast_primitive_array!($values => {$e} $($p $(if $pred)* => $fallback)*) - }; - (($($values:ident),+) => $e:expr, $($p:pat $(if $pred:expr)* => $fallback:expr $(,)*)*) => { - $crate::downcast_primitive_array!($($values),+ => {$e} $($p $(if $pred)* => $fallback)*) - }; - ($($values:ident),+ => $e:block $($p:pat $(if $pred:expr)* => $fallback:expr $(,)*)*) => { - $crate::downcast_primitive_array!(($($values),+) => $e $($p $(if $pred)* => $fallback)*) - }; - (($($values:ident),+) => $e:block $($p:pat $(if $pred:expr)* => $fallback:expr $(,)*)*) => { + ($($values:ident),+ => $e:block $($p:pat $(if $pred:expr)? => $fallback:expr $(,)?)*) => { $crate::downcast_primitive!{ $($values.data_type()),+ => ($crate::downcast_primitive_array_helper, $($values),+, $e), - $($p $(if $pred)* => $fallback,)* + $($p $(if $pred)? => $fallback,)* } }; + // Turn $e into a block. + ($values:ident => $e:expr, $($p:pat $(if $pred:expr)? => $fallback:expr $(,)?)*) => { + $crate::downcast_primitive_array!($values => {$e} $($p $(if $pred)? => $fallback,)*) + }; + // Remove $values parentheses. 
+ (($($values:ident),+) => $e:block $($p:pat $(if $pred:expr)? => $fallback:expr $(,)?)*) => { + $crate::downcast_primitive_array!($($values),+ => $e $($p $(if $pred)? => $fallback,)*) + }; + // Turn $e into a block & remove $values parentheses. + (($($values:ident),+) => $e:expr, $($p:pat $(if $pred:expr)? => $fallback:expr $(,)?)*) => { + $crate::downcast_primitive_array!($($values),+ => {$e} $($p $(if $pred)? => $fallback,)*) + }; } /// Force downcast of an [`Array`], such as an [`ArrayRef`], to @@ -546,11 +555,11 @@ macro_rules! downcast_dictionary_array_helper { /// [`DataType`]: arrow_schema::DataType #[macro_export] macro_rules! downcast_dictionary_array { - ($values:ident => $e:expr, $($p:pat $(if $pred:expr)* => $fallback:expr $(,)*)*) => { - downcast_dictionary_array!($values => {$e} $($p $(if $pred)* => $fallback)*) + ($values:ident => $e:expr, $($p:pat $(if $pred:expr)? => $fallback:expr $(,)?)*) => { + downcast_dictionary_array!($values => {$e} $($p $(if $pred)? => $fallback,)*) }; - ($values:ident => $e:block $($p:pat $(if $pred:expr)* => $fallback:expr $(,)*)*) => { + ($values:ident => $e:block $($p:pat $(if $pred:expr)? => $fallback:expr $(,)?)*) => { match $values.data_type() { $crate::cast::__private::DataType::Dictionary(k, _) => { $crate::downcast_integer! { @@ -558,7 +567,7 @@ macro_rules! downcast_dictionary_array { k => unreachable!("unsupported dictionary key type: {}", k) } } - $($p $(if $pred)* => $fallback,)* + $($p $(if $pred)? => $fallback,)* } } } @@ -654,11 +663,11 @@ macro_rules! downcast_run_array_helper { /// [`DataType`]: arrow_schema::DataType #[macro_export] macro_rules! downcast_run_array { - ($values:ident => $e:expr, $($p:pat $(if $pred:expr)* => $fallback:expr $(,)*)*) => { - downcast_run_array!($values => {$e} $($p $(if $pred)* => $fallback)*) + ($values:ident => $e:expr, $($p:pat $(if $pred:expr)? => $fallback:expr $(,)?)*) => { + downcast_run_array!($values => {$e} $($p $(if $pred)? 
=> $fallback,)*) }; - ($values:ident => $e:block $($p:pat $(if $pred:expr)* => $fallback:expr $(,)*)*) => { + ($values:ident => $e:block $($p:pat $(if $pred:expr)? => $fallback:expr $(,)?)*) => { match $values.data_type() { $crate::cast::__private::DataType::RunEndEncoded(k, _) => { $crate::downcast_run_end_index! { @@ -666,7 +675,7 @@ macro_rules! downcast_run_array { k => unreachable!("unsupported run end index type: {}", k) } } - $($p $(if $pred)* => $fallback,)* + $($p $(if $pred)? => $fallback,)* } } } From 097c2038971b9306f8a9c3c767f64d1794e2eb2f Mon Sep 17 00:00:00 2001 From: Ed Seidl Date: Sat, 7 Mar 2026 12:48:33 -0800 Subject: [PATCH 19/80] Add some benchmarks for decoding delta encoded Parquet (#9500) # Which issue does this PR close? - Part of #9476. # Rationale for this change Add benchmarks to show benefit of the optimizations in #9477 # What changes are included in this PR? Adds some benches for DELTA_BINARY_PACKED, DELTA_BYTE_ARRAY, and DELTA_LENGTH_BYTE_ARRAY. The generated data is meant to show the benefit of special casing for miniblocks with a bitwidth of 0. # Are these changes tested? Just benches # Are there any user-facing changes? 
No --- parquet/benches/arrow_reader.rs | 251 ++++++++++++++++++++++++++++++++ 1 file changed, 251 insertions(+) diff --git a/parquet/benches/arrow_reader.rs b/parquet/benches/arrow_reader.rs index 2ea0706e3517..14fa16b3531e 100644 --- a/parquet/benches/arrow_reader.rs +++ b/parquet/benches/arrow_reader.rs @@ -326,6 +326,58 @@ where InMemoryPageIterator::new(pages) } +fn build_delta_encoded_incr_primitive_page_iterator( + column_desc: ColumnDescPtr, + null_density: f32, + increment: usize, + stepped: bool, +) -> impl PageIterator + Clone +where + T: parquet::data_type::DataType, + T::T: SampleUniform + FromPrimitive, +{ + let max_def_level = column_desc.max_def_level(); + let max_rep_level = column_desc.max_rep_level(); + let rep_levels = vec![0; VALUES_PER_PAGE]; + let mut rng = seedable_rng(); + let mut pages: Vec> = Vec::new(); + let mut running_val: usize = 1; + for _i in 0..NUM_ROW_GROUPS { + let mut column_chunk_pages = Vec::new(); + for _j in 0..PAGES_PER_GROUP { + // generate page + let mut values = Vec::with_capacity(VALUES_PER_PAGE); + let mut def_levels = Vec::with_capacity(VALUES_PER_PAGE); + for k in 0..VALUES_PER_PAGE { + let def_level = if rng.random::() < null_density { + max_def_level - 1 + } else { + max_def_level + }; + if def_level == max_def_level { + let value = FromPrimitive::from_usize(running_val).unwrap(); + running_val = if !stepped || k % 2 == 1 { + running_val + increment + } else { + running_val + }; + values.push(value); + } + def_levels.push(def_level); + } + let mut page_builder = + DataPageBuilderImpl::new(column_desc.clone(), values.len() as u32, true); + page_builder.add_rep_levels(max_rep_level, &rep_levels); + page_builder.add_def_levels(max_def_level, &def_levels); + page_builder.add_values::(Encoding::DELTA_BINARY_PACKED, &values); + column_chunk_pages.push(page_builder.consume()); + } + pages.push(column_chunk_pages); + } + + InMemoryPageIterator::new(pages) +} + fn build_dictionary_encoded_primitive_page_iterator( 
column_desc: ColumnDescPtr, null_density: f32, @@ -439,6 +491,52 @@ fn build_plain_encoded_byte_array_page_iterator_inner( InMemoryPageIterator::new(pages) } +fn build_constant_prefix_byte_array_page_iterator( + column_desc: ColumnDescPtr, + null_density: f32, + encoding: Encoding, + const_string: bool, +) -> impl PageIterator + Clone { + let max_def_level = column_desc.max_def_level(); + let max_rep_level = column_desc.max_rep_level(); + let rep_levels = vec![0; VALUES_PER_PAGE]; + let mut rng = seedable_rng(); + let mut pages: Vec> = Vec::new(); + for i in 0..NUM_ROW_GROUPS { + let mut column_chunk_pages = Vec::new(); + for j in 0..PAGES_PER_GROUP { + // generate page + let mut values = Vec::with_capacity(VALUES_PER_PAGE); + let mut def_levels = Vec::with_capacity(VALUES_PER_PAGE); + for k in 0..VALUES_PER_PAGE { + let def_level = if rng.random::() < null_density { + max_def_level - 1 + } else { + max_def_level + }; + if def_level == max_def_level { + let string_value = if const_string { + "01234567890123456789012345678901".to_string() + } else { + format!("01234567890123456789012345678901:{:x}{j}{i}", (k % 16)) + }; + values.push(parquet::data_type::ByteArray::from(string_value.as_str())); + } + def_levels.push(def_level); + } + let mut page_builder = + DataPageBuilderImpl::new(column_desc.clone(), values.len() as u32, true); + page_builder.add_rep_levels(max_rep_level, &rep_levels); + page_builder.add_def_levels(max_def_level, &def_levels); + page_builder.add_values::(encoding, &values); + column_chunk_pages.push(page_builder.consume()); + } + pages.push(column_chunk_pages); + } + + InMemoryPageIterator::new(pages) +} + fn build_plain_encoded_byte_array_page_iterator( column_desc: ColumnDescPtr, null_density: f32, @@ -1094,6 +1192,99 @@ fn bench_primitive( assert_eq!(count, EXPECTED_VALUE_COUNT); }); + // binary packed same value + let data = build_delta_encoded_incr_primitive_page_iterator::( + mandatory_column_desc.clone(), + 0.0, + 0, + false, + ); + 
group.bench_function("binary packed single value", |b| { + b.iter(|| { + let array_reader = + create_primitive_array_reader(data.clone(), mandatory_column_desc.clone()); + count = bench_array_reader(array_reader); + }); + assert_eq!(count, EXPECTED_VALUE_COUNT); + }); + + let data = build_delta_encoded_incr_primitive_page_iterator::( + mandatory_column_desc.clone(), + 0.0, + 0, + false, + ); + group.bench_function("binary packed skip single value", |b| { + b.iter(|| { + let array_reader = + create_primitive_array_reader(data.clone(), mandatory_column_desc.clone()); + count = bench_array_reader_skip(array_reader); + }); + assert_eq!(count, EXPECTED_VALUE_COUNT); + }); + + // binary packed monotonically increasing + let data = build_delta_encoded_incr_primitive_page_iterator::( + mandatory_column_desc.clone(), + 0.0, + 1, + false, + ); + group.bench_function("binary packed increasing value", |b| { + b.iter(|| { + let array_reader = + create_primitive_array_reader(data.clone(), mandatory_column_desc.clone()); + count = bench_array_reader(array_reader); + }); + assert_eq!(count, EXPECTED_VALUE_COUNT); + }); + + let data = build_delta_encoded_incr_primitive_page_iterator::( + mandatory_column_desc.clone(), + 0.0, + 1, + false, + ); + group.bench_function("binary packed skip increasing value", |b| { + b.iter(|| { + let array_reader = + create_primitive_array_reader(data.clone(), mandatory_column_desc.clone()); + count = bench_array_reader_skip(array_reader); + }); + assert_eq!(count, EXPECTED_VALUE_COUNT); + }); + + // binary packed increasing stepped + let data = build_delta_encoded_incr_primitive_page_iterator::( + mandatory_column_desc.clone(), + 0.0, + 1, + true, + ); + group.bench_function("binary packed stepped increasing value", |b| { + b.iter(|| { + let array_reader = + create_primitive_array_reader(data.clone(), mandatory_column_desc.clone()); + count = bench_array_reader(array_reader); + }); + assert_eq!(count, EXPECTED_VALUE_COUNT); + }); + + let data = 
build_delta_encoded_incr_primitive_page_iterator::( + mandatory_column_desc.clone(), + 0.0, + 1, + true, + ); + group.bench_function("binary packed skip stepped increasing value", |b| { + b.iter(|| { + let array_reader = + create_primitive_array_reader(data.clone(), mandatory_column_desc.clone()); + count = bench_array_reader_skip(array_reader); + }); + assert_eq!(count, EXPECTED_VALUE_COUNT); + }); + // dictionary encoded, no NULLs let data = build_dictionary_encoded_primitive_page_iterator::(mandatory_column_desc.clone(), 0.0); @@ -1594,6 +1785,66 @@ fn add_benches(c: &mut Criterion) { assert_eq!(count, EXPECTED_VALUE_COUNT); }); + // delta byte array with constant prefix and suffix lengths + let delta_string_const_prefix_no_null_data = build_constant_prefix_byte_array_page_iterator( + mandatory_string_column_desc.clone(), + 0.0, + Encoding::DELTA_BYTE_ARRAY, + false, + ); + group.bench_function( + "const prefix delta byte array encoded, mandatory, no NULLs", + |b| { + b.iter(|| { + let array_reader = create_byte_array_reader( + delta_string_const_prefix_no_null_data.clone(), + mandatory_string_column_desc.clone(), + ); + count = bench_array_reader(array_reader); + }); + assert_eq!(count, EXPECTED_VALUE_COUNT); + }, + ); + + // delta byte array with constant prefix and no suffix + let delta_string_const_no_null_data = build_constant_prefix_byte_array_page_iterator( + mandatory_string_column_desc.clone(), + 0.0, + Encoding::DELTA_BYTE_ARRAY, + true, + ); + group.bench_function("const delta byte array encoded, mandatory, no NULLs", |b| { + b.iter(|| { + let array_reader = create_byte_array_reader( + delta_string_const_no_null_data.clone(), + mandatory_string_column_desc.clone(), + ); + count = bench_array_reader(array_reader); + }); + assert_eq!(count, EXPECTED_VALUE_COUNT); + }); + + // delta length byte array with constant lengths + let delta_string_const_no_null_data = build_constant_prefix_byte_array_page_iterator( + mandatory_string_column_desc.clone(), + 0.0, 
+ Encoding::DELTA_LENGTH_BYTE_ARRAY, + true, + ); + group.bench_function( + "const delta length byte array encoded, mandatory, no NULLs", + |b| { + b.iter(|| { + let array_reader = create_byte_array_reader( + delta_string_const_no_null_data.clone(), + mandatory_string_column_desc.clone(), + ); + count = bench_array_reader(array_reader); + }); + assert_eq!(count, EXPECTED_VALUE_COUNT); + }, + ); + group.finish(); // binary benchmarks From fec3c021e85f34723250c413891f580657a1eb4f Mon Sep 17 00:00:00 2001 From: Tim-53 <82676248+Tim-53@users.noreply.github.com> Date: Mon, 9 Mar 2026 13:45:16 +0100 Subject: [PATCH 20/80] fix: remove incorrect debug assertion in BatchCoalescer (#9508) # Which issue does this PR close? - Closes https://github.com/apache/arrow-rs/issues/9506 # Rationale for this change `Vec::reserve(n)` does not guarantee exact capacity: Rust's `MIN_NON_ZERO_CAP` optimization means `reserve(2)` gives capacity = 4 for most numeric types, causing `debug_assert_eq!(capacity, batch_size)` to panic in debug mode when `batch_size < 4`. # What changes are included in this PR? Remove the `debug_assert_eq!` capacity check from `ensure_capacity` in both `InProgressPrimitiveArray` and `InProgressByteViewArray`. `reserve` only guarantees at least the requested capacity, so asserting that the capacity equals `batch_size` was incorrect. # Are these changes tested? No. This only fixes an incorrect debug assertion. # Are there any user-facing changes? 
No --- arrow-select/src/coalesce/byte_view.rs | 1 - arrow-select/src/coalesce/primitive.rs | 1 - 2 files changed, 2 deletions(-) diff --git a/arrow-select/src/coalesce/byte_view.rs b/arrow-select/src/coalesce/byte_view.rs index bca811fff1c6..6062cd5e77aa 100644 --- a/arrow-select/src/coalesce/byte_view.rs +++ b/arrow-select/src/coalesce/byte_view.rs @@ -101,7 +101,6 @@ impl InProgressByteViewArray { if self.views.capacity() == 0 { self.views.reserve(self.batch_size); } - debug_assert_eq!(self.views.capacity(), self.batch_size); } /// Finishes in progress buffer, if any diff --git a/arrow-select/src/coalesce/primitive.rs b/arrow-select/src/coalesce/primitive.rs index 69dad221bd52..a7f2fb32ce49 100644 --- a/arrow-select/src/coalesce/primitive.rs +++ b/arrow-select/src/coalesce/primitive.rs @@ -58,7 +58,6 @@ impl InProgressPrimitiveArray { if self.current.capacity() == 0 { self.current.reserve(self.batch_size); } - debug_assert_eq!(self.current.capacity(), self.batch_size); } } From edd2c8eef5a7b702947a25e3223539e3723d5aac Mon Sep 17 00:00:00 2001 From: Matthew Kim <38759997+friendlymatthew@users.noreply.github.com> Date: Mon, 9 Mar 2026 12:57:17 -0400 Subject: [PATCH 21/80] support large string for unshred variant (#9515) # Which issue does this PR close? - Closes https://github.com/apache/arrow-rs/issues/9513 # Rationale for this change `VariantArray::try_new` and `canonicalize_and_verify_data_type` both accept `LargeUtf8` as a valid shredded variant type. 
However unshred_variant currently only handles Utf8 for string typed_value columns. This means a VariantArray with a LargeUtf8 typed_value column can be constructed successfully, but calling unshred_variant on it fails. --- .../src/unshred_variant.rs | 44 ++++++++++++++++++- 1 file changed, 42 insertions(+), 2 deletions(-) diff --git a/parquet-variant-compute/src/unshred_variant.rs b/parquet-variant-compute/src/unshred_variant.rs index 3600662915a5..0fba53b31539 100644 --- a/parquet-variant-compute/src/unshred_variant.rs +++ b/parquet-variant-compute/src/unshred_variant.rs @@ -20,8 +20,8 @@ use crate::{BorrowedShreddingState, VariantArray, VariantValueArrayBuilder}; use arrow::array::{ Array, AsArray as _, BinaryViewArray, BooleanArray, FixedSizeBinaryArray, FixedSizeListArray, - GenericListArray, GenericListViewArray, ListLikeArray, PrimitiveArray, StringArray, - StructArray, + GenericListArray, GenericListViewArray, LargeStringArray, ListLikeArray, PrimitiveArray, + StringArray, StructArray, }; use arrow::buffer::NullBuffer; use arrow::datatypes::{ @@ -105,6 +105,7 @@ enum UnshredVariantRowBuilder<'a> { TimestampNanosecond(TimestampUnshredRowBuilder<'a, TimestampNanosecondType>), PrimitiveBoolean(UnshredPrimitiveRowBuilder<'a, BooleanArray>), PrimitiveString(UnshredPrimitiveRowBuilder<'a, StringArray>), + PrimitiveLargeString(UnshredPrimitiveRowBuilder<'a, LargeStringArray>), PrimitiveBinaryView(UnshredPrimitiveRowBuilder<'a, BinaryViewArray>), PrimitiveUuid(UnshredPrimitiveRowBuilder<'a, FixedSizeBinaryArray>), List(ListUnshredVariantBuilder<'a, GenericListArray>), @@ -146,6 +147,7 @@ impl<'a> UnshredVariantRowBuilder<'a> { Self::TimestampNanosecond(b) => b.append_row(builder, metadata, index), Self::PrimitiveBoolean(b) => b.append_row(builder, metadata, index), Self::PrimitiveString(b) => b.append_row(builder, metadata, index), + Self::PrimitiveLargeString(b) => b.append_row(builder, metadata, index), Self::PrimitiveBinaryView(b) => b.append_row(builder, 
metadata, index), Self::PrimitiveUuid(b) => b.append_row(builder, metadata, index), Self::List(b) => b.append_row(builder, metadata, index), @@ -226,6 +228,7 @@ impl<'a> UnshredVariantRowBuilder<'a> { } DataType::Boolean => primitive_builder!(PrimitiveBoolean, as_boolean), DataType::Utf8 => primitive_builder!(PrimitiveString, as_string), + DataType::LargeUtf8 => primitive_builder!(PrimitiveLargeString, as_string), DataType::BinaryView => primitive_builder!(PrimitiveBinaryView, as_binary_view), DataType::FixedSizeBinary(16) => { primitive_builder!(PrimitiveUuid, as_fixed_size_binary) @@ -405,6 +408,7 @@ macro_rules! impl_append_to_variant_builder { impl_append_to_variant_builder!(BooleanArray); impl_append_to_variant_builder!(StringArray); +impl_append_to_variant_builder!(LargeStringArray); impl_append_to_variant_builder!(BinaryViewArray); impl_append_to_variant_builder!(PrimitiveArray); impl_append_to_variant_builder!(PrimitiveArray); @@ -666,3 +670,39 @@ impl<'a, L: ListLikeArray> ListUnshredVariantBuilder<'a, L> { // TODO: This code is covered by tests in `parquet/tests/variant_integration.rs`. Does that suffice? // Or do we also need targeted stand-alone unit tests for full coverage? 
+ +#[cfg(test)] +mod tests { + use crate::VariantArray; + use arrow::array::{BinaryViewArray, LargeStringArray}; + use parquet_variant::Variant; + + #[test] + fn test_unshred_largeutf8_typed_value() { + let metadata_bytes: &[u8] = &[0x01, 0x00, 0x00]; + let metadata = + BinaryViewArray::from_iter_values(vec![metadata_bytes; 3]); + + let typed_value: arrow::array::ArrayRef = std::sync::Arc::new( + LargeStringArray::from(vec![ + Some("hello"), + Some("middle"), + Some("world"), + ]), + ); + + let variant_array = VariantArray::from_parts( + metadata, + None, + Some(typed_value), + None, + ); + + let result = crate::unshred_variant(&variant_array).unwrap(); + + assert_eq!(result.len(), 3); + assert_eq!(result.value(0), Variant::from("hello")); + assert_eq!(result.value(1), Variant::from("middle")); + assert_eq!(result.value(2), Variant::from("world")); + } +} From 0b044835a8180100c89b60d856e9f67634b5d5e7 Mon Sep 17 00:00:00 2001 From: Matthew Kim <38759997+friendlymatthew@users.noreply.github.com> Date: Mon, 9 Mar 2026 14:41:30 -0400 Subject: [PATCH 22/80] support string view unshred variant (#9514) # Which issue does this PR close? 
- Closes https://github.com/apache/arrow-rs/issues/9512 # Rationale for this change You can build a Variant with a StringView type shredded out, but calling `unshred_variant` will fail with not yet implemented --- .../src/unshred_variant.rs | 51 ++++++++++++------- 1 file changed, 33 insertions(+), 18 deletions(-) diff --git a/parquet-variant-compute/src/unshred_variant.rs b/parquet-variant-compute/src/unshred_variant.rs index 0fba53b31539..cfe413460086 100644 --- a/parquet-variant-compute/src/unshred_variant.rs +++ b/parquet-variant-compute/src/unshred_variant.rs @@ -21,7 +21,7 @@ use crate::{BorrowedShreddingState, VariantArray, VariantValueArrayBuilder}; use arrow::array::{ Array, AsArray as _, BinaryViewArray, BooleanArray, FixedSizeBinaryArray, FixedSizeListArray, GenericListArray, GenericListViewArray, LargeStringArray, ListLikeArray, PrimitiveArray, - StringArray, StructArray, + StringArray, StringViewArray, StructArray, }; use arrow::buffer::NullBuffer; use arrow::datatypes::{ @@ -105,6 +105,7 @@ enum UnshredVariantRowBuilder<'a> { TimestampNanosecond(TimestampUnshredRowBuilder<'a, TimestampNanosecondType>), PrimitiveBoolean(UnshredPrimitiveRowBuilder<'a, BooleanArray>), PrimitiveString(UnshredPrimitiveRowBuilder<'a, StringArray>), + PrimitiveStringView(UnshredPrimitiveRowBuilder<'a, StringViewArray>), PrimitiveLargeString(UnshredPrimitiveRowBuilder<'a, LargeStringArray>), PrimitiveBinaryView(UnshredPrimitiveRowBuilder<'a, BinaryViewArray>), PrimitiveUuid(UnshredPrimitiveRowBuilder<'a, FixedSizeBinaryArray>), @@ -147,6 +148,7 @@ impl<'a> UnshredVariantRowBuilder<'a> { Self::TimestampNanosecond(b) => b.append_row(builder, metadata, index), Self::PrimitiveBoolean(b) => b.append_row(builder, metadata, index), Self::PrimitiveString(b) => b.append_row(builder, metadata, index), + Self::PrimitiveStringView(b) => b.append_row(builder, metadata, index), Self::PrimitiveLargeString(b) => b.append_row(builder, metadata, index), Self::PrimitiveBinaryView(b) => 
b.append_row(builder, metadata, index), Self::PrimitiveUuid(b) => b.append_row(builder, metadata, index), @@ -228,6 +230,7 @@ impl<'a> UnshredVariantRowBuilder<'a> { } DataType::Boolean => primitive_builder!(PrimitiveBoolean, as_boolean), DataType::Utf8 => primitive_builder!(PrimitiveString, as_string), + DataType::Utf8View => primitive_builder!(PrimitiveStringView, as_string_view), DataType::LargeUtf8 => primitive_builder!(PrimitiveLargeString, as_string), DataType::BinaryView => primitive_builder!(PrimitiveBinaryView, as_binary_view), DataType::FixedSizeBinary(16) => { @@ -408,6 +411,7 @@ macro_rules! impl_append_to_variant_builder { impl_append_to_variant_builder!(BooleanArray); impl_append_to_variant_builder!(StringArray); +impl_append_to_variant_builder!(StringViewArray); impl_append_to_variant_builder!(LargeStringArray); impl_append_to_variant_builder!(BinaryViewArray); impl_append_to_variant_builder!(PrimitiveArray); @@ -668,35 +672,46 @@ impl<'a, L: ListLikeArray> ListUnshredVariantBuilder<'a, L> { } } -// TODO: This code is covered by tests in `parquet/tests/variant_integration.rs`. Does that suffice? -// Or do we also need targeted stand-alone unit tests for full coverage? 
- #[cfg(test)] mod tests { use crate::VariantArray; - use arrow::array::{BinaryViewArray, LargeStringArray}; + use arrow::array::{BinaryViewArray, LargeStringArray, StringViewArray}; use parquet_variant::Variant; + #[test] + fn test_unshred_utf8view_typed_value() { + let metadata_bytes: &[u8] = &[0x01, 0x00, 0x00]; + let metadata = BinaryViewArray::from_iter_values(vec![metadata_bytes; 3]); + + let typed_value: arrow::array::ArrayRef = std::sync::Arc::new(StringViewArray::from(vec![ + Some("hello"), + Some("middle"), + Some("world"), + ])); + + let variant_array = VariantArray::from_parts(metadata, None, Some(typed_value), None); + + let result = crate::unshred_variant(&variant_array).unwrap(); + + assert_eq!(result.len(), 3); + assert_eq!(result.value(0), Variant::from("hello")); + assert_eq!(result.value(1), Variant::from("middle")); + assert_eq!(result.value(2), Variant::from("world")); + } + #[test] fn test_unshred_largeutf8_typed_value() { let metadata_bytes: &[u8] = &[0x01, 0x00, 0x00]; - let metadata = - BinaryViewArray::from_iter_values(vec![metadata_bytes; 3]); + let metadata = BinaryViewArray::from_iter_values(vec![metadata_bytes; 3]); - let typed_value: arrow::array::ArrayRef = std::sync::Arc::new( - LargeStringArray::from(vec![ + let typed_value: arrow::array::ArrayRef = + std::sync::Arc::new(LargeStringArray::from(vec![ Some("hello"), Some("middle"), Some("world"), - ]), - ); - - let variant_array = VariantArray::from_parts( - metadata, - None, - Some(typed_value), - None, - ); + ])); + + let variant_array = VariantArray::from_parts(metadata, None, Some(typed_value), None); let result = crate::unshred_variant(&variant_array).unwrap(); From d2e2cdafed93a8e0152fe1d018ec2cef154ccb20 Mon Sep 17 00:00:00 2001 From: Jonas Dedden Date: Mon, 9 Mar 2026 21:32:53 +0100 Subject: [PATCH 23/80] Fix skip_records over-counting when partial record precedes num_rows page skip (#9374) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 
8bit # Which issue does this PR close? - Closes #9370 . # Rationale for this change The bug occurs when using RowSelection with nested types (like List) when: 1. A column has multiple pages in a row group 2. The selected rows span across page boundaries 3. The first page is entirely consumed during skip operations The issue was in `arrow-rs/parquet/src/column/reader.rs:287-382` (`skip_records` function). **Root cause:** When `skip_records` completed successfully after crossing page boundaries, the `has_partial` state in the `RepetitionLevelDecoder` could incorrectly remain true. This happened when: - The skip operation exhausted a page where has_record_delimiter was false - The skip found the remaining records on the next page by counting a delimiter at index 0 - When a subsequent read_records(1) was called, the stale has_partial=true state caused count_records to incorrectly interpret the first repetition level (0) at index 0 as ending a "phantom" partial record, returning (1 record, 0 levels, 0 values) instead of properly reading the actual record data. For a more descriptive explanation, look here: https://github.com/apache/arrow-rs/issues/9370#issuecomment-3861143928 # What changes are included in this PR? Added code at the end of skip_records to reset the partial record state when all requested records have been successfully skipped. This ensures that after skip_records completes, we're at a clean record boundary with no lingering partial record state, fixing the array length mismatch in StructArrayReader. # Are these changes tested? Commit https://github.com/apache/arrow-rs/commit/365bd9a4ced7897f391e4533930a0c9683952723 introduces a test showcasing this issue with v2 data pages only on a unit-test level. PR https://github.com/apache/arrow-rs/pull/9399 could be used to showcase the issue in an end-to-end way. 
Previously wrong assumption that thought it had to do with mixing v1 and v2 data pages: ``` In b52e043 I added a test that I validated to fail whenever I remove my fix. Bug Mechanism The bug requires three ingredients: 1. Page 1 (DataPage v1): Contains a nested column (with rep levels). During skip_records, all levels on this page are consumed. count_records sees no following rep=0 delimiter, so it sets has_partial=true. Since has_record_delimiter is false (the default InMemoryPageReader returns false when more pages exist), flush_partial is not called. 2. Page 2 (DataPage v2): Has num_rows available in its metadata. When num_rows <= remaining_records, the entire page is skipped via skip_next_page() — this does not touch the rep level decoder at all, so has_partial remains stale true from page 1. 3. Page 3 (DataPage v1): When read_records loads this page, the stale has_partial=true causes the rep=0 at position 0 to be misinterpreted as completing a "phantom" partial record. This produces (1 record, 0 levels, 0 values) instead of reading the actual record data. Test Verification - With fix (flush_partial at end of skip_records): read_records(1) correctly returns (1, 2, 2) with values [70, 80] - Without fix: read_records(1) returns (1, 0, 0) — a phantom record with no data, which is what causes the "Not all children array length are the same!" 
error when different sibling columns in a struct produce different record counts ``` --------- Co-authored-by: Ed Seidl Co-authored-by: Andrew Lamb --- parquet/src/column/page.rs | 9 +- parquet/src/column/reader.rs | 131 ++++++++++++++++++ parquet/src/file/serialized_reader.rs | 7 +- parquet/tests/arrow_reader/row_filter/sync.rs | 2 - 4 files changed, 145 insertions(+), 4 deletions(-) diff --git a/parquet/src/column/page.rs b/parquet/src/column/page.rs index f18b296c1c65..4cfc07a02883 100644 --- a/parquet/src/column/page.rs +++ b/parquet/src/column/page.rs @@ -406,7 +406,14 @@ pub trait PageReader: Iterator> + Send { /// [(#4327)]: https://github.com/apache/arrow-rs/pull/4327 /// [(#4943)]: https://github.com/apache/arrow-rs/pull/4943 fn at_record_boundary(&mut self) -> Result { - Ok(self.peek_next_page()?.is_none()) + match self.peek_next_page()? { + // Last page in the column chunk - always a record boundary + None => Ok(true), + // A V2 data page is required by the parquet spec to start at a + // record boundary, so the current page ends at one. V2 pages + // are identified by having `num_rows` set in their header. + Some(metadata) => Ok(metadata.num_rows.is_some()), + } } } diff --git a/parquet/src/column/reader.rs b/parquet/src/column/reader.rs index 387a0602a60d..29cb50185a58 100644 --- a/parquet/src/column/reader.rs +++ b/parquet/src/column/reader.rs @@ -1361,4 +1361,135 @@ mod tests { ); } } + + /// Regression test for + /// + /// Reproduces the production scenario: all DataPage v2 pages for a + /// list column (rep_level=1) read without an offset index (i.e. + /// `at_record_boundary` returns false for non-last pages). + /// + /// When a prior operation (here `skip_records(1)`) loads a v2 page, + /// and a subsequent `skip_records` exhausts the remaining levels on + /// that page, the rep level decoder is left with `has_partial=true`. + /// Because `has_record_delimiter` is false, the partial is not + /// flushed during level-based processing. 
When the next v2 page is + /// then peeked with `num_rows` available, the whole-page-skip + /// shortcut must flush the pending partial first. Otherwise: + /// + /// 1. The skip over-counts (skips N+1 records instead of N), and + /// 2. The stale `has_partial` causes a subsequent `read_records` to + /// produce a "phantom" record with 0 values. + #[test] + fn test_skip_records_v2_page_skip_accounts_for_partial() { + use crate::encodings::levels::LevelEncoder; + + let max_rep_level: i16 = 1; + let max_def_level: i16 = 1; + + // Column descriptor for a list element column (rep=1, def=1) + let primitive_type = SchemaType::primitive_type_builder("element", PhysicalType::INT32) + .with_repetition(Repetition::REQUIRED) + .build() + .unwrap(); + let desc = Arc::new(ColumnDescriptor::new( + Arc::new(primitive_type), + max_def_level, + max_rep_level, + ColumnPath::new(vec!["list".to_string(), "element".to_string()]), + )); + + // Helper: build a DataPage v2 for this list column. + let make_v2_page = + |rep_levels: &[i16], def_levels: &[i16], values: &[i32], num_rows: u32| -> Page { + let mut rep_enc = LevelEncoder::v2(max_rep_level, rep_levels.len()); + rep_enc.put(rep_levels); + let rep_bytes = rep_enc.consume(); + + let mut def_enc = LevelEncoder::v2(max_def_level, def_levels.len()); + def_enc.put(def_levels); + let def_bytes = def_enc.consume(); + + let val_bytes: Vec = values.iter().flat_map(|v| v.to_le_bytes()).collect(); + + let mut buf = Vec::new(); + buf.extend_from_slice(&rep_bytes); + buf.extend_from_slice(&def_bytes); + buf.extend_from_slice(&val_bytes); + + Page::DataPageV2 { + buf: Bytes::from(buf), + num_values: rep_levels.len() as u32, + encoding: Encoding::PLAIN, + num_nulls: 0, + num_rows, + def_levels_byte_len: def_bytes.len() as u32, + rep_levels_byte_len: rep_bytes.len() as u32, + is_compressed: false, + statistics: None, + } + }; + + // All pages are DataPage v2 (matching the production scenario where + // parquet-rs writes only v2 data pages and no 
offset index is loaded, + // so at_record_boundary() returns false for non-last pages). + + // Page 1 (v2): 2 records × 2 elements = [10,20], [30,40] + let page1 = make_v2_page(&[0, 1, 0, 1], &[1, 1, 1, 1], &[10, 20, 30, 40], 2); + + // Page 2 (v2): 2 records × 2 elements = [50,60], [70,80] + let page2 = make_v2_page(&[0, 1, 0, 1], &[1, 1, 1, 1], &[50, 60, 70, 80], 2); + + // Page 3 (v2): 1 record × 2 elements = [90,100] + let page3 = make_v2_page(&[0, 1], &[1, 1], &[90, 100], 1); + + // 5 records total: [10,20], [30,40], [50,60], [70,80], [90,100] + let pages = VecDeque::from(vec![page1, page2, page3]); + let page_reader = InMemoryPageReader::new(pages); + let column_reader: ColumnReader = get_column_reader(desc, Box::new(page_reader)); + let mut typed_reader = get_typed_column_reader::(column_reader); + + // Step 1 — skip 1 record: + // Peek page 1: num_rows=2, remaining=1 → rows(2) > remaining(1), + // so the page is LOADED (not whole-page-skipped). + // Level-based skip consumes rep levels [0,1] for record [10,20], + // stopping at the 0 that starts record [30,40]. + let skipped = typed_reader.skip_records(1).unwrap(); + assert_eq!(skipped, 1); + + // Step 2 — skip 2 more records ([30,40] and [50,60]): + // Mid-page in page 1 with 2 remaining levels [0,1] for [30,40]. + // skip_rep_levels(2, 2): the leading 0 does NOT act as a record + // delimiter (has_partial=false, idx==0), so count_records returns + // (true, 0, 2) — all levels consumed, has_partial=true, 0 records. + // + // has_record_delimiter is false → no flush at page boundary. + // Page 1 exhausted → peek page 2 (v2, num_rows=2). + // + // With fix: flush_partial → remaining 2→1, page 2 NOT skipped + // (rows=2 > remaining=1). Load page 2, skip 1 record [50,60]. + // + // Without fix: rows(2) <= remaining(2) → page 2 whole-page-skipped, + // over-counting by 1. has_partial stays true (stale from page 1). 
+ let skipped = typed_reader.skip_records(2).unwrap(); + assert_eq!(skipped, 2); + + // Step 3 — read 1 record: + let mut values = Vec::new(); + let mut def_levels = Vec::new(); + let mut rep_levels = Vec::new(); + + let (records, values_read, levels_read) = typed_reader + .read_records(1, Some(&mut def_levels), Some(&mut rep_levels), &mut values) + .unwrap(); + + // Without the fix: (1, 0, 0) — phantom record from stale has_partial; + // the rep=0 on page 3 "completes" the phantom, yielding 0 values. + // With the fix: (1, 2, 2) — correctly reads record [70, 80]. + assert_eq!(records, 1, "should read exactly 1 record"); + assert_eq!(levels_read, 2, "should read 2 levels for the record"); + assert_eq!(values_read, 2, "should read 2 non-null values"); + assert_eq!(values, vec![70, 80], "should contain 4th record's values"); + assert_eq!(rep_levels, vec![0, 1], "rep levels for a 2-element list"); + assert_eq!(def_levels, vec![1, 1], "def levels (all non-null)"); + } } diff --git a/parquet/src/file/serialized_reader.rs b/parquet/src/file/serialized_reader.rs index b3b6383f78bb..254ccb779a4a 100644 --- a/parquet/src/file/serialized_reader.rs +++ b/parquet/src/file/serialized_reader.rs @@ -1158,7 +1158,12 @@ impl PageReader for SerializedPageReader { fn at_record_boundary(&mut self) -> Result { match &mut self.state { - SerializedPageReaderState::Values { .. } => Ok(self.peek_next_page()?.is_none()), + SerializedPageReaderState::Values { .. } => match self.peek_next_page()? { + None => Ok(true), + // V2 data pages must start at record boundaries per the parquet + // spec, so the current page ends at one. + Some(metadata) => Ok(metadata.num_rows.is_some()), + }, SerializedPageReaderState::Pages { .. 
} => Ok(true), } } diff --git a/parquet/tests/arrow_reader/row_filter/sync.rs b/parquet/tests/arrow_reader/row_filter/sync.rs index e59fa392cfd4..77a75220dc2e 100644 --- a/parquet/tests/arrow_reader/row_filter/sync.rs +++ b/parquet/tests/arrow_reader/row_filter/sync.rs @@ -206,7 +206,6 @@ fn test_row_filter_full_page_skip_is_handled() { /// Without the fix, the list column over-skips by one record, causing /// struct children to disagree on record counts. #[test] -#[should_panic(expected = "StructArrayReader out of sync in read_records, expected 1 read, got 0")] fn test_row_selection_list_column_v2_page_boundary_skip() { use arrow_array::builder::{Int32Builder, ListBuilder}; @@ -327,7 +326,6 @@ fn test_row_selection_list_column_v2_page_boundary_skip() { /// bug causes one leaf to over-skip by one record while the other stays /// correct. #[test] -#[should_panic(expected = "Not all children array length are the same!")] fn test_list_struct_page_boundary_desync_produces_length_mismatch() { use arrow_array::Array; use arrow_array::builder::{Int32Builder, ListBuilder, StringBuilder, StructBuilder}; From 33aed330b962d40e6e6b456bc4cd13ec80967f75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Heres?= Date: Wed, 11 Mar 2026 08:55:51 +0100 Subject: [PATCH 24/80] Make with_file_decryption_properties pub instead of pub(crate) (#9532) # Which issue does this PR close? - Closes #NNN. # Rationale for this change I would like to use `ParquetMetaDataPushDecoder` in arrow-datafusion, but the `with_file_decryption_properties` function is pub(crate), so I can't fully implement the encryption feature. # What changes are included in this PR? Make it pub # Are these changes tested? Not needed # Are there any user-facing changes? 
Now pub --- parquet/src/file/metadata/push_decoder.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parquet/src/file/metadata/push_decoder.rs b/parquet/src/file/metadata/push_decoder.rs index abc788426260..e322525b7193 100644 --- a/parquet/src/file/metadata/push_decoder.rs +++ b/parquet/src/file/metadata/push_decoder.rs @@ -308,7 +308,7 @@ impl ParquetMetaDataPushDecoder { #[cfg(feature = "encryption")] /// Provide decryption properties for decoding encrypted Parquet files - pub(crate) fn with_file_decryption_properties( + pub fn with_file_decryption_properties( mut self, file_decryption_properties: Option>, ) -> Self { From d3c79006f2595e144d539f56b3054fe916ab184b Mon Sep 17 00:00:00 2001 From: Qi Zhu <821684824@qq.com> Date: Wed, 11 Mar 2026 18:37:37 +0800 Subject: [PATCH 25/80] fix: handle Null type in try_merge for Struct, List, LargeList, and Union (#9524) # Which issue does this PR close? Field::try_merge correctly handles DataType::Null for primitive types and when self is Null, but fails when self is a compound type (Struct, List, LargeList, Union) and from is Null. This causes Schema::try_merge to error when merging schemas where one has a Null field and another has a concrete compound type for the same field. This is common in JSON inference where some files have null values for fields that are structs/lists in other files. - Closes [#9523](https://github.com/apache/arrow-rs/issues/9523) # Rationale for this change Add `DataType::Null` arms to the Struct, List, LargeList, and Union branches in `Field::try_merge`, consistent with how primitive types already handle it. # What changes are included in this PR? Add `DataType::Null` arms to the Struct, List, LargeList, and Union branches in `Field::try_merge`, consistent with how primitive types already handle it. # Are these changes tested? - Added test `test_merge_compound_with_null` covering Struct, List, LargeList, and Union merging with Null in both directions. 
- Existing tests continue to pass. # Are there any user-facing changes? No --- arrow-schema/src/field.rs | 66 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/arrow-schema/src/field.rs b/arrow-schema/src/field.rs index a1c509abf2e0..1f2b57564ded 100644 --- a/arrow-schema/src/field.rs +++ b/arrow-schema/src/field.rs @@ -830,6 +830,9 @@ impl Field { .try_for_each(|f| builder.try_merge(f))?; *nested_fields = builder.finish().fields; } + DataType::Null => { + self.nullable = true; + } _ => { return Err(ArrowError::SchemaError(format!( "Fail to merge schema field '{}' because the from data_type = {} is not DataType::Struct", @@ -841,6 +844,9 @@ impl Field { DataType::Union(from_nested_fields, _) => { nested_fields.try_merge(from_nested_fields)? } + DataType::Null => { + self.nullable = true; + } _ => { return Err(ArrowError::SchemaError(format!( "Fail to merge schema field '{}' because the from data_type = {} is not DataType::Union", @@ -854,6 +860,9 @@ impl Field { f.try_merge(from_field)?; (*field) = Arc::new(f); } + DataType::Null => { + self.nullable = true; + } _ => { return Err(ArrowError::SchemaError(format!( "Fail to merge schema field '{}' because the from data_type = {} is not DataType::List", @@ -867,6 +876,9 @@ impl Field { f.try_merge(from_field)?; (*field) = Arc::new(f); } + DataType::Null => { + self.nullable = true; + } _ => { return Err(ArrowError::SchemaError(format!( "Fail to merge schema field '{}' because the from data_type = {} is not DataType::LargeList", @@ -1461,4 +1473,58 @@ mod test { assert_binary_serde_round_trip(field) } + + #[test] + fn test_merge_compound_with_null() { + // Struct + Null + let mut field = Field::new( + "s", + DataType::Struct(Fields::from(vec![Field::new("a", DataType::Int32, false)])), + false, + ); + field + .try_merge(&Field::new("s", DataType::Null, true)) + .expect("Struct should merge with Null"); + assert!(field.is_nullable()); + assert!(matches!(field.data_type(), 
DataType::Struct(_))); + + // List + Null + let mut field = Field::new( + "l", + DataType::List(Field::new("item", DataType::Utf8, false).into()), + false, + ); + field + .try_merge(&Field::new("l", DataType::Null, true)) + .expect("List should merge with Null"); + assert!(field.is_nullable()); + assert!(matches!(field.data_type(), DataType::List(_))); + + // LargeList + Null + let mut field = Field::new( + "ll", + DataType::LargeList(Field::new("item", DataType::Utf8, false).into()), + false, + ); + field + .try_merge(&Field::new("ll", DataType::Null, true)) + .expect("LargeList should merge with Null"); + assert!(field.is_nullable()); + assert!(matches!(field.data_type(), DataType::LargeList(_))); + + // Union + Null + let mut field = Field::new( + "u", + DataType::Union( + UnionFields::try_new(vec![0], vec![Field::new("f", DataType::Int32, false)]) + .unwrap(), + UnionMode::Dense, + ), + false, + ); + field + .try_merge(&Field::new("u", DataType::Null, true)) + .expect("Union should merge with Null"); + assert!(matches!(field.data_type(), DataType::Union(_, _))); + } } From a475f844d8473eb1d69baebf4337e1c1e1de235c Mon Sep 17 00:00:00 2001 From: Liam Bao Date: Wed, 11 Mar 2026 13:50:02 -0400 Subject: [PATCH 26/80] [Json] Add benchmarks for list json reader (#9507) # Which issue does this PR close? - Relates to #9497. # Rationale for this change Add benchmark for `ListArray` in `json_reader` to support the performance evaluation of #9497 # What changes are included in this PR? - Benchmarks for decoding and serialize json list to `ListArray`. - Benchmarks for `ListArray` and `FixedSizeListArray` json writer # Are these changes tested? Benchmarks only # Are there any user-facing changes? 
No --- arrow-json/Cargo.toml | 6 +- .../{json-reader.rs => json_reader.rs} | 87 +++++++++++- arrow-json/benches/json_writer.rs | 129 ++++++++++++++++++ 3 files changed, 220 insertions(+), 2 deletions(-) rename arrow-json/benches/{json-reader.rs => json_reader.rs} (73%) create mode 100644 arrow-json/benches/json_writer.rs diff --git a/arrow-json/Cargo.toml b/arrow-json/Cargo.toml index 5fcde480eb6d..be1f8d0ccdca 100644 --- a/arrow-json/Cargo.toml +++ b/arrow-json/Cargo.toml @@ -67,5 +67,9 @@ name = "serde" harness = false [[bench]] -name = "json-reader" +name = "json_reader" +harness = false + +[[bench]] +name = "json_writer" harness = false diff --git a/arrow-json/benches/json-reader.rs b/arrow-json/benches/json_reader.rs similarity index 73% rename from arrow-json/benches/json-reader.rs rename to arrow-json/benches/json_reader.rs index 504839f8ffe2..f87ba695eb62 100644 --- a/arrow-json/benches/json-reader.rs +++ b/arrow-json/benches/json_reader.rs @@ -32,6 +32,8 @@ const BATCH_SIZE: usize = 1 << 13; // 8K rows per batch const WIDE_FIELDS: usize = 64; const BINARY_BYTES: usize = 64; const WIDE_PROJECTION_TOTAL_FIELDS: usize = 100; // 100 fields total, select only 3 +const LIST_SHORT_ELEMENTS: usize = 5; +const LIST_LONG_ELEMENTS: usize = 100; fn decode_and_flush(decoder: &mut Decoder, data: &[u8]) { let mut offset = 0; @@ -240,11 +242,94 @@ fn bench_wide_projection(c: &mut Criterion) { ); } +fn build_list_json(rows: usize, elements: usize) -> Vec { + // Builds newline-delimited JSON objects with a single list field. 
+ // Example (rows=2, elements=3): + // {"list":[0,1,2]} + // {"list":[1,2,3]} + let mut out = String::with_capacity(rows * (elements * 6 + 16)); + for row in 0..rows { + out.push_str("{\"list\":["); + for i in 0..elements { + if i > 0 { + out.push(','); + } + write!(&mut out, "{}", (row + i) as i64).unwrap(); + } + out.push_str("]}\n"); + } + out.into_bytes() +} + +fn build_list_values(rows: usize, elements: usize) -> Vec { + // Mirrors build_list_json but returns structured serde_json::Value objects. + let mut out = Vec::with_capacity(rows); + for row in 0..rows { + let arr: Vec = (0..elements) + .map(|i| Value::Number(Number::from((row + i) as i64))) + .collect(); + let mut map = Map::with_capacity(1); + map.insert("list".to_string(), Value::Array(arr)); + out.push(Value::Object(map)); + } + out +} + +fn build_list_schema() -> Arc { + Arc::new(Schema::new(vec![Field::new( + "list", + DataType::List(Arc::new(Field::new_list_field(DataType::Int64, false))), + false, + )])) +} + +fn bench_decode_list(c: &mut Criterion) { + let schema = build_list_schema(); + + // Short lists: tests list handling overhead (few elements per row) + let short_data = build_list_json(ROWS, LIST_SHORT_ELEMENTS); + bench_decode_schema(c, "decode_list_short_i64_json", &short_data, schema.clone()); + + // Long lists: tests child element decode throughput (many elements per row) + let long_data = build_list_json(ROWS, LIST_LONG_ELEMENTS); + bench_decode_schema(c, "decode_list_long_i64_json", &long_data, schema); +} + +fn bench_serialize_list(c: &mut Criterion) { + let schema = build_list_schema(); + + let short_values = build_list_values(ROWS, LIST_SHORT_ELEMENTS); + c.bench_function("decode_list_short_i64_serialize", |b| { + b.iter(|| { + let mut decoder = ReaderBuilder::new(schema.clone()) + .with_batch_size(BATCH_SIZE) + .build_decoder() + .unwrap(); + decoder.serialize(&short_values).unwrap(); + while let Some(_batch) = decoder.flush().unwrap() {} + }) + }); + + let long_values = 
build_list_values(ROWS, LIST_LONG_ELEMENTS); + c.bench_function("decode_list_long_i64_serialize", |b| { + b.iter(|| { + let mut decoder = ReaderBuilder::new(schema.clone()) + .with_batch_size(BATCH_SIZE) + .build_decoder() + .unwrap(); + decoder.serialize(&long_values).unwrap(); + while let Some(_batch) = decoder.flush().unwrap() {} + }) + }); +} + criterion_group!( benches, bench_decode_wide_object, bench_serialize_wide_object, bench_binary_hex, - bench_wide_projection + bench_wide_projection, + bench_decode_list, + bench_serialize_list ); criterion_main!(benches); diff --git a/arrow-json/benches/json_writer.rs b/arrow-json/benches/json_writer.rs new file mode 100644 index 000000000000..b37ea542efee --- /dev/null +++ b/arrow-json/benches/json_writer.rs @@ -0,0 +1,129 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use arrow_array::builder::{FixedSizeListBuilder, Int64Builder, ListBuilder}; +use arrow_array::{Array, RecordBatch}; +use arrow_json::LineDelimitedWriter; +use arrow_schema::{Field, Schema}; +use criterion::{Criterion, Throughput, criterion_group, criterion_main}; +use std::sync::Arc; + +const ROWS: usize = 1 << 17; // 128K rows +const LIST_SHORT_ELEMENTS: usize = 5; +const LIST_LONG_ELEMENTS: usize = 100; + +fn build_list_batch(rows: usize, elements: usize) -> RecordBatch { + let mut list_builder = ListBuilder::new(Int64Builder::new()); + for row in 0..rows { + for i in 0..elements { + list_builder.values().append_value((row + i) as i64); + } + list_builder.append(true); + } + let list_array = list_builder.finish(); + + let schema = Arc::new(Schema::new(vec![Field::new( + "list", + list_array.data_type().clone(), + false, + )])); + + RecordBatch::try_new(schema, vec![Arc::new(list_array)]).unwrap() +} + +fn bench_write_list(c: &mut Criterion) { + let short_batch = build_list_batch(ROWS, LIST_SHORT_ELEMENTS); + let long_batch = build_list_batch(ROWS, LIST_LONG_ELEMENTS); + + let mut group = c.benchmark_group("write_list_i64"); + // Short lists: tests per-list overhead (few elements per row) + group.throughput(Throughput::Elements(ROWS as u64)); + group.bench_function("short", |b| { + let mut buf = Vec::with_capacity(ROWS * LIST_SHORT_ELEMENTS * 8); + b.iter(|| { + buf.clear(); + let mut writer = LineDelimitedWriter::new(&mut buf); + writer.write(&short_batch).unwrap(); + writer.finish().unwrap(); + }) + }); + + // Long lists: tests child element encode throughput (many elements per row) + group.bench_function("long", |b| { + let mut buf = Vec::with_capacity(ROWS * LIST_LONG_ELEMENTS * 8); + b.iter(|| { + buf.clear(); + let mut writer = LineDelimitedWriter::new(&mut buf); + writer.write(&long_batch).unwrap(); + writer.finish().unwrap(); + }) + }); + + group.finish(); +} + +fn build_fixed_size_list_batch(rows: usize, elements: usize) -> RecordBatch { + let mut 
builder = FixedSizeListBuilder::new(Int64Builder::new(), elements as i32); + for row in 0..rows { + for i in 0..elements { + builder.values().append_value((row + i) as i64); + } + builder.append(true); + } + let list_array = builder.finish(); + + let schema = Arc::new(Schema::new(vec![Field::new( + "fixed_size_list", + list_array.data_type().clone(), + false, + )])); + + RecordBatch::try_new(schema, vec![Arc::new(list_array)]).unwrap() +} + +fn bench_write_fixed_size_list(c: &mut Criterion) { + let short_batch = build_fixed_size_list_batch(ROWS, LIST_SHORT_ELEMENTS); + let long_batch = build_fixed_size_list_batch(ROWS, LIST_LONG_ELEMENTS); + + let mut group = c.benchmark_group("write_fixed_size_list_i64"); + group.throughput(Throughput::Elements(ROWS as u64)); + + group.bench_function("short", |b| { + let mut buf = Vec::with_capacity(ROWS * LIST_SHORT_ELEMENTS * 8); + b.iter(|| { + buf.clear(); + let mut writer = LineDelimitedWriter::new(&mut buf); + writer.write(&short_batch).unwrap(); + writer.finish().unwrap(); + }) + }); + + group.bench_function("long", |b| { + let mut buf = Vec::with_capacity(ROWS * LIST_LONG_ELEMENTS * 8); + b.iter(|| { + buf.clear(); + let mut writer = LineDelimitedWriter::new(&mut buf); + writer.write(&long_batch).unwrap(); + writer.finish().unwrap(); + }) + }); + + group.finish(); +} + +criterion_group!(benches, bench_write_list, bench_write_fixed_size_list); +criterion_main!(benches); From ba02ab9b339480241de32b90a372fd443bf3ab5b Mon Sep 17 00:00:00 2001 From: Filippo <12383260+notfilippo@users.noreply.github.com> Date: Wed, 11 Mar 2026 18:59:51 +0100 Subject: [PATCH 27/80] feat(memory-tracking): expose API to NullBuffer, ArrayData, and Array (#8918) # Which issue does this PR close? Part of #8137. Follow up of #7303. Replaces #8040. # Rationale for this change #7303 implements the fundamental symbols for tracking memory. This patch exposes those APIs to a higher level Array and ArrayData. # What changes are included in this PR? 
New `claim` API for NullBuffer, ArrayData, and Array. New `pool` feature-flag to arrow, arrow-array, and arrow-data. # Are these changes tested? Added a doctest on the `Array::claim` method. # Are there any user-facing changes? Added API and a new feature-flag for arrow, arrow-array, and arrow-data. --- arrow-array/Cargo.toml | 2 + arrow-array/src/array/boolean_array.rs | 8 ++ arrow-array/src/array/byte_array.rs | 9 +++ arrow-array/src/array/byte_view_array.rs | 11 +++ arrow-array/src/array/dictionary_array.rs | 11 +++ .../src/array/fixed_size_binary_array.rs | 8 ++ .../src/array/fixed_size_list_array.rs | 8 ++ arrow-array/src/array/list_array.rs | 9 +++ arrow-array/src/array/list_view_array.rs | 10 +++ arrow-array/src/array/map_array.rs | 9 +++ arrow-array/src/array/mod.rs | 79 +++++++++++++++++++ arrow-array/src/array/null_array.rs | 5 ++ arrow-array/src/array/primitive_array.rs | 8 ++ arrow-array/src/array/run_array.rs | 11 +++ arrow-array/src/array/struct_array.rs | 10 +++ arrow-array/src/array/union_array.rs | 11 +++ arrow-buffer/Cargo.toml | 1 + arrow-buffer/src/buffer/boolean.rs | 8 ++ arrow-buffer/src/buffer/null.rs | 9 ++- arrow-buffer/src/buffer/offset.rs | 6 ++ arrow-buffer/src/buffer/run.rs | 6 ++ arrow-buffer/src/buffer/scalar.rs | 8 ++ arrow-data/Cargo.toml | 2 + arrow-data/src/data.rs | 24 ++++++ arrow/Cargo.toml | 2 + 25 files changed, 274 insertions(+), 1 deletion(-) diff --git a/arrow-array/Cargo.toml b/arrow-array/Cargo.toml index a046fea2b0dc..6be5a6daab56 100644 --- a/arrow-array/Cargo.toml +++ b/arrow-array/Cargo.toml @@ -58,6 +58,8 @@ all-features = true async = ["dep:futures"] ffi = ["arrow-schema/ffi", "arrow-data/ffi"] force_validate = [] +# Enable memory tracking support +pool = ["arrow-buffer/pool", "arrow-data/pool"] [dev-dependencies] rand = { version = "0.9", default-features = false, features = ["std", "std_rng", "thread_rng"] } diff --git a/arrow-array/src/array/boolean_array.rs b/arrow-array/src/array/boolean_array.rs index 
65e19c80f8e8..582627b24396 100644 --- a/arrow-array/src/array/boolean_array.rs +++ b/arrow-array/src/array/boolean_array.rs @@ -346,6 +346,14 @@ unsafe impl Array for BooleanArray { fn get_array_memory_size(&self) -> usize { std::mem::size_of::() + self.get_buffer_memory_size() } + + #[cfg(feature = "pool")] + fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) { + self.values.claim(pool); + if let Some(nulls) = &self.nulls { + nulls.claim(pool); + } + } } impl ArrayAccessor for &BooleanArray { diff --git a/arrow-array/src/array/byte_array.rs b/arrow-array/src/array/byte_array.rs index a54e9a5fc781..93924ac76bb2 100644 --- a/arrow-array/src/array/byte_array.rs +++ b/arrow-array/src/array/byte_array.rs @@ -525,6 +525,15 @@ unsafe impl Array for GenericByteArray { fn get_array_memory_size(&self) -> usize { std::mem::size_of::() + self.get_buffer_memory_size() } + + #[cfg(feature = "pool")] + fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) { + self.value_offsets.claim(pool); + self.value_data.claim(pool); + if let Some(nulls) = &self.nulls { + nulls.claim(pool); + } + } } impl<'a, T: ByteArrayType> ArrayAccessor for &'a GenericByteArray { diff --git a/arrow-array/src/array/byte_view_array.rs b/arrow-array/src/array/byte_view_array.rs index 0275b628e2cf..a4a319df6426 100644 --- a/arrow-array/src/array/byte_view_array.rs +++ b/arrow-array/src/array/byte_view_array.rs @@ -897,6 +897,17 @@ unsafe impl Array for GenericByteViewArray { fn get_array_memory_size(&self) -> usize { std::mem::size_of::() + self.get_buffer_memory_size() } + + #[cfg(feature = "pool")] + fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) { + self.views.claim(pool); + for buffer in self.buffers.iter() { + buffer.claim(pool); + } + if let Some(nulls) = &self.nulls { + nulls.claim(pool); + } + } } impl<'a, T: ByteViewType + ?Sized> ArrayAccessor for &'a GenericByteViewArray { diff --git a/arrow-array/src/array/dictionary_array.rs b/arrow-array/src/array/dictionary_array.rs index 
97e45cc5d68e..0c465ec14446 100644 --- a/arrow-array/src/array/dictionary_array.rs +++ b/arrow-array/src/array/dictionary_array.rs @@ -792,6 +792,12 @@ unsafe impl Array for DictionaryArray { + self.keys.get_buffer_memory_size() + self.values.get_array_memory_size() } + + #[cfg(feature = "pool")] + fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) { + self.keys.claim(pool); + self.values.claim(pool); + } } impl std::fmt::Debug for DictionaryArray { @@ -911,6 +917,11 @@ unsafe impl Array for TypedDictionaryArray<' fn get_array_memory_size(&self) -> usize { self.dictionary.get_array_memory_size() } + + #[cfg(feature = "pool")] + fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) { + self.dictionary.claim(pool); + } } impl IntoIterator for TypedDictionaryArray<'_, K, V> diff --git a/arrow-array/src/array/fixed_size_binary_array.rs b/arrow-array/src/array/fixed_size_binary_array.rs index e3f08c066ee0..72e6d022a58a 100644 --- a/arrow-array/src/array/fixed_size_binary_array.rs +++ b/arrow-array/src/array/fixed_size_binary_array.rs @@ -662,6 +662,14 @@ unsafe impl Array for FixedSizeBinaryArray { fn get_array_memory_size(&self) -> usize { std::mem::size_of::() + self.get_buffer_memory_size() } + + #[cfg(feature = "pool")] + fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) { + self.value_data.claim(pool); + if let Some(nulls) = &self.nulls { + nulls.claim(pool); + } + } } impl<'a> ArrayAccessor for &'a FixedSizeBinaryArray { diff --git a/arrow-array/src/array/fixed_size_list_array.rs b/arrow-array/src/array/fixed_size_list_array.rs index a3db33d61b56..55a9fb9aa49e 100644 --- a/arrow-array/src/array/fixed_size_list_array.rs +++ b/arrow-array/src/array/fixed_size_list_array.rs @@ -528,6 +528,14 @@ unsafe impl Array for FixedSizeListArray { } size } + + #[cfg(feature = "pool")] + fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) { + self.values.claim(pool); + if let Some(nulls) = &self.nulls { + nulls.claim(pool); + } + } } impl super::ListLikeArray for 
FixedSizeListArray { diff --git a/arrow-array/src/array/list_array.rs b/arrow-array/src/array/list_array.rs index d9613c6809ac..24f7774f2b7d 100644 --- a/arrow-array/src/array/list_array.rs +++ b/arrow-array/src/array/list_array.rs @@ -620,6 +620,15 @@ unsafe impl Array for GenericListArray } size } + + #[cfg(feature = "pool")] + fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) { + self.value_offsets.claim(pool); + self.values.claim(pool); + if let Some(nulls) = &self.nulls { + nulls.claim(pool); + } + } } impl super::ListLikeArray for GenericListArray { diff --git a/arrow-array/src/array/list_view_array.rs b/arrow-array/src/array/list_view_array.rs index eda3be11ac39..75ff6117eeba 100644 --- a/arrow-array/src/array/list_view_array.rs +++ b/arrow-array/src/array/list_view_array.rs @@ -486,6 +486,16 @@ unsafe impl Array for GenericListViewArray super::ListLikeArray for GenericListViewArray { diff --git a/arrow-array/src/array/map_array.rs b/arrow-array/src/array/map_array.rs index 07758d59bb14..7a5fe0b46843 100644 --- a/arrow-array/src/array/map_array.rs +++ b/arrow-array/src/array/map_array.rs @@ -430,6 +430,15 @@ unsafe impl Array for MapArray { } size } + + #[cfg(feature = "pool")] + fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) { + self.value_offsets.claim(pool); + self.entries.claim(pool); + if let Some(nulls) = &self.nulls { + nulls.claim(pool); + } + } } impl ArrayAccessor for &MapArray { diff --git a/arrow-array/src/array/mod.rs b/arrow-array/src/array/mod.rs index ca3a02577f47..e389b462fbe1 100644 --- a/arrow-array/src/array/mod.rs +++ b/arrow-array/src/array/mod.rs @@ -354,6 +354,75 @@ pub unsafe trait Array: std::fmt::Debug + Send + Sync { /// This value will always be greater than returned by `get_buffer_memory_size()` and /// includes the overhead of the data structures that contain the pointers to the various buffers. fn get_array_memory_size(&self) -> usize; + + /// Claim memory used by this array in the provided memory pool. 
+ /// + /// This recursively claims memory for: + /// - All data buffers in this array + /// - All child arrays (for nested types like List, Struct, etc.) + /// - The null bitmap buffer if present + /// + /// This method guarantees that the memory pool will only compute occupied memory + /// exactly once. For example, if this array is derived from operations like `slice`, + /// calling `claim` on it would not change the memory pool's usage if the underlying buffers + /// are already counted before. + /// + /// # Example + /// ``` + /// # use arrow_array::{Int32Array, Array}; + /// # use arrow_buffer::TrackingMemoryPool; + /// # use arrow_buffer::MemoryPool; + /// + /// let pool = TrackingMemoryPool::default(); + /// + /// let small_array = Int32Array::from(vec![1, 2, 3, 4, 5]); + /// let small_array_size = small_array.get_buffer_memory_size(); + /// + /// // Claim the array's memory in the pool + /// small_array.claim(&pool); + /// + /// // Create and claim slices of `small_array`; should not increase memory usage + /// let slice1 = small_array.slice(0, 2); + /// let slice2 = small_array.slice(2, 2); + /// slice1.claim(&pool); + /// slice2.claim(&pool); + /// + /// assert_eq!(pool.used(), small_array_size); + /// + /// // Create a `large_array` which does not derive from the original `small_array` + /// + /// let large_array = Int32Array::from((0..1000).collect::>()); + /// let large_array_size = large_array.get_buffer_memory_size(); + /// + /// large_array.claim(&pool); + /// + /// // Trying to claim more than once is a no-op + /// large_array.claim(&pool); + /// large_array.claim(&pool); + /// + /// assert_eq!(pool.used(), small_array_size + large_array_size); + /// + /// let sum_of_all_sizes = small_array_size + large_array_size + slice1.get_buffer_memory_size() + slice2.get_buffer_memory_size(); + /// + /// // `get_buffer_memory_size` works independently of the memory pool, so a sum of all the + /// // arrays in scope will always be >= the memory used reported 
by the memory pool. + /// assert_ne!(pool.used(), sum_of_all_sizes); + /// + /// // Until the final claim is dropped the buffer size remains accounted for + /// drop(small_array); + /// drop(slice1); + /// + /// assert_eq!(pool.used(), small_array_size + large_array_size); + /// + /// // Dropping this finally releases the buffer that was backing `small_array` + /// drop(slice2); + /// + /// assert_eq!(pool.used(), large_array_size); + /// ``` + #[cfg(feature = "pool")] + fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) { + self.to_data().claim(pool) + } } /// A reference-counted reference to a generic `Array` @@ -437,6 +506,11 @@ unsafe impl Array for ArrayRef { fn get_array_memory_size(&self) -> usize { self.as_ref().get_array_memory_size() } + + #[cfg(feature = "pool")] + fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) { + self.as_ref().claim(pool) + } } unsafe impl Array for &T { @@ -507,6 +581,11 @@ unsafe impl Array for &T { fn get_array_memory_size(&self) -> usize { T::get_array_memory_size(self) } + + #[cfg(feature = "pool")] + fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) { + T::claim(self, pool) + } } /// A generic trait for accessing the values of an [`Array`] diff --git a/arrow-array/src/array/null_array.rs b/arrow-array/src/array/null_array.rs index 00b30935d425..05dd114be71b 100644 --- a/arrow-array/src/array/null_array.rs +++ b/arrow-array/src/array/null_array.rs @@ -133,6 +133,11 @@ unsafe impl Array for NullArray { fn get_array_memory_size(&self) -> usize { std::mem::size_of::() } + + #[cfg(feature = "pool")] + fn claim(&self, _pool: &dyn arrow_buffer::MemoryPool) { + // NullArray has no buffers to claim + } } impl From for NullArray { diff --git a/arrow-array/src/array/primitive_array.rs b/arrow-array/src/array/primitive_array.rs index d9c8ff66d0cb..b51f5f518668 100644 --- a/arrow-array/src/array/primitive_array.rs +++ b/arrow-array/src/array/primitive_array.rs @@ -1246,6 +1246,14 @@ unsafe impl Array for PrimitiveArray { fn 
get_array_memory_size(&self) -> usize { std::mem::size_of::() + self.get_buffer_memory_size() } + + #[cfg(feature = "pool")] + fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) { + self.values.claim(pool); + if let Some(nulls) = &self.nulls { + nulls.claim(pool); + } + } } impl ArrayAccessor for &PrimitiveArray { diff --git a/arrow-array/src/array/run_array.rs b/arrow-array/src/array/run_array.rs index 4770bad05e7d..a3cb4565f413 100644 --- a/arrow-array/src/array/run_array.rs +++ b/arrow-array/src/array/run_array.rs @@ -375,6 +375,12 @@ unsafe impl Array for RunArray { + self.run_ends.inner().inner().capacity() + self.values.get_array_memory_size() } + + #[cfg(feature = "pool")] + fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) { + self.run_ends.claim(pool); + self.values.claim(pool); + } } impl std::fmt::Debug for RunArray { @@ -603,6 +609,11 @@ unsafe impl Array for TypedRunArray<'_, R, V> { fn get_array_memory_size(&self) -> usize { self.run_array.get_array_memory_size() } + + #[cfg(feature = "pool")] + fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) { + self.run_array.claim(pool); + } } // Array accessor converts the index of logical array to the index of the physical array diff --git a/arrow-array/src/array/struct_array.rs b/arrow-array/src/array/struct_array.rs index b5f25fff181c..da837ba16b75 100644 --- a/arrow-array/src/array/struct_array.rs +++ b/arrow-array/src/array/struct_array.rs @@ -468,6 +468,16 @@ unsafe impl Array for StructArray { } size } + + #[cfg(feature = "pool")] + fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) { + for field in &self.fields { + field.claim(pool); + } + if let Some(nulls) = &self.nulls { + nulls.claim(pool); + } + } } impl From> for StructArray { diff --git a/arrow-array/src/array/union_array.rs b/arrow-array/src/array/union_array.rs index 03d69a584524..5ba7b947c724 100644 --- a/arrow-array/src/array/union_array.rs +++ b/arrow-array/src/array/union_array.rs @@ -946,6 +946,17 @@ unsafe impl Array for 
UnionArray { .sum::() + sum } + + #[cfg(feature = "pool")] + fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) { + self.type_ids.claim(pool); + if let Some(offsets) = &self.offsets { + offsets.claim(pool); + } + for field in self.fields.iter().flatten() { + field.claim(pool); + } + } } impl std::fmt::Debug for UnionArray { diff --git a/arrow-buffer/Cargo.toml b/arrow-buffer/Cargo.toml index 02ea49c37c46..1400c1986361 100644 --- a/arrow-buffer/Cargo.toml +++ b/arrow-buffer/Cargo.toml @@ -36,6 +36,7 @@ bench = false all-features = true [features] +# Enable memory tracking support pool = [] [dependencies] diff --git a/arrow-buffer/src/buffer/boolean.rs b/arrow-buffer/src/buffer/boolean.rs index f9148c7eb245..c1c7529e0a2d 100644 --- a/arrow-buffer/src/buffer/boolean.rs +++ b/arrow-buffer/src/buffer/boolean.rs @@ -489,6 +489,14 @@ impl BooleanBuffer { self.buffer } + /// Claim memory used by this buffer in the provided memory pool. + /// + /// See [`Buffer::claim`] for details. + #[cfg(feature = "pool")] + pub fn claim(&self, pool: &dyn crate::MemoryPool) { + self.buffer.claim(pool); + } + /// Returns an iterator over the bits in this [`BooleanBuffer`] pub fn iter(&self) -> BitIterator<'_> { self.into_iter() diff --git a/arrow-buffer/src/buffer/null.rs b/arrow-buffer/src/buffer/null.rs index 97034a631ef8..6046369c62a7 100644 --- a/arrow-buffer/src/buffer/null.rs +++ b/arrow-buffer/src/buffer/null.rs @@ -26,7 +26,7 @@ use crate::{Buffer, MutableBuffer}; /// that it is null. /// /// # See also -/// * [`NullBufferBuilder`] for creating `NullBuffer`s +/// * [`NullBufferBuilder`] for creating `NullBuffer`s /// /// [Arrow specification]: https://arrow.apache.org/docs/format/Columnar.html#validity-bitmaps /// [`NullBufferBuilder`]: crate::NullBufferBuilder @@ -231,6 +231,13 @@ impl NullBuffer { let nb = NullBuffer::new(bb); (nb.null_count() > 0).then_some(nb) } + + /// Claim memory used by this null buffer in the provided memory pool. 
+ #[cfg(feature = "pool")] + pub fn claim(&self, pool: &dyn crate::MemoryPool) { + // NullBuffer wraps a BooleanBuffer which wraps a Buffer + self.buffer.inner().claim(pool); + } } impl<'a> IntoIterator for &'a NullBuffer { diff --git a/arrow-buffer/src/buffer/offset.rs b/arrow-buffer/src/buffer/offset.rs index 66fa7dd22ec5..bb34c8b23892 100644 --- a/arrow-buffer/src/buffer/offset.rs +++ b/arrow-buffer/src/buffer/offset.rs @@ -220,6 +220,12 @@ impl OffsetBuffer { self.0 } + /// Claim memory used by this buffer in the provided memory pool. + #[cfg(feature = "pool")] + pub fn claim(&self, pool: &dyn crate::MemoryPool) { + self.0.claim(pool); + } + /// Returns a zero-copy slice of this buffer with length `len` and starting at `offset` pub fn slice(&self, offset: usize, len: usize) -> Self { Self(self.0.slice(offset, len.saturating_add(1))) diff --git a/arrow-buffer/src/buffer/run.rs b/arrow-buffer/src/buffer/run.rs index 0f4d9234e4cf..703ae913801d 100644 --- a/arrow-buffer/src/buffer/run.rs +++ b/arrow-buffer/src/buffer/run.rs @@ -294,6 +294,12 @@ where self.run_ends } + /// Claim memory used by this buffer in the provided memory pool. + #[cfg(feature = "pool")] + pub fn claim(&self, pool: &dyn crate::MemoryPool) { + self.run_ends.claim(pool); + } + /// Returns the physical indices corresponding to the provided logical indices. /// /// Given a slice of logical indices, this method returns a `Vec` containing the diff --git a/arrow-buffer/src/buffer/scalar.rs b/arrow-buffer/src/buffer/scalar.rs index 3c5334ca5118..f74b93ab8914 100644 --- a/arrow-buffer/src/buffer/scalar.rs +++ b/arrow-buffer/src/buffer/scalar.rs @@ -126,6 +126,14 @@ impl ScalarBuffer { self.buffer } + /// Claim memory used by this buffer in the provided memory pool. + /// + /// See [`Buffer::claim`] for details. 
+ #[cfg(feature = "pool")] + pub fn claim(&self, pool: &dyn crate::MemoryPool) { + self.buffer.claim(pool); + } + /// Returns true if this [`ScalarBuffer`] is equal to `other`, using pointer comparisons /// to determine buffer equality. This is cheaper than `PartialEq::eq` but may /// return false when the arrays are logically equal diff --git a/arrow-data/Cargo.toml b/arrow-data/Cargo.toml index 9c7a5206b2f4..9f1b50ed14d9 100644 --- a/arrow-data/Cargo.toml +++ b/arrow-data/Cargo.toml @@ -39,6 +39,8 @@ bench = false force_validate = [] # Enable ffi support ffi = ["arrow-schema/ffi"] +# Enable memory tracking support +pool = ["arrow-buffer/pool"] [package.metadata.docs.rs] all-features = true diff --git a/arrow-data/src/data.rs b/arrow-data/src/data.rs index 21cf4e5b5e2c..a5a64dfe9f38 100644 --- a/arrow-data/src/data.rs +++ b/arrow-data/src/data.rs @@ -1659,6 +1659,30 @@ impl ArrayData { pub fn into_builder(self) -> ArrayDataBuilder { self.into() } + + /// Claim memory used by this ArrayData in the provided memory pool. 
+ /// + /// This claims memory for: + /// - All buffers in self.buffers + /// - All child ArrayData recursively + /// - The null buffer if present + #[cfg(feature = "pool")] + pub fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) { + // Claim all data buffers + for buffer in &self.buffers { + buffer.claim(pool); + } + + // Claim null buffer if present + if let Some(nulls) = &self.nulls { + nulls.claim(pool); + } + + // Recursively claim child data + for child in &self.child_data { + child.claim(pool); + } + } } /// Return the expected [`DataTypeLayout`] Arrays of this data diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml index 137d785eee88..8e56457ff0a5 100644 --- a/arrow/Cargo.toml +++ b/arrow/Cargo.toml @@ -82,6 +82,8 @@ force_validate = ["arrow-array/force_validate", "arrow-data/force_validate"] ffi = ["arrow-schema/ffi", "arrow-data/ffi", "arrow-array/ffi"] chrono-tz = ["arrow-array/chrono-tz"] canonical_extension_types = ["arrow-schema/canonical_extension_types"] +# Enable memory tracking support +pool = ["arrow-array/pool"] [dev-dependencies] chrono = { workspace = true } From b3e047f59a562020a0fd50e7c68c4e6cbd53687d Mon Sep 17 00:00:00 2001 From: Peter L Date: Thu, 12 Mar 2026 05:15:07 +1030 Subject: [PATCH 28/80] Fix Invalid offset in sparse column chunk data error for multiple predicates (#9509) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Which issue does this PR close? Raised an issue at https://github.com/apache/arrow-rs/issues/9516 for this one Same issue as https://github.com/apache/arrow-rs/issues/9239 but extended to another scenario # Rationale for this change When there are multiple predicates being evaluated, we need to reset the row selection policy before overriding the strategy. 
Scenario: - Dense initial RowSelection (alternating select/skip) covers all pages → Auto resolves to Mask - Predicate 1 evaluates on column A, narrows selection to skip middle pages - Predicate 2's column B is fetched sparsely with the narrowed selection (missing middle pages) - Without the fix, the override for predicate 2 returns early (policy=Mask, not Auto), so Mask is used and tries to read missing pages → "Invalid offset" error # What changes are included in this PR? This is a one line change to reset the selection policy in the `RowGroupDecoderState::WaitingOnFilterData` arm # Are these changes tested? Yes a new test added that fails currently on `main`, but as you can see it's a doozy to set up. # Are there any user-facing changes? Nope --- .../arrow/push_decoder/reader_builder/mod.rs | 7 ++ .../tests/arrow_reader/row_filter/async.rs | 111 +++++++++++++++++- 2 files changed, 117 insertions(+), 1 deletion(-) diff --git a/parquet/src/arrow/push_decoder/reader_builder/mod.rs b/parquet/src/arrow/push_decoder/reader_builder/mod.rs index 8fa299be884f..d3d78ca7c263 100644 --- a/parquet/src/arrow/push_decoder/reader_builder/mod.rs +++ b/parquet/src/arrow/push_decoder/reader_builder/mod.rs @@ -437,6 +437,13 @@ impl RowGroupReaderBuilder { .with_parquet_metadata(&self.metadata) .build_array_reader(self.fields.as_deref(), predicate.projection())?; + // Reset to original policy before each predicate so the override + // can detect page skipping for THIS predicate's columns. + // Without this reset, a prior predicate's override (e.g. Mask) + // carries forward and the check returns early, missing unfetched + // pages for subsequent predicates. + plan_builder = plan_builder.with_row_selection_policy(self.row_selection_policy); + // Prepare to evaluate the filter. 
// Note: first update the selection strategy to properly handle any pages // pruned during fetch diff --git a/parquet/tests/arrow_reader/row_filter/async.rs b/parquet/tests/arrow_reader/row_filter/async.rs index 6fa616d714f1..66840bb8147b 100644 --- a/parquet/tests/arrow_reader/row_filter/async.rs +++ b/parquet/tests/arrow_reader/row_filter/async.rs @@ -21,7 +21,7 @@ use std::sync::Arc; use arrow::{ array::AsArray, compute::{concat_batches, kernels::cmp::eq, or}, - datatypes::TimestampNanosecondType, + datatypes::{Int32Type, TimestampNanosecondType}, }; use arrow_array::{ ArrayRef, BooleanArray, Int8Array, Int32Array, Int64Array, RecordBatch, Scalar, StringArray, @@ -525,3 +525,112 @@ async fn test_predicate_pushdown_with_skipped_pages() { assert_eq!(batch.column(0).as_string(), &expected); } } + +/// Regression test: when multiple predicates are used, the first predicate's +/// override of the selection strategy (to Mask) must NOT carry forward to +/// subsequent predicates. Each predicate must get a fresh Auto policy so the +/// override can detect page skipping for that predicate's specific columns. 
+/// +/// Scenario: +/// - Dense initial RowSelection (alternating select/skip) covers all pages → Auto resolves to Mask +/// - Predicate 1 evaluates on column A, narrows selection to skip middle pages +/// - Predicate 2's column B is fetched sparsely with the narrowed selection (missing middle pages) +/// - Without the fix, the override for predicate 2 returns early (policy=Mask, not Auto), +/// so Mask is used and tries to read missing pages → "Invalid offset" error +#[tokio::test] +async fn test_multi_predicate_mask_policy_carryover() { + // 300 rows, 1 row group, 100 rows per page (3 pages) + let num_rows = 300usize; + let rows_per_page = 100; + + let schema = Arc::new(Schema::new(vec![ + Field::new("filter_col", DataType::Int32, false), + Field::new("value_col", DataType::Int32, false), + ])); + + let props = WriterProperties::builder() + .set_max_row_group_row_count(Some(num_rows)) + .set_data_page_row_count_limit(rows_per_page) + .set_write_batch_size(rows_per_page) + .set_dictionary_enabled(false) + .build(); + + // filter_col: 0 for first and last 100 rows, 1 for middle 100 rows + // value_col: just row index + let filter_values: Vec = (0..num_rows as i32) + .map(|i| if (100..200).contains(&i) { 1 } else { 0 }) + .collect(); + let value_values: Vec = (0..num_rows as i32).collect(); + + let batch = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(Int32Array::from(filter_values)) as ArrayRef, + Arc::new(Int32Array::from(value_values)) as ArrayRef, + ], + ) + .unwrap(); + + let mut buffer = Vec::new(); + let mut writer = ArrowWriter::try_new(&mut buffer, schema.clone(), Some(props)).unwrap(); + writer.write(&batch).unwrap(); + writer.close().unwrap(); + let buffer = Bytes::from(buffer); + + let reader = TestReader::new(buffer); + let options = ArrowReaderOptions::default().with_page_index_policy(PageIndexPolicy::Required); + let builder = ParquetRecordBatchStreamBuilder::new_with_options(reader, options) + .await + .unwrap(); + + let schema_descr = 
builder.metadata().file_metadata().schema_descr_ptr(); + + // Dense initial selection: Select(1), Skip(1) repeated → triggers Mask strategy + // Covers all pages since every page has selected rows + let selectors: Vec = (0..num_rows / 2) + .flat_map(|_| vec![RowSelector::select(1), RowSelector::skip(1)]) + .collect(); + let selection = RowSelection::from(selectors); + + // Predicate 1 on filter_col: keeps only rows where filter_col == 0 + // (first 100 and last 100 rows). After this, middle page is excluded. + let pred1 = ArrowPredicateFn::new(ProjectionMask::roots(&schema_descr, [0]), |batch| { + let col = batch.column(0).as_primitive::(); + Ok(BooleanArray::from_iter( + col.iter().map(|v| v.map(|val| val == 0)), + )) + }); + + // Predicate 2 on value_col: keeps rows where value_col < 250 + // This column is fetched AFTER predicate 1 narrows the selection. + // Its sparse data will be missing the middle page. + let pred2 = ArrowPredicateFn::new(ProjectionMask::roots(&schema_descr, [1]), |batch| { + let col = batch.column(0).as_primitive::(); + Ok(BooleanArray::from_iter( + col.iter().map(|v| v.map(|val| val < 250)), + )) + }); + + let row_filter = RowFilter::new(vec![Box::new(pred1), Box::new(pred2)]); + + // Output projection: both columns + let projection = ProjectionMask::roots(&schema_descr, [0, 1]); + + let stream = builder + .with_row_filter(row_filter) + .with_row_selection(selection) + .with_projection(projection) + .with_max_predicate_cache_size(0) + .build() + .unwrap(); + + // Without the fix, this panics with: + // "Invalid offset in sparse column chunk data: ..., no matching page found." 
+ let batches: Vec = stream.try_collect().await.unwrap(); + let batch = concat_batches(&batches[0].schema(), &batches).unwrap(); + + // Verify results: rows where filter_col==0 AND value_col<250 AND original alternating selection + // That's even-indexed rows in [0,100) with value<250 → rows 0,2,4,...,98 (50 rows) + // Plus even-indexed rows in [200,250) with value<250 → rows 200,202,...,248 (25 rows) + assert_eq!(batch.num_rows(), 75); +} From 2956dbf30fe5b50f8f76e6bad93505a8e7b86eb5 Mon Sep 17 00:00:00 2001 From: Ryan Johnson Date: Wed, 11 Mar 2026 12:46:51 -0600 Subject: [PATCH 29/80] fix: Do not assume missing nullcount stat means zero nullcount (#9481) # Which issue does this PR close? - Closes https://github.com/apache/arrow-rs/issues/9451 - Closes https://github.com/apache/arrow-rs/issues/6256 # Rationale for this change A reader might be annoyed (performance wart) if a parquet footer lacks nullcount stats, but inferring nullcount=0 for missing stats makes the stats untrustworthy and can lead to incorrect behavior. # What changes are included in this PR? If a parquet footer nullcount stat is missing, surface it as None, reserving `Some(0)` for known-no-null cases. # Are these changes tested? Fixed one unit test that broke, added a missing unit test that covers the other change site. # Are there any user-facing changes? The stats API doesn't change signature, but there is a behavior change. The existing doc that called out the incorrect behavior has been removed to reflect that the incorrect behavior no longer occurs. 
--- parquet/src/file/metadata/thrift/mod.rs | 73 ++++++++++++++++++++----- parquet/src/file/statistics.rs | 56 +++++++++---------- 2 files changed, 85 insertions(+), 44 deletions(-) diff --git a/parquet/src/file/metadata/thrift/mod.rs b/parquet/src/file/metadata/thrift/mod.rs index ddb5aa16b068..88cb96f35555 100644 --- a/parquet/src/file/metadata/thrift/mod.rs +++ b/parquet/src/file/metadata/thrift/mod.rs @@ -192,20 +192,19 @@ fn convert_stats( use crate::file::statistics::Statistics as FStatistics; Ok(match thrift_stats { Some(stats) => { - // Number of nulls recorded, when it is not available, we just mark it as 0. - // TODO this should be `None` if there is no information about NULLS. - // see https://github.com/apache/arrow-rs/pull/6216/files - let null_count = stats.null_count.unwrap_or(0); - - if null_count < 0 { - return Err(general_err!( - "Statistics null count is negative {}", - null_count - )); - } - // Generic null count. - let null_count = Some(null_count as u64); + let null_count = stats + .null_count + .map(|null_count| { + if null_count < 0 { + return Err(general_err!( + "Statistics null count is negative {}", + null_count + )); + } + Ok(null_count as u64) + }) + .transpose()?; // Generic distinct count (count of distinct values occurring) let distinct_count = stats.distinct_count.map(|value| value as u64); // Whether or not statistics use deprecated min/max fields. 
@@ -1722,6 +1721,7 @@ write_thrift_field!(RustBoundingBox, FieldType::Struct); #[cfg(test)] pub(crate) mod tests { + use crate::basic::Type as PhysicalType; use crate::errors::Result; use crate::file::metadata::thrift::{BoundingBox, SchemaElement, write_schema}; use crate::file::metadata::{ColumnChunkMetaData, ParquetMetaDataOptions, RowGroupMetaData}; @@ -1730,7 +1730,8 @@ pub(crate) mod tests { ElementType, ThriftCompactOutputProtocol, ThriftSliceInputProtocol, read_thrift_vec, }; use crate::schema::types::{ - ColumnDescriptor, SchemaDescriptor, TypePtr, num_nodes, parquet_schema_from_array, + ColumnDescriptor, ColumnPath, SchemaDescriptor, TypePtr, num_nodes, + parquet_schema_from_array, }; use std::sync::Arc; @@ -1828,4 +1829,48 @@ pub(crate) mod tests { mmax: Some(42.0.into()), }); } + + #[test] + fn test_convert_stats_preserves_missing_null_count() { + let primitive = + crate::schema::types::Type::primitive_type_builder("col", PhysicalType::INT32) + .build() + .unwrap(); + let column_descr = Arc::new(ColumnDescriptor::new( + Arc::new(primitive), + 0, + 0, + ColumnPath::new(vec![]), + )); + + let none_null_count = super::Statistics { + max: None, + min: None, + null_count: None, + distinct_count: None, + max_value: None, + min_value: None, + is_max_value_exact: None, + is_min_value_exact: None, + }; + let decoded_none = super::convert_stats(&column_descr, Some(none_null_count)) + .unwrap() + .unwrap(); + assert_eq!(decoded_none.null_count_opt(), None); + + let zero_null_count = super::Statistics { + max: None, + min: None, + null_count: Some(0), + distinct_count: None, + max_value: None, + min_value: None, + is_max_value_exact: None, + is_min_value_exact: None, + }; + let decoded_zero = super::convert_stats(&column_descr, Some(zero_null_count)) + .unwrap() + .unwrap(); + assert_eq!(decoded_zero.null_count_opt(), Some(0)); + } } diff --git a/parquet/src/file/statistics.rs b/parquet/src/file/statistics.rs index a813e82d13f2..9682fd54b8df 100644 --- 
a/parquet/src/file/statistics.rs +++ b/parquet/src/file/statistics.rs @@ -125,19 +125,18 @@ pub(crate) fn from_thrift_page_stats( ) -> Result> { Ok(match thrift_stats { Some(stats) => { - // Number of nulls recorded, when it is not available, we just mark it as 0. - // TODO this should be `None` if there is no information about NULLS. - // see https://github.com/apache/arrow-rs/pull/6216/files - let null_count = stats.null_count.unwrap_or(0); - - if null_count < 0 { - return Err(ParquetError::General(format!( - "Statistics null count is negative {null_count}", - ))); - } - // Generic null count. - let null_count = Some(null_count as u64); + let null_count = stats + .null_count + .map(|null_count| { + if null_count < 0 { + return Err(ParquetError::General(format!( + "Statistics null count is negative {null_count}", + ))); + } + Ok(null_count as u64) + }) + .transpose()?; // Generic distinct count (count of distinct values occurring) let distinct_count = stats.distinct_count.map(|value| value as u64); // Whether or not statistics use deprecated min/max fields. @@ -431,9 +430,20 @@ impl Statistics { /// Returns number of null values for the column, if known. /// Note that this includes all nulls when column is part of the complex type. /// - /// Note this API returns Some(0) even if the null count was not present - /// in the statistics. - /// See + /// Note: Versions of this library prior to `58.1.0` returned `0` if the null count + /// was not available. This method now returns `None` in that case. + /// + /// Also, versions of this library prior to `53.1.0` did not store a null count + /// statistic when the null count was `0`. 
+ /// + /// It is unsound to assume that missing nullcount stats mean the column contains no nulls, + /// but code that depends on the old behavior can restore it by defaulting to zero: + /// + /// ```no_run + /// # use parquet::file::statistics::Statistics; + /// # let statistics: Statistics = todo!(); + /// let null_count = statistics.null_count_opt().unwrap_or(0); + /// ``` pub fn null_count_opt(&self) -> Option { statistics_enum_func![self, null_count_opt] } @@ -1064,21 +1074,7 @@ mod tests { let round_tripped = from_thrift_page_stats(Type::BOOLEAN, Some(thrift_stats)) .unwrap() .unwrap(); - // TODO: remove branch when we no longer support assuming null_count==None in the thrift - // means null_count = Some(0) - if null_count.is_none() { - assert_ne!(round_tripped, statistics); - assert!(round_tripped.null_count_opt().is_some()); - assert_eq!(round_tripped.null_count_opt(), Some(0)); - assert_eq!(round_tripped.min_bytes_opt(), statistics.min_bytes_opt()); - assert_eq!(round_tripped.max_bytes_opt(), statistics.max_bytes_opt()); - assert_eq!( - round_tripped.distinct_count_opt(), - statistics.distinct_count_opt() - ); - } else { - assert_eq!(round_tripped, statistics); - } + assert_eq!(round_tripped, statistics); } fn make_bool_stats(distinct_count: Option, null_count: Option) -> Statistics { From 6931d881d88b515574133e4edda7757b5ee2dd56 Mon Sep 17 00:00:00 2001 From: Mikhail Zabaluev Date: Wed, 11 Mar 2026 23:59:10 +0200 Subject: [PATCH 30/80] feat: expose arrow schema on async avro reader (#9534) # Rationale for this change Exposes the Arrow schema produced by the async Avro file reader, similarly to the `schema` method on the synchronous reader. This allows an application to prepare casting or other schema transformations with no need to fetch the first record batch to learn the produced Arrow schema. Since the async reader only parses OCF content for the moment, the schema does not change from batch to batch. # What changes are included in this PR? 
The `schema` method for `AsyncAvroFileReader` exposes the Arrow schema of record batches that are produced by the reader. # Are these changes tested? Added tests verifying that the returned schema matches the expected schema. # Are there any user-facing changes? Added a `schema` method to `AsyncAvroFileReader`. --- arrow-avro/src/reader/async_reader/mod.rs | 161 +++++++++++++++++++--- 1 file changed, 140 insertions(+), 21 deletions(-) diff --git a/arrow-avro/src/reader/async_reader/mod.rs b/arrow-avro/src/reader/async_reader/mod.rs index 53229f8576eb..02c00a60e0ef 100644 --- a/arrow-avro/src/reader/async_reader/mod.rs +++ b/arrow-avro/src/reader/async_reader/mod.rs @@ -19,7 +19,7 @@ use crate::compression::CompressionCodec; use crate::reader::Decoder; use crate::reader::block::{BlockDecoder, BlockDecoderState}; use arrow_array::RecordBatch; -use arrow_schema::ArrowError; +use arrow_schema::{ArrowError, SchemaRef}; use bytes::Bytes; use futures::future::BoxFuture; use futures::{FutureExt, Stream}; @@ -173,6 +173,13 @@ impl AsyncAvroFileReader { } } + /// Returns the Arrow schema for batches produced by this reader. + /// + /// The schema is determined by the writer schema in the file and the reader schema provided to the builder. + pub fn schema(&self) -> SchemaRef { + self.decoder.schema() + } + /// Calculate the byte range needed to complete the current block. /// Only valid when block_decoder is in Data or Sync state. /// Returns the range to fetch, or an error if EOF would be reached. 
@@ -534,7 +541,9 @@ impl Stream for AsyncAvroFileReader { #[cfg(all(test, feature = "object_store"))] mod tests { use super::*; - use crate::schema::{AvroSchema, SCHEMA_METADATA_KEY}; + use crate::schema::{ + AVRO_NAME_METADATA_KEY, AVRO_NAMESPACE_METADATA_KEY, AvroSchema, SCHEMA_METADATA_KEY, + }; use arrow_array::cast::AsArray; use arrow_array::types::{Int32Type, Int64Type}; use arrow_array::*; @@ -758,39 +767,63 @@ mod tests { vec![Field::new("f1_3_1", DataType::Float64, false)].into(), ), false, - ), + ) + .with_metadata(HashMap::from([ + (AVRO_NAMESPACE_METADATA_KEY.to_owned(), "ns3".to_owned()), + (AVRO_NAME_METADATA_KEY.to_owned(), "record3".to_owned()), + ])), ] .into(), ), false, - ), + ) + .with_metadata(HashMap::from([ + (AVRO_NAMESPACE_METADATA_KEY.to_owned(), "ns2".to_owned()), + (AVRO_NAME_METADATA_KEY.to_owned(), "record2".to_owned()), + ])), Field::new( "f2", - DataType::List(Arc::new(Field::new( - "item", - DataType::Struct( - vec![ - Field::new("f2_1", DataType::Boolean, false), - Field::new("f2_2", DataType::Float32, false), - ] - .into(), - ), - false, - ))), + DataType::List(Arc::new( + Field::new( + "item", + DataType::Struct( + vec![ + Field::new("f2_1", DataType::Boolean, false), + Field::new("f2_2", DataType::Float32, false), + ] + .into(), + ), + false, + ) + .with_metadata(HashMap::from([ + (AVRO_NAMESPACE_METADATA_KEY.to_owned(), "ns4".to_owned()), + (AVRO_NAME_METADATA_KEY.to_owned(), "record4".to_owned()), + ])), + )), false, ), Field::new( "f3", DataType::Struct(vec![Field::new("f3_1", DataType::Utf8, false)].into()), true, - ), + ) + .with_metadata(HashMap::from([ + (AVRO_NAMESPACE_METADATA_KEY.to_owned(), "ns5".to_owned()), + (AVRO_NAME_METADATA_KEY.to_owned(), "record5".to_owned()), + ])), Field::new( "f4", - DataType::List(Arc::new(Field::new( - "item", - DataType::Struct(vec![Field::new("f4_1", DataType::Int64, false)].into()), - true, - ))), + DataType::List(Arc::new( + Field::new( + "item", + 
DataType::Struct(vec![Field::new("f4_1", DataType::Int64, false)].into()), + true, + ) + .with_metadata(HashMap::from([ + (AVRO_NAMESPACE_METADATA_KEY.to_owned(), "ns6".to_owned()), + (AVRO_NAME_METADATA_KEY.to_owned(), "record6".to_owned()), + ])), + )), false, ), ]) @@ -1538,6 +1571,92 @@ mod tests { assert!(err.to_string().contains("Duplicate projection index")); } + #[tokio::test] + async fn test_arrow_schema_from_reader_no_reader_schema() { + let file = arrow_test_data("avro/alltypes_plain.avro"); + let store: Arc = Arc::new(LocalFileSystem::new()); + let location = Path::from_filesystem_path(&file).unwrap(); + let file_size = store.head(&location).await.unwrap().size; + + let file_reader = AvroObjectReader::new(store, location); + let expected_schema = get_alltypes_schema() + .as_ref() + .clone() + .with_metadata(Default::default()); + + // Build reader without providing reader schema - should use writer schema from file + let reader = AsyncAvroFileReader::builder(file_reader, file_size, 1024) + .try_build() + .await + .unwrap(); + + assert_eq!(reader.schema().as_ref(), &expected_schema); + + let batches: Vec = reader.try_collect().await.unwrap(); + let batch = &batches[0]; + + assert_eq!(batch.schema().as_ref(), &expected_schema); + } + + #[tokio::test] + async fn test_arrow_schema_from_reader_with_reader_schema() { + let file = arrow_test_data("avro/alltypes_plain.avro"); + let store: Arc = Arc::new(LocalFileSystem::new()); + let location = Path::from_filesystem_path(&file).unwrap(); + let file_size = store.head(&location).await.unwrap().size; + + let file_reader = AvroObjectReader::new(store, location); + let schema = get_alltypes_schema() + .project(&[0, 1, 7]) + .unwrap() + .with_metadata(Default::default()); + let reader_schema = AvroSchema::try_from(&schema).unwrap(); + let expected_schema = schema.clone(); + + // Build reader with provided reader schema - must apply the projection + let reader = AsyncAvroFileReader::builder(file_reader, file_size, 
1024) + .with_reader_schema(reader_schema) + .try_build() + .await + .unwrap(); + + assert_eq!(reader.schema().as_ref(), &expected_schema); + + let batches: Vec = reader.try_collect().await.unwrap(); + let batch = &batches[0]; + + assert_eq!(batch.schema().as_ref(), &expected_schema); + } + + #[tokio::test] + async fn test_arrow_schema_from_reader_nested_records() { + let file = arrow_test_data("avro/nested_records.avro"); + let store: Arc = Arc::new(LocalFileSystem::new()); + let location = Path::from_filesystem_path(&file).unwrap(); + let file_size = store.head(&location).await.unwrap().size; + + let file_reader = AvroObjectReader::new(store, location); + + // The schema produced by the reader should match the expected schema, + // attaching Avro type name metadata to fields of record and list types. + let expected_schema = get_nested_records_schema() + .as_ref() + .clone() + .with_metadata(Default::default()); + + let reader = AsyncAvroFileReader::builder(file_reader, file_size, 1024) + .try_build() + .await + .unwrap(); + + assert_eq!(reader.schema().as_ref(), &expected_schema); + + let batches: Vec = reader.try_collect().await.unwrap(); + let batch = &batches[0]; + + assert_eq!(batch.schema().as_ref(), &expected_schema); + } + #[tokio::test] async fn test_with_header_size_hint_small() { // Use a very small header size hint to force multiple fetches From 92a239a54e33043f05fef98d81d3c7bd2b926467 Mon Sep 17 00:00:00 2001 From: Bruno Date: Thu, 12 Mar 2026 07:31:45 +0100 Subject: [PATCH 31/80] Implement min, max, sum for run-end-encoded arrays. (#9409) Efficient implementations: * min & max work directly on the values child array. * sum folds over run lengths & values, without decompressing the array. In particular, these implementations take care of the logical offset & len of the run-end-encoded arrays. This is non-trivial: * We get the physical start & end indices in O(log(#runs)), but those are incorrect for empty arrays. 
* Slicing can happen in the middle of a run. For sum, we need to track the logical start & end and reduce the run length accordingly. Finally, one caveat: the aggregation functions only work when the child values array is a primitive array. That's fine ~always, but some client might store the values in an unexpected type. They'll either get None or an Error, depending on the aggregation function used. This feature is tracked in https://github.com/apache/arrow-rs/issues/3520. --- arrow-arith/src/aggregate.rs | 296 ++++++++++++++++++++++++++++++++++- 1 file changed, 292 insertions(+), 4 deletions(-) diff --git a/arrow-arith/src/aggregate.rs b/arrow-arith/src/aggregate.rs index a043259694c1..59792d0c5b1d 100644 --- a/arrow-arith/src/aggregate.rs +++ b/arrow-arith/src/aggregate.rs @@ -540,7 +540,7 @@ pub fn min_string_view(array: &StringViewArray) -> Option<&str> { /// Returns the sum of values in the array. /// /// This doesn't detect overflow. Once overflowing, the result will wrap around. -/// For an overflow-checking variant, use `sum_array_checked` instead. +/// For an overflow-checking variant, use [`sum_array_checked`] instead. pub fn sum_array>(array: A) -> Option where T: ArrowNumericType, @@ -567,6 +567,12 @@ where Some(sum) } + DataType::RunEndEncoded(run_ends, _) => match run_ends.data_type() { + DataType::Int16 => ree::sum_wrapping::(&array), + DataType::Int32 => ree::sum_wrapping::(&array), + DataType::Int64 => ree::sum_wrapping::(&array), + _ => unreachable!(), + }, _ => sum::(as_primitive_array(&array)), } } @@ -574,7 +580,9 @@ where /// Returns the sum of values in the array. /// /// This detects overflow and returns an `Err` for that. For an non-overflow-checking variant, -/// use `sum_array` instead. +/// use [`sum_array`] instead. +/// Additionally returns an `Err` on run-end-encoded arrays with a provided +/// values type parameter that is incorrect. 
pub fn sum_array_checked>( array: A, ) -> Result, ArrowError> @@ -603,10 +611,110 @@ where Ok(Some(sum)) } + DataType::RunEndEncoded(run_ends, _) => match run_ends.data_type() { + DataType::Int16 => ree::sum_checked::(&array), + DataType::Int32 => ree::sum_checked::(&array), + DataType::Int64 => ree::sum_checked::(&array), + _ => unreachable!(), + }, _ => sum_checked::(as_primitive_array(&array)), } } +// Logic for summing run-end-encoded arrays. +mod ree { + use std::convert::Infallible; + + use arrow_array::cast::AsArray; + use arrow_array::types::RunEndIndexType; + use arrow_array::{Array, ArrowNativeTypeOp, ArrowNumericType, PrimitiveArray, TypedRunArray}; + use arrow_buffer::ArrowNativeType; + use arrow_schema::ArrowError; + + /// Downcasts an array to a TypedRunArray. + fn downcast<'a, I: RunEndIndexType, V: ArrowNumericType>( + array: &'a dyn Array, + ) -> Option>> { + let array = array.as_run_opt::()?; + // We only support RunArray wrapping primitive types. + array.downcast::>() + } + + /// Computes the sum (wrapping) of the array values. + pub(super) fn sum_wrapping( + array: &dyn Array, + ) -> Option { + let ree = downcast::(array)?; + let Ok(sum) = fold(ree, |acc, val, len| -> Result { + Ok(acc.add_wrapping(val.mul_wrapping(V::Native::usize_as(len)))) + }); + sum + } + + /// Computes the sum (erroring on overflow) of the array values. + pub(super) fn sum_checked( + array: &dyn Array, + ) -> Result, ArrowError> { + let Some(ree) = downcast::(array) else { + return Err(ArrowError::InvalidArgumentError( + "Input run array values are not a PrimitiveArray".to_string(), + )); + }; + fold(ree, |acc, val, len| -> Result { + let Some(len) = V::Native::from_usize(len) else { + return Err(ArrowError::ArithmeticOverflow(format!( + "Cannot convert a run-end index ({:?}) to the value type ({})", + len, + std::any::type_name::() + ))); + }; + acc.add_checked(val.mul_checked(len)?) + }) + } + + /// Folds over the values in a run-end-encoded array. 
+ fn fold<'a, I: RunEndIndexType, V: ArrowNumericType, F, E>( + array: TypedRunArray<'a, I, PrimitiveArray>, + mut f: F, + ) -> Result, E> + where + F: FnMut(V::Native, V::Native, usize) -> Result, + { + let run_ends = array.run_ends(); + let logical_start = run_ends.offset(); + let logical_end = run_ends.offset() + run_ends.len(); + let run_ends = run_ends.sliced_values(); + + let values_slice = array.run_array().values_slice(); + let values = values_slice + .as_any() + .downcast_ref::>() + // Safety: we know the values array is PrimitiveArray. + .unwrap(); + + let mut prev_end = 0; + let mut acc = V::Native::ZERO; + let mut has_non_null_value = false; + + for (run_end, value) in run_ends.zip(values) { + let current_run_end = run_end.as_usize().clamp(logical_start, logical_end); + let run_length = current_run_end - prev_end; + + if let Some(value) = value { + has_non_null_value = true; + acc = f(acc, value, run_length)?; + } + + prev_end = current_run_end; + if current_run_end == logical_end { + break; + } + } + + Ok(if has_non_null_value { Some(acc) } else { None }) + } +} + /// Returns the min of values in the array of `ArrowNumericType` type, or dictionary /// array with value of `ArrowNumericType` type. pub fn min_array>(array: A) -> Option @@ -639,6 +747,20 @@ where { match array.data_type() { DataType::Dictionary(_, _) => min_max_helper::(array, cmp), + DataType::RunEndEncoded(run_ends, _) => { + // We can directly perform min/max on the values child array, as any + // run must have non-zero length. + let array: &dyn Array = &array; + let values = match run_ends.data_type() { + DataType::Int16 => array.as_run_opt::()?.values_slice(), + DataType::Int32 => array.as_run_opt::()?.values_slice(), + DataType::Int64 => array.as_run_opt::()?.values_slice(), + _ => return None, + }; + // We only support RunArray wrapping primitive types. 
+ let values = values.as_any().downcast_ref::>()?; + m(values) + } _ => m(as_primitive_array(&array)), } } @@ -751,7 +873,7 @@ pub fn bool_or(array: &BooleanArray) -> Option { /// Returns `Ok(None)` if the array is empty or only contains null values. /// /// This detects overflow and returns an `Err` for that. For an non-overflow-checking variant, -/// use `sum` instead. +/// use [`sum`] instead. pub fn sum_checked(array: &PrimitiveArray) -> Result, ArrowError> where T: ArrowNumericType, @@ -799,7 +921,7 @@ where /// Returns `None` if the array is empty or only contains null values. /// /// This doesn't detect overflow in release mode by default. Once overflowing, the result will -/// wrap around. For an overflow-checking variant, use `sum_checked` instead. +/// wrap around. For an overflow-checking variant, use [`sum_checked`] instead. pub fn sum(array: &PrimitiveArray) -> Option where T::Native: ArrowNativeTypeOp, @@ -1750,4 +1872,170 @@ mod tests { sum_checked(&a).expect_err("overflow should be detected"); sum_array_checked::(&a).expect_err("overflow should be detected"); } + + /// Helper for building a RunArray. 
+ fn make_run_array<'a, I: RunEndIndexType, V: ArrowNumericType, ItemType>( + values: impl IntoIterator, + ) -> RunArray + where + ItemType: Clone + Into> + 'static, + { + let mut builder = arrow_array::builder::PrimitiveRunBuilder::::new(); + for v in values.into_iter() { + builder.append_option((*v).clone().into()); + } + builder.finish() + } + + #[test] + fn test_ree_sum_array_basic() { + let run_array = make_run_array::(&[10, 10, 20, 30, 30, 30]); + let typed_array = run_array.downcast::().unwrap(); + + let result = sum_array::(typed_array); + assert_eq!(result, Some(130)); + + let result = sum_array_checked::(typed_array).unwrap(); + assert_eq!(result, Some(130)); + } + + #[test] + fn test_ree_sum_array_empty() { + let run_array = make_run_array::(&[]); + let typed_array = run_array.downcast::().unwrap(); + + let result = sum_array::(typed_array); + assert_eq!(result, None); + + let result = sum_array_checked::(typed_array).unwrap(); + assert_eq!(result, None); + } + + #[test] + fn test_ree_sum_array_with_nulls() { + let run_array = + make_run_array::(&[Some(10), None, Some(20), None, Some(30)]); + let typed_array = run_array.downcast::().unwrap(); + + let result = sum_array::(typed_array); + assert_eq!(result, Some(60)); + + let result = sum_array_checked::(typed_array).unwrap(); + assert_eq!(result, Some(60)); + } + + #[test] + fn test_ree_sum_array_with_only_nulls() { + let run_array = make_run_array::(&[None, None, None, None, None]); + let typed_array = run_array.downcast::().unwrap(); + + let result = sum_array::(typed_array); + assert_eq!(result, None); + + let result = sum_array_checked::(typed_array).unwrap(); + assert_eq!(result, None); + } + + #[test] + fn test_ree_sum_array_overflow() { + let run_array = make_run_array::(&[126, 2]); + let typed_array = run_array.downcast::().unwrap(); + + // i8 range is -128..=127. 126+2 overflows to -128. 
+ let result = sum_array::(typed_array); + assert_eq!(result, Some(-128)); + + let result = sum_array_checked::(typed_array); + assert!(result.is_err()); + } + + #[test] + fn test_ree_sum_array_sliced() { + let run_array = make_run_array::(&[0, 10, 10, 10, 20, 30, 30, 30]); + // Skip 2 values at the start and 1 at the end. + let sliced = run_array.slice(2, 5); + let typed_array = sliced.downcast::().unwrap(); + + let result = sum_array::(typed_array); + assert_eq!(result, Some(100)); + + let result = sum_array_checked::(typed_array).unwrap(); + assert_eq!(result, Some(100)); + } + + #[test] + fn test_ree_min_max_array_basic() { + let run_array = make_run_array::(&[30, 30, 10, 20, 20]); + let typed_array = run_array.downcast::().unwrap(); + + let result = min_array::(typed_array); + assert_eq!(result, Some(10)); + + let result = max_array::(typed_array); + assert_eq!(result, Some(30)); + } + + #[test] + fn test_ree_min_max_array_empty() { + let run_array = make_run_array::(&[]); + let typed_array = run_array.downcast::().unwrap(); + + let result = min_array::(typed_array); + assert_eq!(result, None); + + let result = max_array::(typed_array); + assert_eq!(result, None); + } + + #[test] + fn test_ree_min_max_array_float() { + let run_array = make_run_array::(&[5.5, 5.5, 2.1, 8.9, 8.9]); + let typed_array = run_array.downcast::().unwrap(); + + let result = min_array::(typed_array); + assert_eq!(result, Some(2.1)); + + let result = max_array::(typed_array); + assert_eq!(result, Some(8.9)); + } + + #[test] + fn test_ree_min_max_array_with_nulls() { + let run_array = make_run_array::(&[None, Some(10)]); + let typed_array = run_array.downcast::().unwrap(); + + let result = min_array::(typed_array); + assert_eq!(result, Some(10)); + + let result = max_array::(typed_array); + assert_eq!(result, Some(10)); + } + + #[test] + fn test_ree_min_max_array_sliced() { + let run_array = make_run_array::(&[0, 30, 30, 10, 20, 20, 100]); + // Skip 1 value at the start and 1 at the end. 
+ let sliced = run_array.slice(1, 5); + let typed_array = sliced.downcast::().unwrap(); + + let result = min_array::(typed_array); + assert_eq!(result, Some(10)); + + let result = max_array::(typed_array); + assert_eq!(result, Some(30)); + } + + #[test] + fn test_ree_min_max_array_sliced_mid_run() { + let run_array = make_run_array::(&[0, 0, 30, 10, 20, 100, 100]); + // Skip 1 value at the start and 1 at the end. + let sliced = run_array.slice(1, 5); + let typed_array = sliced.downcast::().unwrap(); + + let result = min_array::(typed_array); + assert_eq!(result, Some(0)); + + let result = max_array::(typed_array); + assert_eq!(result, Some(100)); + } } From c214c3c6f539c50ff644a3d92571375c57ffe11b Mon Sep 17 00:00:00 2001 From: Alexander Rafferty Date: Fri, 13 Mar 2026 20:54:04 +1100 Subject: [PATCH 32/80] Add benchmark for `infer_json_schema` (#9546) # Which issue does this PR close? Split out from #9494 to make review easier. It simply adds a benchmark for JSON schema inference. # Rationale for this change I have an open PR that significantly refactors the JSON schema inference code, so I want confidence that not only is the new code correct, but also has better performance than the existing code. # What changes are included in this PR? Adds a benchmark. # Are these changes tested? N/A # Are there any user-facing changes? 
No --- arrow-json/Cargo.toml | 1 + arrow-json/benches/json_reader.rs | 76 ++++++++++++++++++++++++++++++- 2 files changed, 75 insertions(+), 2 deletions(-) diff --git a/arrow-json/Cargo.toml b/arrow-json/Cargo.toml index be1f8d0ccdca..851f0a244f53 100644 --- a/arrow-json/Cargo.toml +++ b/arrow-json/Cargo.toml @@ -61,6 +61,7 @@ tokio = { version = "1.27", default-features = false, features = ["io-util"] } bytes = "1.4" criterion = { workspace = true, default-features = false } rand = { version = "0.9", default-features = false, features = ["std", "std_rng", "thread_rng"] } +arbitrary = { version = "1.4.2", features = ["derive"] } [[bench]] name = "serde" diff --git a/arrow-json/benches/json_reader.rs b/arrow-json/benches/json_reader.rs index f87ba695eb62..fccac68d9bfa 100644 --- a/arrow-json/benches/json_reader.rs +++ b/arrow-json/benches/json_reader.rs @@ -15,12 +15,14 @@ // specific language governing permissions and limitations // under the License. +use arbitrary::{Arbitrary, Unstructured}; use arrow_json::ReaderBuilder; -use arrow_json::reader::Decoder; +use arrow_json::reader::{Decoder, infer_json_schema}; use arrow_schema::{DataType, Field, Schema}; use criterion::{ BenchmarkId, Criterion, SamplingMode, Throughput, criterion_group, criterion_main, }; +use serde::Serialize; use serde_json::{Map, Number, Value}; use std::fmt::Write; use std::hint::black_box; @@ -323,6 +325,75 @@ fn bench_serialize_list(c: &mut Criterion) { }); } +fn bench_schema_inference(c: &mut Criterion) { + const ROWS: usize = 1000; + + #[derive(Serialize, Arbitrary, Debug)] + struct Row { + a: Option, + b: Option, + c: Option<[i16; 8]>, + d: Option<[bool; 8]>, + e: Option, + f: f64, + } + + #[derive(Serialize, Arbitrary, Debug)] + struct Inner { + a: Option, + b: Option, + c: Option, + } + + let mut data = vec![]; + for row in pseudorandom_sequence::(ROWS) { + serde_json::to_writer(&mut data, &row).unwrap(); + data.push(b'\n'); + } + + let mut group = 
c.benchmark_group("infer_json_schema"); + group.throughput(Throughput::Bytes(data.len() as u64)); + group.sample_size(50); + group.measurement_time(std::time::Duration::from_secs(5)); + group.warm_up_time(std::time::Duration::from_secs(2)); + group.sampling_mode(SamplingMode::Flat); + group.bench_function(BenchmarkId::from_parameter(ROWS), |b| { + b.iter(|| infer_json_schema(black_box(&data[..]), None).unwrap()) + }); + group.finish(); +} + +fn pseudorandom_sequence Arbitrary<'a>>(len: usize) -> Vec { + static RAND_BYTES: &[u8; 255] = &[ + 12, 135, 254, 243, 18, 5, 38, 175, 60, 58, 204, 103, 15, 88, 201, 199, 57, 63, 56, 234, + 106, 111, 238, 119, 214, 50, 110, 89, 129, 185, 112, 115, 35, 239, 188, 189, 49, 184, 194, + 146, 108, 131, 213, 43, 236, 81, 61, 20, 21, 52, 223, 220, 215, 74, 210, 27, 190, 107, 174, + 142, 237, 66, 75, 1, 53, 181, 82, 158, 68, 134, 176, 229, 157, 116, 233, 153, 84, 139, 151, + 8, 171, 59, 105, 242, 40, 69, 94, 170, 4, 187, 212, 156, 65, 90, 192, 216, 29, 222, 122, + 230, 198, 154, 155, 245, 45, 178, 123, 23, 117, 168, 149, 17, 177, 48, 54, 241, 202, 44, + 232, 64, 221, 252, 161, 91, 93, 143, 240, 102, 172, 209, 224, 186, 197, 219, 247, 71, 36, + 101, 133, 113, 6, 137, 231, 162, 31, 7, 22, 138, 47, 136, 2, 244, 141, 173, 99, 25, 95, 96, + 85, 249, 42, 251, 217, 16, 205, 98, 203, 92, 114, 14, 163, 150, 144, 10, 125, 13, 195, 72, + 41, 67, 246, 11, 77, 132, 83, 37, 24, 183, 226, 250, 109, 248, 33, 76, 9, 55, 159, 34, 62, + 196, 87, 3, 39, 28, 166, 167, 255, 206, 79, 191, 228, 193, 179, 97, 182, 148, 73, 120, 211, + 253, 70, 227, 51, 169, 130, 145, 218, 78, 180, 165, 46, 127, 152, 26, 140, 207, 19, 100, + 104, 80, 164, 126, 118, 200, 128, 86, 160, 32, 30, 225, 147, 124, 121, 235, 208, + ]; + + let bytes: Vec = RAND_BYTES + .iter() + .flat_map(|i| RAND_BYTES.map(|j| i.wrapping_add(j))) + .take(1000 * len) + .collect(); + + let mut u = Unstructured::new(&bytes); + + (0..len) + .map(|_| u.arbitrary::().unwrap()) + .take(len) + .collect() +} + 
criterion_group!( benches, bench_decode_wide_object, @@ -330,6 +401,7 @@ criterion_group!( bench_binary_hex, bench_wide_projection, bench_decode_list, - bench_serialize_list + bench_serialize_list, + bench_schema_inference ); criterion_main!(benches); From 393117979882e97a15125edd142c70a5e2c16386 Mon Sep 17 00:00:00 2001 From: Oleks V Date: Fri, 13 Mar 2026 02:54:56 -0700 Subject: [PATCH 33/80] chore: Protect `main` branch with required reviews (#9547) # Which issue does this PR close? - Closes #NNN. # Rationale for this change Currently any user with `write` access can merge the PR without review. Good practice to get at least 1 review before the merge # What changes are included in this PR? # Are these changes tested? # Are there any user-facing changes? --- .asf.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.asf.yaml b/.asf.yaml index 36f01b88a724..9214924add68 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -46,6 +46,8 @@ github: strict: true # don't require any jobs to pass contexts: [] + required_pull_request_reviews: + required_approving_review_count: 1 pull_requests: # enable updating head branches of pull requests allow_update_branch: true From 002426087ea9106b616194a5d0942aedba2bc884 Mon Sep 17 00:00:00 2001 From: "xudong.w" Date: Sat, 14 Mar 2026 22:18:18 +0800 Subject: [PATCH 34/80] Replace interleave overflow panic with error (#9549) # Which issue does this PR close? - Closes #NNN. # Rationale for this change # What changes are included in this PR? Replace interleave overflow panic with error # Are these changes tested? Yes UT # Are there any user-facing changes? 
--- arrow-select/src/interleave.rs | 46 +++++++++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 6 deletions(-) diff --git a/arrow-select/src/interleave.rs b/arrow-select/src/interleave.rs index 6598a5eb0da0..be4e98ffccd7 100644 --- a/arrow-select/src/interleave.rs +++ b/arrow-select/src/interleave.rs @@ -173,12 +173,15 @@ fn interleave_bytes( let mut capacity = 0; let mut offsets = Vec::with_capacity(indices.len() + 1); offsets.push(T::Offset::from_usize(0).unwrap()); - offsets.extend(indices.iter().map(|(a, b)| { + for (a, b) in indices { let o = interleaved.arrays[*a].value_offsets(); let element_len = o[*b + 1].as_usize() - o[*b].as_usize(); capacity += element_len; - T::Offset::from_usize(capacity).expect("overflow") - })); + offsets.push( + T::Offset::from_usize(capacity) + .ok_or_else(|| ArrowError::OffsetOverflowError(capacity))?, + ); + } let mut values = Vec::with_capacity(capacity); for (a, b) in indices { @@ -331,12 +334,14 @@ fn interleave_list( let mut capacity = 0usize; let mut offsets = Vec::with_capacity(indices.len() + 1); offsets.push(O::from_usize(0).unwrap()); - offsets.extend(indices.iter().map(|(array, row)| { + for (array, row) in indices { let o = interleaved.arrays[*array].value_offsets(); let element_len = o[*row + 1].as_usize() - o[*row].as_usize(); capacity += element_len; - O::from_usize(capacity).expect("offset overflow") - })); + offsets.push( + O::from_usize(capacity).ok_or_else(|| ArrowError::OffsetOverflowError(capacity))?, + ); + } let mut child_indices = Vec::with_capacity(capacity); for (array, row) in indices { @@ -1414,4 +1419,33 @@ mod tests { ] ); } + + #[test] + fn test_interleave_bytes_offset_overflow() { + let indices: Vec<(usize, usize)> = vec![(0, 0); (i32::MAX >> 4) as usize]; + let text = ('a'..='z').collect::(); + let values = StringArray::from(vec![Some(text)]); + assert!(matches!( + interleave(&[&values], &indices), + Err(ArrowError::OffsetOverflowError(_)) + )); + } + + #[test] + fn 
test_interleave_list_offset_overflow() { + // Build a ListArray with a single row containing many elements + let mut builder = GenericListBuilder::::new(Int32Builder::new()); + for i in 0..32 { + builder.values().append_value(i); + } + builder.append(true); + let list = builder.finish(); + + // Interleave enough copies to overflow i32 offsets + let indices: Vec<(usize, usize)> = vec![(0, 0); (i32::MAX as usize / 32) + 1]; + assert!(matches!( + interleave(&[&list], &indices), + Err(ArrowError::OffsetOverflowError(_)) + )); + } } From 83b6908f92de32c6695d95d7dc2b0a0116aa3185 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Heres?= Date: Sat, 14 Mar 2026 18:50:38 +0100 Subject: [PATCH 35/80] Unroll interleave -25-30% (#9542) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Which issue does this PR close? - Closes #NNN. # Rationale for this change ``` 🤖: Benchmark completed Details group main interleave ----- ---- ----------- interleave dict(20, 0.0) 100 [0..100, 100..230, 450..1000] 1.08 805.6±8.28ns ? ?/sec 1.00 748.5±14.05ns ? ?/sec interleave dict(20, 0.0) 1024 [0..100, 100..230, 450..1000, 0..1000] 1.18 2.6±0.00µs ? ?/sec 1.00 2.2±0.01µs ? ?/sec interleave dict(20, 0.0) 1024 [0..100, 100..230, 450..1000] 1.21 2.6±0.01µs ? ?/sec 1.00 2.2±0.02µs ? ?/sec interleave dict(20, 0.0) 400 [0..100, 100..230, 450..1000] 1.16 1431.6±3.11ns ? ?/sec 1.00 1232.9±14.26ns ? ?/sec interleave dict_distinct 100 1.03 2.9±0.12µs ? ?/sec 1.00 2.9±0.07µs ? ?/sec interleave dict_distinct 1024 1.02 2.9±0.06µs ? ?/sec 1.00 2.8±0.03µs ? ?/sec interleave dict_distinct 2048 1.03 2.9±0.02µs ? ?/sec 1.00 2.8±0.08µs ? ?/sec interleave dict_sparse(20, 0.0) 100 [0..100, 100..230, 450..1000] 1.00 2.7±0.26µs ? ?/sec 1.02 2.8±0.21µs ? ?/sec interleave dict_sparse(20, 0.0) 1024 [0..100, 100..230, 450..1000, 0..1000] 1.11 5.3±0.31µs ? ?/sec 1.00 4.8±0.40µs ? ?/sec interleave dict_sparse(20, 0.0) 1024 [0..100, 100..230, 450..1000] 1.16 4.8±0.25µs ? 
?/sec 1.00 4.1±0.23µs ? ?/sec interleave dict_sparse(20, 0.0) 400 [0..100, 100..230, 450..1000] 1.05 3.5±0.31µs ? ?/sec 1.00 3.3±0.29µs ? ?/sec interleave i32(0.0) 100 [0..100, 100..230, 450..1000] 1.21 313.8±1.03ns ? ?/sec 1.00 258.9±4.98ns ? ?/sec interleave i32(0.0) 1024 [0..100, 100..230, 450..1000, 0..1000] 1.34 1856.5±17.40ns ? ?/sec 1.00 1385.9±32.73ns ? ?/sec interleave i32(0.0) 1024 [0..100, 100..230, 450..1000] 1.34 1848.6±8.80ns ? ?/sec 1.00 1382.4±48.64ns ? ?/sec interleave i32(0.0) 400 [0..100, 100..230, 450..1000] 1.37 843.3±7.37ns ? ?/sec 1.00 615.5±22.71ns ? ?/sec interleave i32(0.5) 100 [0..100, 100..230, 450..1000] 1.09 604.2±5.60ns ? ?/sec 1.00 555.1±4.48ns ? ?/sec interleave i32(0.5) 1024 [0..100, 100..230, 450..1000, 0..1000] 1.12 4.3±0.01µs ? ?/sec 1.00 3.8±0.04µs ? ?/sec interleave i32(0.5) 1024 [0..100, 100..230, 450..1000] 1.13 4.4±0.06µs ? ?/sec 1.00 3.9±0.17µs ? ?/sec interleave i32(0.5) 400 [0..100, 100..230, 450..1000] 1.12 1889.4±19.68ns ? ?/sec 1.00 1691.5±17.15ns ? ?/sec interleave list(0.0,0.0,20) 100 [0..100, 100..230, 450..1000] 1.07 2.7±0.03µs ? ?/sec 1.00 2.5±0.03µs ? ?/sec interleave list(0.0,0.0,20) 1024 [0..100, 100..230, 450..1000, 0..1000] 1.06 26.2±0.11µs ? ?/sec 1.00 24.6±0.31µs ? ?/sec interleave list(0.0,0.0,20) 1024 [0..100, 100..230, 450..1000] 1.06 25.9±0.14µs ? ?/sec 1.00 24.5±0.29µs ? ?/sec interleave list(0.0,0.0,20) 400 [0..100, 100..230, 450..1000] 1.07 10.5±0.21µs ? ?/sec 1.00 9.9±0.06µs ? ?/sec interleave list(0.1,0.1,20) 100 [0..100, 100..230, 450..1000] 1.05 5.8±0.25µs ? ?/sec 1.00 5.5±0.06µs ? ?/sec interleave list(0.1,0.1,20) 1024 [0..100, 100..230, 450..1000, 0..1000] 1.05 47.4±2.23µs ? ?/sec 1.00 45.2±0.14µs ? ?/sec interleave list(0.1,0.1,20) 1024 [0..100, 100..230, 450..1000] 1.06 48.0±2.35µs ? ?/sec 1.00 45.5±0.64µs ? ?/sec interleave list(0.1,0.1,20) 400 [0..100, 100..230, 450..1000] 1.05 19.2±0.90µs ? ?/sec 1.00 18.2±0.03µs ? 
?/sec interleave str(20, 0.0) 100 [0..100, 100..230, 450..1000] 1.01 786.8±1.50ns ? ?/sec 1.00 779.4±4.35ns ? ?/sec interleave str(20, 0.0) 1024 [0..100, 100..230, 450..1000, 0..1000] 1.04 6.3±0.12µs ? ?/sec 1.00 6.0±0.02µs ? ?/sec interleave str(20, 0.0) 1024 [0..100, 100..230, 450..1000] 1.04 6.2±0.08µs ? ?/sec 1.00 6.0±0.01µs ? ?/sec interleave str(20, 0.0) 400 [0..100, 100..230, 450..1000] 1.09 2.7±0.01µs ? ?/sec 1.00 2.4±0.01µs ? ?/sec interleave str(20, 0.5) 100 [0..100, 100..230, 450..1000] 1.04 1064.4±19.37ns ? ?/sec 1.00 1023.8±3.56ns ? ?/sec interleave str(20, 0.5) 1024 [0..100, 100..230, 450..1000, 0..1000] 1.03 10.3±0.06µs ? ?/sec 1.00 10.1±0.13µs ? ?/sec interleave str(20, 0.5) 1024 [0..100, 100..230, 450..1000] 1.02 10.3±0.05µs ? ?/sec 1.00 10.1±0.54µs ? ?/sec interleave str(20, 0.5) 400 [0..100, 100..230, 450..1000] 1.04 3.7±0.03µs ? ?/sec 1.00 3.6±0.17µs ? ?/sec interleave str_view(0.0) 100 [0..100, 100..230, 450..1000] 1.01 856.9±2.90ns ? ?/sec 1.00 849.1±7.00ns ? ?/sec interleave str_view(0.0) 1024 [0..100, 100..230, 450..1000, 0..1000] 1.00 5.0±0.15µs ? ?/sec 1.02 5.1±0.02µs ? ?/sec interleave str_view(0.0) 1024 [0..100, 100..230, 450..1000] 1.00 4.9±0.05µs ? ?/sec 1.04 5.1±0.02µs ? ?/sec interleave str_view(0.0) 400 [0..100, 100..230, 450..1000] 1.00 2.2±0.05µs ? ?/sec 1.03 2.2±0.01µs ? ?/sec interleave struct(i32(0.0), i32(0.0) 100 [0..100, 100..230, 450..1000] 1.20 874.3±4.12ns ? ?/sec 1.00 729.1±12.04ns ? ?/sec interleave struct(i32(0.0), i32(0.0) 1024 [0..100, 100..230, 450..1000, 0..1000] 1.34 4.0±0.01µs ? ?/sec 1.00 3.0±0.02µs ? ?/sec interleave struct(i32(0.0), i32(0.0) 1024 [0..100, 100..230, 450..1000] 1.31 4.0±0.04µs ? ?/sec 1.00 3.0±0.01µs ? ?/sec interleave struct(i32(0.0), i32(0.0) 400 [0..100, 100..230, 450..1000] 1.24 1905.1±19.48ns ? ?/sec 1.00 1532.8±33.13ns ? ?/sec interleave struct(i32(0.0), str(20, 0.0) 100 [0..100, 100..230, 450..1000] 1.00 1340.9±6.76ns ? ?/sec 1.01 1347.8±12.50ns ? 
?/sec interleave struct(i32(0.0), str(20, 0.0) 1024 [0..100, 100..230, 450..1000, 0..1000] 1.08 8.3±0.16µs ? ?/sec 1.00 7.7±0.02µs ? ?/sec interleave struct(i32(0.0), str(20, 0.0) 1024 [0..100, 100..230, 450..1000] 1.08 8.3±0.06µs ? ?/sec 1.00 7.7±0.06µs ? ?/sec interleave struct(i32(0.0), str(20, 0.0) 400 [0..100, 100..230, 450..1000] 1.09 3.7±0.13µs ? ?/sec 1.00 3.4±0.02µs ? ?/sec interleave struct(str(20, 0.0), str(20, 0.0)) 100 [0..100, 100..230, 450..1000] 1.05 1927.3±9.31ns ? ?/sec 1.00 1842.2±18.19ns ? ?/sec interleave struct(str(20, 0.0), str(20, 0.0)) 1024 [0..100, 100..230, 450..1000, 0..1000] 1.04 12.6±0.06µs ? ?/sec 1.00 12.1±0.08µs ? ?/sec interleave struct(str(20, 0.0), str(20, 0.0)) 1024 [0..100, 100..230, 450..1000] 1.04 12.6±0.03µs ? ?/sec 1.00 12.1±0.14µs ? ?/sec interleave struct(str(20, 0.0), str(20, 0.0)) 400 [0..100, 100..230, 450..1000] 1.04 5.4±0.07µs ? ?/sec 1.00 5.2±0.04µs ? ?/sec ``` # What changes are included in this PR? # Are these changes tested? # Are there any user-facing changes? --------- Co-authored-by: Claude Opus 4.6 (1M context) --- arrow-select/src/interleave.rs | 51 ++++++++++++++++++++++++++++++---- 1 file changed, 46 insertions(+), 5 deletions(-) diff --git a/arrow-select/src/interleave.rs b/arrow-select/src/interleave.rs index be4e98ffccd7..711e816f70d5 100644 --- a/arrow-select/src/interleave.rs +++ b/arrow-select/src/interleave.rs @@ -154,13 +154,54 @@ fn interleave_primitive( data_type: &DataType, ) -> Result { let interleaved = Interleave::<'_, PrimitiveArray>::new(values, indices); + let arrays = &interleaved.arrays; + let len = indices.len(); + + let mut output = Vec::with_capacity(len); + let dst: *mut T::Native = output.as_mut_ptr(); + let mut base = 0; + + // Process 8 elements at a time to issue multiple independent loads + // and increase memory-level parallelism for random access patterns. 
+ let chunks = indices.chunks_exact(8); + let remainder = chunks.remainder(); + for chunk in chunks { + let v0 = arrays[chunk[0].0].value(chunk[0].1); + let v1 = arrays[chunk[1].0].value(chunk[1].1); + let v2 = arrays[chunk[2].0].value(chunk[2].1); + let v3 = arrays[chunk[3].0].value(chunk[3].1); + let v4 = arrays[chunk[4].0].value(chunk[4].1); + let v5 = arrays[chunk[5].0].value(chunk[5].1); + let v6 = arrays[chunk[6].0].value(chunk[6].1); + let v7 = arrays[chunk[7].0].value(chunk[7].1); + + // SAFETY: base+7 < len == output capacity + debug_assert!(base + 7 < len); + unsafe { + dst.add(base).write(v0); + dst.add(base + 1).write(v1); + dst.add(base + 2).write(v2); + dst.add(base + 3).write(v3); + dst.add(base + 4).write(v4); + dst.add(base + 5).write(v5); + dst.add(base + 6).write(v6); + dst.add(base + 7).write(v7); + } + base += 8; + } - let values = indices - .iter() - .map(|(a, b)| interleaved.arrays[*a].value(*b)) - .collect::>(); + for idx in remainder { + // SAFETY: base < len == output capacity + debug_assert!(base < len); + unsafe { dst.add(base).write(arrays[idx.0].value(idx.1)) }; + base += 1; + } + + // SAFETY: all `len` elements have been initialized + debug_assert!(base == len); + unsafe { output.set_len(len) }; - let array = PrimitiveArray::::try_new(values.into(), interleaved.nulls)?; + let array = PrimitiveArray::::try_new(output.into(), interleaved.nulls)?; Ok(Arc::new(array.with_data_type(data_type.clone()))) } From fcab5d234458de9dd6a9222f6336d51c18ae141d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 14 Mar 2026 19:28:21 +0100 Subject: [PATCH 36/80] chore(deps): bump black from 24.3.0 to 26.3.1 in /parquet/pytest (#9545) Bumps [black](https://github.com/psf/black) from 24.3.0 to 26.3.1.
Release notes

Sourced from black's releases.

26.3.1

Stable style

  • Prevent Jupyter notebook magic masking collisions from corrupting cells by using exact-length placeholders for short magics and aborting if a placeholder can no longer be unmasked safely (#5038)

Configuration

  • Always hash cache filename components derived from --python-cell-magics so custom magic names cannot affect cache paths (#5038)

Blackd

  • Disable browser-originated requests by default, add configurable origin allowlisting and request body limits, and bound executor submissions to improve backpressure (#5039)

26.3.0

Stable style

  • Don't double-decode input, causing non-UTF-8 files to be corrupted (#4964)
  • Fix crash on standalone comment in lambda default arguments (#4993)
  • Preserve parentheses when # type: ignore comments would be merged with other comments on the same line, preventing AST equivalence failures (#4888)

Preview style

  • Fix bug where if guards in case blocks were incorrectly split when the pattern had a trailing comma (#4884)
  • Fix string_processing crashing on unassigned long string literals with trailing commas (one-item tuples) (#4929)
  • Simplify implementation of the power operator "hugging" logic (#4918)

Packaging

  • Fix shutdown errors in PyInstaller builds on macOS by disabling multiprocessing in frozen environments (#4930)

Performance

  • Introduce winloop for windows as an alternative to uvloop (#4996)
  • Remove deprecated function uvloop.install() in favor of uvloop.new_event_loop() (#4996)
  • Rename maybe_install_uvloop function to maybe_use_uvloop to simplify loop installation and creation of either a uvloop/winloop eventloop or default eventloop (#4996)

Output

... (truncated)

Changelog

Sourced from black's changelog.

26.3.1

Stable style

  • Prevent Jupyter notebook magic masking collisions from corrupting cells by using exact-length placeholders for short magics and aborting if a placeholder can no longer be unmasked safely (#5038)

Configuration

  • Always hash cache filename components derived from --python-cell-magics so custom magic names cannot affect cache paths (#5038)

Blackd

  • Disable browser-originated requests by default, add configurable origin allowlisting and request body limits, and bound executor submissions to improve backpressure (#5039)

26.3.0

Stable style

  • Don't double-decode input, causing non-UTF-8 files to be corrupted (#4964)
  • Fix crash on standalone comment in lambda default arguments (#4993)
  • Preserve parentheses when # type: ignore comments would be merged with other comments on the same line, preventing AST equivalence failures (#4888)

Preview style

  • Fix bug where if guards in case blocks were incorrectly split when the pattern had a trailing comma (#4884)
  • Fix string_processing crashing on unassigned long string literals with trailing commas (one-item tuples) (#4929)
  • Simplify implementation of the power operator "hugging" logic (#4918)

Packaging

  • Fix shutdown errors in PyInstaller builds on macOS by disabling multiprocessing in frozen environments (#4930)

Performance

  • Introduce winloop for windows as an alternative to uvloop (#4996)
  • Remove deprecated function uvloop.install() in favor of uvloop.new_event_loop() (#4996)
  • Rename maybe_install_uvloop function to maybe_use_uvloop to simplify loop installation and creation of either a uvloop/winloop eventloop or default eventloop (#4996)

... (truncated)

Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=black&package-manager=pip&previous-version=24.3.0&new-version=26.3.1)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself) You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/apache/arrow-rs/network/alerts).
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- parquet/pytest/requirements.txt | 101 ++++++++++++++++++++++++-------- 1 file changed, 75 insertions(+), 26 deletions(-) diff --git a/parquet/pytest/requirements.txt b/parquet/pytest/requirements.txt index 40797e5e7a20..696fc2f0d63c 100644 --- a/parquet/pytest/requirements.txt +++ b/parquet/pytest/requirements.txt @@ -24,29 +24,34 @@ attrs==22.1.0 \ --hash=sha256:29adc2665447e5191d0e7c568fde78b21f9672d344281d0c6e1ab085429b22b6 \ --hash=sha256:86efa402f67bf2df34f51a335487cf46b1ec130d02b8d39fd248abfd30da551c # via pytest -black==24.3.0 \ - --hash=sha256:2818cf72dfd5d289e48f37ccfa08b460bf469e67fb7c4abb07edc2e9f16fb63f \ - --hash=sha256:41622020d7120e01d377f74249e677039d20e6344ff5851de8a10f11f513bf93 \ - --hash=sha256:4acf672def7eb1725f41f38bf6bf425c8237248bb0804faa3965c036f7672d11 \ - --hash=sha256:4be5bb28e090456adfc1255e03967fb67ca846a03be7aadf6249096100ee32d0 \ - --hash=sha256:4f1373a7808a8f135b774039f61d59e4be7eb56b2513d3d2f02a8b9365b8a8a9 \ - --hash=sha256:56f52cfbd3dabe2798d76dbdd299faa046a901041faf2cf33288bc4e6dae57b5 \ - --hash=sha256:65b76c275e4c1c5ce6e9870911384bff5ca31ab63d19c76811cb1fb162678213 \ - --hash=sha256:65c02e4ea2ae09d16314d30912a58ada9a5c4fdfedf9512d23326128ac08ac3d \ - --hash=sha256:6905238a754ceb7788a73f02b45637d820b2f5478b20fec82ea865e4f5d4d9f7 \ - --hash=sha256:79dcf34b33e38ed1b17434693763301d7ccbd1c5860674a8f871bd15139e7837 \ - --hash=sha256:7bb041dca0d784697af4646d3b62ba4a6b028276ae878e53f6b4f74ddd6db99f \ - --hash=sha256:7d5e026f8da0322b5662fa7a8e752b3fa2dac1c1cbc213c3d7ff9bdd0ab12395 \ - --hash=sha256:9f50ea1132e2189d8dff0115ab75b65590a3e97de1e143795adb4ce317934995 \ - --hash=sha256:a0c9c4a0771afc6919578cec71ce82a3e31e054904e7197deacbc9382671c41f \ - --hash=sha256:aadf7a02d947936ee418777e0247ea114f78aff0d0959461057cae8a04f20597 \ - --hash=sha256:b5991d523eee14756f3c8d5df5231550ae8993e2286b8014e2fdea7156ed0959 \ - 
--hash=sha256:bf21b7b230718a5f08bd32d5e4f1db7fc8788345c8aea1d155fc17852b3410f5 \ - --hash=sha256:c45f8dff244b3c431b36e3224b6be4a127c6aca780853574c00faf99258041eb \ - --hash=sha256:c7ed6668cbbfcd231fa0dc1b137d3e40c04c7f786e626b405c62bcd5db5857e4 \ - --hash=sha256:d7de8d330763c66663661a1ffd432274a2f92f07feeddd89ffd085b5744f85e7 \ - --hash=sha256:e19cb1c6365fd6dc38a6eae2dcb691d7d83935c10215aef8e6c38edee3f77abd \ - --hash=sha256:e2af80566f43c85f5797365077fb64a393861a3730bd110971ab7a0c94e873e7 +black==26.3.1 \ + --hash=sha256:0126ae5b7c09957da2bdbd91a9ba1207453feada9e9fe51992848658c6c8e01c \ + --hash=sha256:0f76ff19ec5297dd8e66eb64deda23631e642c9393ab592826fd4bdc97a4bce7 \ + --hash=sha256:28ef38aee69e4b12fda8dba75e21f9b4f979b490c8ac0baa7cb505369ac9e1ff \ + --hash=sha256:2bd5aa94fc267d38bb21a70d7410a89f1a1d318841855f698746f8e7f51acd1b \ + --hash=sha256:2c50f5063a9641c7eed7795014ba37b0f5fa227f3d408b968936e24bc0566b07 \ + --hash=sha256:2d6bfaf7fd0993b420bed691f20f9492d53ce9a2bcccea4b797d34e947318a78 \ + --hash=sha256:41cd2012d35b47d589cb8a16faf8a32ef7a336f56356babd9fcf70939ad1897f \ + --hash=sha256:474c27574d6d7037c1bc875a81d9be0a9a4f9ee95e62800dab3cfaadbf75acd5 \ + --hash=sha256:5602bdb96d52d2d0672f24f6ffe5218795736dd34807fd0fd55ccd6bf206168b \ + --hash=sha256:5e9d0d86df21f2e1677cc4bd090cd0e446278bcbbe49bf3659c308c3e402843e \ + --hash=sha256:5ed0ca58586c8d9a487352a96b15272b7fa55d139fc8496b519e78023a8dab0a \ + --hash=sha256:6c54a4a82e291a1fee5137371ab488866b7c86a3305af4026bdd4dc78642e1ac \ + --hash=sha256:6e131579c243c98f35bce64a7e08e87fb2d610544754675d4a0e73a070a5aa3a \ + --hash=sha256:855822d90f884905362f602880ed8b5df1b7e3ee7d0db2502d4388a954cc8c54 \ + --hash=sha256:86a8b5035fce64f5dcd1b794cf8ec4d31fe458cf6ce3986a30deb434df82a1d2 \ + --hash=sha256:8a33d657f3276328ce00e4d37fe70361e1ec7614da5d7b6e78de5426cb56332f \ + --hash=sha256:92c0ec1f2cc149551a2b7b47efc32c866406b6891b0ee4625e95967c8f4acfb1 \ + 
--hash=sha256:9a5e9f45e5d5e1c5b5c29b3bd4265dcc90e8b92cf4534520896ed77f791f4da5 \ + --hash=sha256:afc622538b430aa4c8c853f7f63bc582b3b8030fd8c80b70fb5fa5b834e575c2 \ + --hash=sha256:b07fc0dab849d24a80a29cfab8d8a19187d1c4685d8a5e6385a5ce323c1f015f \ + --hash=sha256:b5e6f89631eb88a7302d416594a32faeee9fb8fb848290da9d0a5f2903519fc1 \ + --hash=sha256:bf9bf162ed91a26f1adba8efda0b573bc6924ec1408a52cc6f82cb73ec2b142c \ + --hash=sha256:c7e72339f841b5a237ff14f7d3880ddd0fc7f98a1199e8c4327f9a4f478c1839 \ + --hash=sha256:ddb113db38838eb9f043623ba274cfaf7d51d5b0c22ecb30afe58b1bb8322983 \ + --hash=sha256:dfdd51fc3e64ea4f35873d1b3fb25326773d55d2329ff8449139ebaad7357efb \ + --hash=sha256:f1cd08e99d2f9317292a311dfe578fd2a24b15dbce97792f9c4d752275c1fa56 \ + --hash=sha256:f89f2ab047c76a9c03f78d0d66ca519e389519902fa27e7a91117ef7611c0568 # via -r requirements.in click==8.1.3 \ --hash=sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e \ @@ -129,9 +134,9 @@ pandas==1.5.2 \ --hash=sha256:e9dbacd22555c2d47f262ef96bb4e30880e5956169741400af8b306bbb24a273 \ --hash=sha256:f6257b314fc14958f8122779e5a1557517b0f8e500cfb2bd53fa1f75a8ad0af2 # via -r requirements.in -pathspec==0.10.2 \ - --hash=sha256:88c2606f2c1e818b978540f73ecc908e13999c6c3a383daf3705652ae79807a5 \ - --hash=sha256:8f6bf73e5758fd365ef5d58ce09ac7c27d2833a8d7da51712eac6e27e35141b0 +pathspec==1.0.4 \ + --hash=sha256:0210e2ae8a21a9137c0d470578cb0e595af87edaa6ebf12ff176f14a02e0e645 \ + --hash=sha256:fb6ae2fd4e7c921a165808a552060e722767cfa526f99ca5156ed2ce45a5c723 # via black platformdirs==2.5.4 \ --hash=sha256:1006647646d80f16130f052404c6b901e80ee4ed6bef6792e1f238a8969106f7 \ @@ -156,6 +161,50 @@ python-dateutil==2.8.2 \ --hash=sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86 \ --hash=sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9 # via pandas +pytokens==0.4.1 \ + --hash=sha256:0fc71786e629cef478cbf29d7ea1923299181d0699dbe7c3c0f4a583811d9fc1 \ + 
--hash=sha256:11edda0942da80ff58c4408407616a310adecae1ddd22eef8c692fe266fa5009 \ + --hash=sha256:140709331e846b728475786df8aeb27d24f48cbcf7bcd449f8de75cae7a45083 \ + --hash=sha256:24afde1f53d95348b5a0eb19488661147285ca4dd7ed752bbc3e1c6242a304d1 \ + --hash=sha256:26cef14744a8385f35d0e095dc8b3a7583f6c953c2e3d269c7f82484bf5ad2de \ + --hash=sha256:27b83ad28825978742beef057bfe406ad6ed524b2d28c252c5de7b4a6dd48fa2 \ + --hash=sha256:292052fe80923aae2260c073f822ceba21f3872ced9a68bb7953b348e561179a \ + --hash=sha256:29d1d8fb1030af4d231789959f21821ab6325e463f0503a61d204343c9b355d1 \ + --hash=sha256:2a44ed93ea23415c54f3face3b65ef2b844d96aeb3455b8a69b3df6beab6acc5 \ + --hash=sha256:30f51edd9bb7f85c748979384165601d028b84f7bd13fe14d3e065304093916a \ + --hash=sha256:34bcc734bd2f2d5fe3b34e7b3c0116bfb2397f2d9666139988e7a3eb5f7400e3 \ + --hash=sha256:3ad72b851e781478366288743198101e5eb34a414f1d5627cdd585ca3b25f1db \ + --hash=sha256:3f901fe783e06e48e8cbdc82d631fca8f118333798193e026a50ce1b3757ea68 \ + --hash=sha256:42f144f3aafa5d92bad964d471a581651e28b24434d184871bd02e3a0d956037 \ + --hash=sha256:4a14d5f5fc78ce85e426aa159489e2d5961acf0e47575e08f35584009178e321 \ + --hash=sha256:4a58d057208cb9075c144950d789511220b07636dd2e4708d5645d24de666bdc \ + --hash=sha256:4e691d7f5186bd2842c14813f79f8884bb03f5995f0575272009982c5ac6c0f7 \ + --hash=sha256:5502408cab1cb18e128570f8d598981c68a50d0cbd7c61312a90507cd3a1276f \ + --hash=sha256:584c80c24b078eec1e227079d56dc22ff755e0ba8654d8383b2c549107528918 \ + --hash=sha256:5ad948d085ed6c16413eb5fec6b3e02fa00dc29a2534f088d3302c47eb59adf9 \ + --hash=sha256:670d286910b531c7b7e3c0b453fd8156f250adb140146d234a82219459b9640c \ + --hash=sha256:682fa37ff4d8e95f7df6fe6fe6a431e8ed8e788023c6bcc0f0880a12eab80ad1 \ + --hash=sha256:6d6c4268598f762bc8e91f5dbf2ab2f61f7b95bdc07953b602db879b3c8c18e1 \ + --hash=sha256:79fc6b8699564e1f9b521582c35435f1bd32dd06822322ec44afdeba666d8cb3 \ + --hash=sha256:8bdb9d0ce90cbf99c525e75a2fa415144fd570a1ba987380190e8b786bc6ef9b \ + 
--hash=sha256:8fcb9ba3709ff77e77f1c7022ff11d13553f3c30299a9fe246a166903e9091eb \ + --hash=sha256:941d4343bf27b605e9213b26bfa1c4bf197c9c599a9627eb7305b0defcfe40c1 \ + --hash=sha256:967cf6e3fd4adf7de8fc73cd3043754ae79c36475c1c11d514fc72cf5490094a \ + --hash=sha256:970b08dd6b86058b6dc07efe9e98414f5102974716232d10f32ff39701e841c4 \ + --hash=sha256:97f50fd18543be72da51dd505e2ed20d2228c74e0464e4262e4899797803d7fa \ + --hash=sha256:9bd7d7f544d362576be74f9d5901a22f317efc20046efe2034dced238cbbfe78 \ + --hash=sha256:add8bf86b71a5d9fb5b89f023a80b791e04fba57960aa790cc6125f7f1d39dfe \ + --hash=sha256:b35d7e5ad269804f6697727702da3c517bb8a5228afa450ab0fa787732055fc9 \ + --hash=sha256:b49750419d300e2b5a3813cf229d4e5a4c728dae470bcc89867a9ad6f25a722d \ + --hash=sha256:d31b97b3de0f61571a124a00ffe9a81fb9939146c122c11060725bd5aea79975 \ + --hash=sha256:d70e77c55ae8380c91c0c18dea05951482e263982911fc7410b1ffd1dadd3440 \ + --hash=sha256:d9907d61f15bf7261d7e775bd5d7ee4d2930e04424bab1972591918497623a16 \ + --hash=sha256:da5baeaf7116dced9c6bb76dc31ba04a2dc3695f3d9f74741d7910122b456edc \ + --hash=sha256:dc74c035f9bfca0255c1af77ddd2d6ae8419012805453e4b0e7513e17904545d \ + --hash=sha256:dcafc12c30dbaf1e2af0490978352e0c4041a7cde31f4f81435c2a5e8b9cabb6 \ + --hash=sha256:ee44d0f85b803321710f9239f335aafe16553b39106384cef8e6de40cb4ef2f6 \ + --hash=sha256:f66a6bbe741bd431f6d741e617e0f39ec7257ca1f89089593479347cc4d13324 + # via black pytz==2022.6 \ --hash=sha256:222439474e9c98fced559f1709d89e6c9cbf8d79c794ff3eb9f8800064291427 \ --hash=sha256:e89512406b793ca39f5971bc999cc538ce125c0e51c27941bef4568b460095e2 From 55ff6eb7885f757f2d8637400f223eb84bb6a500 Mon Sep 17 00:00:00 2001 From: Konstantin Tarasov <33369833+sdf-jkl@users.noreply.github.com> Date: Mon, 16 Mar 2026 10:23:01 -0400 Subject: [PATCH 37/80] add `shred_variant` support for `LargeUtf8` and `LargeBinary` (#9554) # Which issue does this PR close? - Closes #9525 . # Rationale for this change check issue. # What changes are included in this PR? 
Add `shred_variant` support for `LargeUtf8` and `LargeBinary` # Are these changes tested? Yes, unit tests. # Are there any user-facing changes? No --- parquet-variant-compute/src/shred_variant.rs | 122 ++++++++++++++++++- 1 file changed, 118 insertions(+), 4 deletions(-) diff --git a/parquet-variant-compute/src/shred_variant.rs b/parquet-variant-compute/src/shred_variant.rs index 6fa3a930fc37..d0087fd2c7ea 100644 --- a/parquet-variant-compute/src/shred_variant.rs +++ b/parquet-variant-compute/src/shred_variant.rs @@ -147,8 +147,10 @@ pub(crate) fn make_variant_to_shredded_variant_arrow_row_builder<'a>( | DataType::Timestamp(TimeUnit::Microsecond | TimeUnit::Nanosecond, _) | DataType::Binary | DataType::BinaryView + | DataType::LargeBinary | DataType::Utf8 | DataType::Utf8View + | DataType::LargeUtf8 | DataType::FixedSizeBinary(16) // UUID => { let builder = @@ -654,8 +656,8 @@ mod tests { use crate::VariantArrayBuilder; use arrow::array::{ Array, BinaryViewArray, FixedSizeBinaryArray, Float64Array, GenericListArray, - GenericListViewArray, Int64Array, ListArray, ListLikeArray, OffsetSizeTrait, - PrimitiveArray, StringArray, + GenericListViewArray, Int64Array, LargeBinaryArray, LargeStringArray, ListArray, + ListLikeArray, OffsetSizeTrait, PrimitiveArray, StringArray, }; use arrow::datatypes::{ ArrowPrimitiveType, DataType, Field, Fields, Int64Type, TimeUnit, UnionFields, UnionMode, @@ -1144,6 +1146,120 @@ mod tests { assert!(typed_value_float64.is_null(2)); // string doesn't convert } + #[test] + // TODO(#9518): Drop this once variant_get tests build shredded fixtures via shred_variant. 
+ fn test_largeutf8_shredding() { + let input = VariantArray::from_iter(vec![ + Some(Variant::from("hello")), + Some(Variant::from(42i64)), + None, + Some(Variant::Null), + Some(Variant::from("world")), + ]); + + let result = shred_variant(&input, &DataType::LargeUtf8).unwrap(); + let metadata = result.metadata_field(); + let value = result.value_field().unwrap(); + let typed_value = result + .typed_value_field() + .unwrap() + .as_any() + .downcast_ref::() + .unwrap(); + + assert_eq!(result.len(), 5); + + // Row 0: string shreds to typed_value + assert!(result.is_valid(0)); + assert!(value.is_null(0)); + assert_eq!(typed_value.value(0), "hello"); + + // Row 1: integer falls back to value + assert!(result.is_valid(1)); + assert!(value.is_valid(1)); + assert!(typed_value.is_null(1)); + assert_eq!( + Variant::new(metadata.value(1), value.value(1)), + Variant::from(42i64) + ); + + // Row 2: top-level null + assert!(result.is_null(2)); + assert!(value.is_null(2)); + assert!(typed_value.is_null(2)); + + // Row 3: variant null falls back to value + assert!(result.is_valid(3)); + assert!(value.is_valid(3)); + assert!(typed_value.is_null(3)); + assert_eq!( + Variant::new(metadata.value(3), value.value(3)), + Variant::Null + ); + + // Row 4: string shreds to typed_value + assert!(result.is_valid(4)); + assert!(value.is_null(4)); + assert_eq!(typed_value.value(4), "world"); + } + + #[test] + // TODO(#9518): Drop this once variant_get tests build shredded fixtures via shred_variant. 
+ fn test_largebinary_shredding() { + let input = VariantArray::from_iter(vec![ + Some(Variant::from(&b"\x00\x01\x02"[..])), + Some(Variant::from("not_binary")), + None, + Some(Variant::Null), + Some(Variant::from(&b"\xff\xaa"[..])), + ]); + + let result = shred_variant(&input, &DataType::LargeBinary).unwrap(); + let metadata = result.metadata_field(); + let value = result.value_field().unwrap(); + let typed_value = result + .typed_value_field() + .unwrap() + .as_any() + .downcast_ref::() + .unwrap(); + + assert_eq!(result.len(), 5); + + // Row 0: binary shreds to typed_value + assert!(result.is_valid(0)); + assert!(value.is_null(0)); + assert_eq!(typed_value.value(0), &[0x00, 0x01, 0x02]); + + // Row 1: string falls back to value + assert!(result.is_valid(1)); + assert!(value.is_valid(1)); + assert!(typed_value.is_null(1)); + assert_eq!( + Variant::new(metadata.value(1), value.value(1)), + Variant::from("not_binary") + ); + + // Row 2: top-level null + assert!(result.is_null(2)); + assert!(value.is_null(2)); + assert!(typed_value.is_null(2)); + + // Row 3: variant null falls back to value + assert!(result.is_valid(3)); + assert!(value.is_valid(3)); + assert!(typed_value.is_null(3)); + assert_eq!( + Variant::new(metadata.value(3), value.value(3)), + Variant::Null + ); + + // Row 4: binary shreds to typed_value + assert!(result.is_valid(4)); + assert!(value.is_null(4)); + assert_eq!(typed_value.value(4), &[0xff, 0xaa]); + } + #[test] fn test_invalid_shredded_types_rejected() { let input = VariantArray::from_iter([Variant::from(42)]); @@ -1156,8 +1272,6 @@ mod tests { DataType::Time32(TimeUnit::Second), DataType::Time64(TimeUnit::Nanosecond), DataType::Timestamp(TimeUnit::Millisecond, None), - DataType::LargeBinary, - DataType::LargeUtf8, DataType::FixedSizeBinary(17), DataType::Union( UnionFields::from_fields(vec![ From a8fe8b32045f32bc59794b9ad919ba08d22ef514 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: 
Tue, 17 Mar 2026 07:26:00 +0100 Subject: [PATCH 38/80] chore(deps): update lz4_flex requirement from 0.12 to 0.13 (#9565) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Updates the requirements on [lz4_flex](https://github.com/pseitz/lz4_flex) to permit the latest version.
Changelog

Sourced from lz4_flex's changelog.

0.13.0 (2026-03-15)

Features

Fixes

Invalid match offsets (offset == 0) during decompression were not properly
handled, which could lead to invalid memory reads. This is a security fix
that was also backported to 0.12.1 and 0.11.6.
  • Fix get_maximum_output_size overflow on 32-bit targets #205 (thanks @​dglittle)
Cast input_len to u64 before multiplying by 110, avoiding overflow on
32-bit targets (e.g. wasm32) where input_len * 110 overflows usize
when input_len > ~39MB.

0.12.1 (2026-03-14)

Security Fix

Invalid match offsets (offset == 0) during decompression were not properly
handled, which could lead to invalid memory reads on untrusted input.
Users on 0.12.x should upgrade to 0.12.1.

0.12.0 (2025-11-11)

  • Fix integer overflows when decoding large payloads #192 (thanks @​teh-cmc)
This fixes a u32 integer overflow when decoding large payloads in the block format.
Note: The block format is not suitable for such large payloads, since it
keeps everything in memory. Consider using the frame format for large data.

This change also removes an unsafe fast-path for write_integer to simplify the code.
The performance impact is on incompressible data, which is already fast enough.

0.11.6 (2026-03-14)

Security Fix

Invalid match offsets (offset == 0) during decompression were not properly
handled, which could lead to invalid memory reads on untrusted input.
Users on 0.11.x should upgrade to 0.11.6.

... (truncated)

Commits
  • bfaae84 release 0.13.0
  • 055502e fix handling of invalid match offsets during decompression
  • 7191df8 make hashtable visibility crate public
  • 1bdafca add doc comments
  • c90fc91 lz4_block exposes option to reuse compression dict
  • 22e77f9 Delete .github/workflows/typos.yml
  • 2991a09 fix get_maximum_output_size overflow on 32-bit targets
  • 7b5fb80 add minimal security policy
  • See full diff in compare view

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- arrow-ipc/Cargo.toml | 2 +- parquet/Cargo.toml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arrow-ipc/Cargo.toml b/arrow-ipc/Cargo.toml index 943852ffdec9..ae603d2acd93 100644 --- a/arrow-ipc/Cargo.toml +++ b/arrow-ipc/Cargo.toml @@ -42,7 +42,7 @@ arrow-data = { workspace = true } arrow-schema = { workspace = true } arrow-select = { workspace = true} flatbuffers = { version = "25.2.10", default-features = false } -lz4_flex = { version = "0.12", default-features = false, features = ["std", "frame"], optional = true } +lz4_flex = { version = "0.13", default-features = false, features = ["std", "frame"], optional = true } zstd = { version = "0.13.0", default-features = false, optional = true } [features] diff --git a/parquet/Cargo.toml b/parquet/Cargo.toml index 75ab432cceb8..9ab59f4e7ed7 100644 --- a/parquet/Cargo.toml +++ b/parquet/Cargo.toml @@ -57,7 +57,7 @@ snap = { version = "1.0", default-features = false, optional = true } brotli = { version = "8.0", default-features = false, features = ["std"], optional = true } # To use `flate2` you must enable either the `flate2-zlib-rs` or `flate2-rust_backened` backends flate2 = { version = "1.1", default-features = false, optional = true } -lz4_flex = { version = "0.12", default-features = false, features = ["std", "frame"], optional = true } +lz4_flex = { version = "0.13", default-features = false, features = ["std", "frame"], optional = true } zstd = { version = "0.13", optional = true, default-features = false } chrono = { workspace = true } num-bigint = { version = "0.4", default-features = false } @@ -86,7 +86,7 @@ tempfile = { version = "3.0", default-features = false } insta = { workspace = true, default-features = true } brotli = { version = "8.0", default-features = false, features = ["std"] } flate2 = { version = "1.0", default-features = false, features = 
["rust_backend"] } -lz4_flex = { version = "0.12", default-features = false, features = ["std", "frame"] } +lz4_flex = { version = "0.13", default-features = false, features = ["std", "frame"] } zstd = { version = "0.13", default-features = false } serde_json = { version = "1.0", features = ["std"], default-features = false } arrow = { workspace = true, features = ["ipc", "test_utils", "prettyprint", "json"] } From 68b607631dc930d7220b82356be30cc0e5b9cac2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Heres?= Date: Tue, 17 Mar 2026 19:01:00 +0100 Subject: [PATCH 39/80] [minor] Download clickbench file when missing (#9553) # Which issue does this PR close? - Closes #NNN. # Rationale for this change I want it to download the file when it's not there # What changes are included in this PR? # Are these changes tested? # Are there any user-facing changes? --- parquet/benches/arrow_reader_clickbench.rs | 41 ++++++++++++++-------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/parquet/benches/arrow_reader_clickbench.rs b/parquet/benches/arrow_reader_clickbench.rs index 5a6fb36d5800..039829f1b975 100644 --- a/parquet/benches/arrow_reader_clickbench.rs +++ b/parquet/benches/arrow_reader_clickbench.rs @@ -598,27 +598,38 @@ impl Display for Query { /// FULL path to the ClickBench hits_1.parquet file static HITS_1_PATH: OnceLock = OnceLock::new(); -/// Finds the paths to the ClickBench file, or panics with a useful message -/// explaining how to download if it is not found +/// Finds the paths to the ClickBench file, downloading it if not found fn hits_1() -> &'static Path { HITS_1_PATH.get_or_init(|| { + let current_dir = std::env::current_dir().expect("Failed to get current directory"); + println!( + "Looking for ClickBench files starting in current_dir and all parent directories: {current_dir:?}" + ); - let current_dir = std::env::current_dir().expect("Failed to get current directory"); - println!( - "Looking for ClickBench files starting in 
current_dir and all parent directories: {current_dir:?}" - - ); + if let Some(hits_1_path) = find_file_if_exists(current_dir.clone(), "hits_1.parquet") { + return hits_1_path; + } - let Some(hits_1_path) = find_file_if_exists(current_dir.clone(), "hits_1.parquet") else { - eprintln!( - "Could not find hits_1.parquet in directory or parents: {current_dir:?}. Download it via", + // File not found, download it + let download_path = current_dir.join("hits_1.parquet"); + let url = "https://datasets.clickhouse.com/hits_compatible/athena_partitioned/hits_1.parquet"; + println!("hits_1.parquet not found, downloading from {url}..."); + + let status = std::process::Command::new("wget") + .args(["--continue", "-O"]) + .arg(&download_path) + .arg(url) + .status() + .expect("Failed to execute wget. Please install wget or download manually."); + + assert!( + status.success(), + "Failed to download hits_1.parquet. You can download it manually via:\n\ + wget --continue {url}" ); - eprintln!(); - eprintln!("wget --continue https://datasets.clickhouse.com/hits_compatible/athena_partitioned/hits_1.parquet"); - panic!("Stopping"); - }; - hits_1_path + println!("Downloaded hits_1.parquet to {download_path:?}"); + download_path }) } From e7b4842f7b4a8a1766baef3ddd35d5d305e63b5f Mon Sep 17 00:00:00 2001 From: Raz Luvaton <16746759+rluvaton@users.noreply.github.com> Date: Tue, 17 Mar 2026 20:25:36 +0200 Subject: [PATCH 40/80] feat: add `RunArray::new_unchecked` and `RunArray::into_parts` (#9376) # Which issue does this PR close? N/A # Rationale for this change Allow to make easy changes without validation (for example replace the values) # What changes are included in this PR? added 2 functions # Are these changes tested? yes # Are there any user-facing changes? 
yes new functions --- arrow-array/src/array/run_array.rs | 105 +++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) diff --git a/arrow-array/src/array/run_array.rs b/arrow-array/src/array/run_array.rs index a3cb4565f413..f317af6a10f0 100644 --- a/arrow-array/src/array/run_array.rs +++ b/arrow-array/src/array/run_array.rs @@ -123,6 +123,70 @@ impl RunArray { Ok(array_data.into()) } + /// Create a new [`RunArray`] from the provided parts, without validation + /// + /// # Safety + /// + /// Safe if [`Self::try_new`] would not error + pub unsafe fn new_unchecked( + data_type: DataType, + run_ends: RunEndBuffer, + values: ArrayRef, + ) -> Self { + if cfg!(feature = "force_validate") { + match &data_type { + DataType::RunEndEncoded(run_ends, values_field) => { + assert!(!run_ends.is_nullable(), "run_ends should not be nullable"); + assert_eq!( + run_ends.data_type(), + &R::DATA_TYPE, + "Incorrect run ends type" + ); + assert_eq!( + values_field.data_type(), + values.data_type(), + "Incorrect values type" + ); + } + _ => { + panic!( + "Invalid data type {data_type:?} for RunArray. Should be DataType::RunEndEncoded" + ); + } + } + + let run_array = Self { + data_type, + run_ends, + values, + }; + + // Safety: `validate_data` checks below + // 1. The given array data has exactly two child arrays. + // 2. The first child array (run_ends) has valid data type. + // 3. run_ends array does not have null values + // 4. run_ends array has non-zero and strictly increasing values. + // 5. The length of run_ends array and values array are the same. + run_array + .to_data() + .validate_data() + .expect("RunArray data should be valid"); + + return run_array; + } + + Self { + data_type, + run_ends, + values, + } + } + + /// Deconstruct this array into its constituent parts + pub fn into_parts(self) -> (DataType, RunEndBuffer, ArrayRef) { + (self.data_type, self.run_ends, self.values) + } + /// Returns a reference to the [`RunEndBuffer`]. 
pub fn run_ends(&self) -> &RunEndBuffer { &self.run_ends @@ -258,6 +322,7 @@ impl From for RunArray { let run_ends = unsafe { RunEndBuffer::new_unchecked(scalar, offset, len) }; let values = make_array(values_child); + Self { data_type, run_ends, @@ -1306,4 +1371,44 @@ mod tests { let slice3 = array1.slice(0, 4); // a, a, b, b assert_ne!(slice1, slice3); } + + #[test] + #[cfg(not(feature = "force_validate"))] + fn allow_to_create_invalid_array_using_new_unchecked() { + let valid = RunArray::::from_iter(["32"]); + let (_, buffer, values) = valid.into_parts(); + + let _ = unsafe { + // mismatch data type + RunArray::::new_unchecked(DataType::Int64, buffer, values) + }; + } + + #[test] + #[should_panic( + expected = "Invalid data type Int64 for RunArray. Should be DataType::RunEndEncoded" + )] + #[cfg(feature = "force_validate")] + fn should_not_be_able_to_create_invalid_array_using_new_unchecked_when_force_validate_is_enabled() + { + let valid = RunArray::::from_iter(["32"]); + let (_, buffer, values) = valid.into_parts(); + + let _ = unsafe { + // mismatch data type + RunArray::::new_unchecked(DataType::Int64, buffer, values) + }; + } + + #[test] + fn test_run_array_roundtrip() { + let run = Int32Array::from(vec![3, 6, 9, 12]); + let values = Int32Array::from(vec![Some(0), None, Some(1), None]); + let array = RunArray::try_new(&run, &values).unwrap(); + + let (dt, buffer, values) = array.clone().into_parts(); + let created_from_parts = + unsafe { RunArray::::new_unchecked(dt, buffer, values) }; + assert_eq!(array, created_from_parts); + } } From d1ec77065c6b606bce97b7acd51b2079182822ad Mon Sep 17 00:00:00 2001 From: Val Lorentz Date: Tue, 17 Mar 2026 20:00:09 +0100 Subject: [PATCH 41/80] DeltaBitPackEncoderConversion: Fix panic message on invalid type (#9552) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Which issue does this PR close? - Closes #9551. 
# Rationale for this change DeltaBitPackDecoder supports Int32Type, UInt32Type, Int64Type, and UInt64Type; but the error message claimed it supported only Int32Type and Int64Type # What changes are included in this PR? * changed the error message * deduplicated the string * extended `ensure_phys_ty!()` to allow anything `panic!()` does # Are these changes tested? no # Are there any user-facing changes? only the panic message --------- Co-authored-by: Daniël Heres Co-authored-by: Andrew Lamb --- parquet/Cargo.toml | 5 +++ parquet/src/data_type.rs | 4 +-- parquet/src/encodings/encoding/mod.rs | 16 ++++----- parquet/tests/arrow_writer.rs | 50 +++++++++++++++++++++++++++ 4 files changed, 64 insertions(+), 11 deletions(-) create mode 100644 parquet/tests/arrow_writer.rs diff --git a/parquet/Cargo.toml b/parquet/Cargo.toml index 9ab59f4e7ed7..4be779302448 100644 --- a/parquet/Cargo.toml +++ b/parquet/Cargo.toml @@ -165,6 +165,11 @@ name = "arrow_reader" required-features = ["arrow"] path = "./tests/arrow_reader/mod.rs" +[[test]] +name = "arrow_writer" +required-features = ["arrow"] +path = "./tests/arrow_writer.rs" + [[test]] name = "encryption" required-features = ["arrow"] diff --git a/parquet/src/data_type.rs b/parquet/src/data_type.rs index df5702d1bb00..d8c7b9201389 100644 --- a/parquet/src/data_type.rs +++ b/parquet/src/data_type.rs @@ -1331,10 +1331,10 @@ impl AsRef<[u8]> for FixedLenByteArray { /// Macro to reduce repetition in making type assertions on the physical type against `T` macro_rules! 
ensure_phys_ty { - ($($ty:pat_param)|+ , $err: literal) => { + ($($ty:pat_param)|+ , $($arg:tt)*) => { match T::get_physical_type() { $($ty => (),)* - _ => panic!($err), + _ => panic!($($arg)*), }; } } diff --git a/parquet/src/encodings/encoding/mod.rs b/parquet/src/encodings/encoding/mod.rs index e5e74ac53fa7..eeabcf4ba5ce 100644 --- a/parquet/src/encodings/encoding/mod.rs +++ b/parquet/src/encodings/encoding/mod.rs @@ -522,20 +522,18 @@ trait DeltaBitPackEncoderConversion { fn subtract_u64(&self, left: i64, right: i64) -> u64; } +const DELTA_BIT_PACK_TYPE_ERROR: &str = + "DeltaBitPackDecoder only supports Int32Type, UInt32Type, Int64Type, and UInt64Type"; + impl DeltaBitPackEncoderConversion for DeltaBitPackEncoder { #[inline] fn assert_supported_type() { - ensure_phys_ty!( - Type::INT32 | Type::INT64, - "DeltaBitPackDecoder only supports Int32Type and Int64Type" - ); + ensure_phys_ty!(Type::INT32 | Type::INT64, "{}", DELTA_BIT_PACK_TYPE_ERROR); } #[inline] fn as_i64(&self, values: &[T::T], index: usize) -> i64 { - values[index] - .as_i64() - .expect("DeltaBitPackDecoder only supports Int32Type and Int64Type") + values[index].as_i64().expect(DELTA_BIT_PACK_TYPE_ERROR) } #[inline] @@ -544,7 +542,7 @@ impl DeltaBitPackEncoderConversion for DeltaBitPackEncoder { match T::get_physical_type() { Type::INT32 => (left as i32).wrapping_sub(right as i32) as i64, Type::INT64 => left.wrapping_sub(right), - _ => panic!("DeltaBitPackDecoder only supports Int32Type and Int64Type"), + _ => panic!("{}", DELTA_BIT_PACK_TYPE_ERROR), } } @@ -554,7 +552,7 @@ impl DeltaBitPackEncoderConversion for DeltaBitPackEncoder { // Conversion of i32 -> u32 -> u64 is to avoid non-zero left most bytes in int repr Type::INT32 => (left as i32).wrapping_sub(right as i32) as u32 as u64, Type::INT64 => left.wrapping_sub(right) as u64, - _ => panic!("DeltaBitPackDecoder only supports Int32Type and Int64Type"), + _ => panic!("{}", DELTA_BIT_PACK_TYPE_ERROR), } } } diff --git 
a/parquet/tests/arrow_writer.rs b/parquet/tests/arrow_writer.rs new file mode 100644 index 000000000000..020b4c6267e0 --- /dev/null +++ b/parquet/tests/arrow_writer.rs @@ -0,0 +1,50 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! 
Tests for [`ArrowWriter`] + +use arrow::array::Float64Array; +use arrow::datatypes::{DataType, Field, Schema}; +use arrow::record_batch::RecordBatch; +use parquet::arrow::ArrowWriter; +use parquet::basic::Encoding; +use parquet::file::properties::WriterProperties; +use std::sync::Arc; + +#[test] +#[should_panic( + expected = "DeltaBitPackDecoder only supports Int32Type, UInt32Type, Int64Type, and UInt64Type" +)] +fn test_delta_bit_pack_type() { + let props = WriterProperties::builder() + .set_column_encoding("col".into(), Encoding::DELTA_BINARY_PACKED) + .build(); + + let record_batch = RecordBatch::try_new( + Arc::new(Schema::new(vec![Field::new( + "col", + DataType::Float64, + false, + )])), + vec![Arc::new(Float64Array::from_iter_values(vec![1., 2.]))], + ) + .unwrap(); + + let mut buffer = Vec::new(); + let mut writer = ArrowWriter::try_new(&mut buffer, record_batch.schema(), Some(props)).unwrap(); + let _ = writer.write(&record_batch); +} From bedabc59eb80e24e222398d6c4e38a4f783bf999 Mon Sep 17 00:00:00 2001 From: Mikhail Zabaluev Date: Tue, 17 Mar 2026 22:52:29 +0200 Subject: [PATCH 42/80] feat(arrow-avro): Configurable Arrow timezone ID for Avro timestamps (#9280) # Which issue does this PR close? - Closes #9279. # Rationale for this change Enable an alternative representation of UTC timestamp data types with the "UTC" timezone ID, which is useful for interoperability with applications preferring that form. # What changes are included in this PR? In the `ReaderBuilder` API, add a new method `with_tz` that allows users to specify the timezone ID for Avro logical types that represent UTC timestamps. The choices are between "+00:00" and "UTC" and can be selected by the new `Tz` enumeration. # Are these changes tested? Added unit tests to verify the representation with different `Tz` parameter values. # Are there any user-facing changes? A new `with_tz` method is added to `arrow_avro::reader::Builder`. 
--------- Co-authored-by: Connor Sanders <170039284+jecsand838@users.noreply.github.com> Co-authored-by: Andrew Lamb --- arrow-avro/src/codec.rs | 234 ++++++++++++------ arrow-avro/src/reader/async_reader/builder.rs | 13 +- arrow-avro/src/reader/async_reader/mod.rs | 43 +++- arrow-avro/src/reader/mod.rs | 53 +++- arrow-avro/src/reader/record.rs | 66 +++-- 5 files changed, 309 insertions(+), 100 deletions(-) diff --git a/arrow-avro/src/codec.rs b/arrow-avro/src/codec.rs index fc2a914d3514..92a0ed051951 100644 --- a/arrow-avro/src/codec.rs +++ b/arrow-avro/src/codec.rs @@ -621,7 +621,7 @@ impl<'a> TryFrom<&Schema<'a>> for AvroField { fn try_from(schema: &Schema<'a>) -> Result { match schema { Schema::Complex(ComplexType::Record(r)) => { - let mut resolver = Maker::new(false, false); + let mut resolver = Maker::new(false, false, Tz::default()); let data_type = resolver.make_data_type(schema, None, None)?; Ok(AvroField { data_type, @@ -642,6 +642,7 @@ pub(crate) struct AvroFieldBuilder<'a> { reader_schema: Option<&'a Schema<'a>>, use_utf8view: bool, strict_mode: bool, + tz: Tz, } impl<'a> AvroFieldBuilder<'a> { @@ -652,6 +653,7 @@ impl<'a> AvroFieldBuilder<'a> { reader_schema: None, use_utf8view: false, strict_mode: false, + tz: Tz::default(), } } @@ -677,11 +679,17 @@ impl<'a> AvroFieldBuilder<'a> { self } + /// Sets the timezone representation for timestamps. + pub(crate) fn with_tz(mut self, tz: Tz) -> Self { + self.tz = tz; + self + } + /// Build an [`AvroField`] from the builder pub(crate) fn build(self) -> Result { match self.writer_schema { Schema::Complex(ComplexType::Record(r)) => { - let mut resolver = Maker::new(self.use_utf8view, self.strict_mode); + let mut resolver = Maker::new(self.use_utf8view, self.strict_mode, self.tz); let data_type = resolver.make_data_type(self.writer_schema, self.reader_schema, None)?; Ok(AvroField { @@ -697,6 +705,36 @@ impl<'a> AvroFieldBuilder<'a> { } } +/// Timezone representation for timestamps. 
+/// +/// Avro only distinguishes between UTC and local time (no timezone), but Arrow supports +/// any of the two identifiers of the UTC timezone: "+00:00" and "UTC". +/// The data types using these time zone IDs behave identically, but are not logically equal. +#[derive(Debug, Copy, Clone, PartialEq, Default)] +pub enum Tz { + /// Represent Avro `timestamp-*` logical types with "+00:00" timezone ID + #[default] + OffsetZero, + /// Represent Avro `timestamp-*` logical types with "UTC" timezone ID + Utc, +} + +impl Tz { + /// Returns the string identifier for this timezone representation + pub fn as_str(&self) -> &'static str { + match self { + Self::OffsetZero => "+00:00", + Self::Utc => "UTC", + } + } +} + +impl Display for Tz { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(self.as_str()) + } +} + /// An Avro encoding /// /// @@ -732,18 +770,18 @@ pub(crate) enum Codec { /// Represents Avro timestamp-millis or local-timestamp-millis logical type /// /// Maps to Arrow's Timestamp(TimeUnit::Millisecond) data type - /// The boolean parameter indicates whether the timestamp has a UTC timezone (true) or is local time (false) - TimestampMillis(bool), + /// The parameter indicates whether the timestamp has a UTC timezone (Some) or is local time (None) + TimestampMillis(Option), /// Represents Avro timestamp-micros or local-timestamp-micros logical type /// /// Maps to Arrow's Timestamp(TimeUnit::Microsecond) data type - /// The boolean parameter indicates whether the timestamp has a UTC timezone (true) or is local time (false) - TimestampMicros(bool), + /// The parameter indicates whether the timestamp has a UTC timezone (Some) or is local time (None) + TimestampMicros(Option), /// Represents Avro timestamp-nanos or local-timestamp-nanos logical type /// /// Maps to Arrow's Timestamp(TimeUnit::Nanosecond) data type - /// The boolean parameter indicates whether the timestamp has a UTC timezone (true) or is local time (false) - 
TimestampNanos(bool), + /// The parameter indicates whether the timestamp has a UTC timezone (Some) or is local time (None) + TimestampNanos(Option), /// Represents Avro fixed type, maps to Arrow's FixedSizeBinary data type /// The i32 parameter indicates the fixed binary size Fixed(i32), @@ -844,15 +882,18 @@ impl Codec { Self::Date32 => DataType::Date32, Self::TimeMillis => DataType::Time32(TimeUnit::Millisecond), Self::TimeMicros => DataType::Time64(TimeUnit::Microsecond), - Self::TimestampMillis(is_utc) => { - DataType::Timestamp(TimeUnit::Millisecond, is_utc.then(|| "+00:00".into())) - } - Self::TimestampMicros(is_utc) => { - DataType::Timestamp(TimeUnit::Microsecond, is_utc.then(|| "+00:00".into())) - } - Self::TimestampNanos(is_utc) => { - DataType::Timestamp(TimeUnit::Nanosecond, is_utc.then(|| "+00:00".into())) - } + Self::TimestampMillis(tz) => DataType::Timestamp( + TimeUnit::Millisecond, + tz.as_ref().map(|tz| tz.as_str().into()), + ), + Self::TimestampMicros(tz) => DataType::Timestamp( + TimeUnit::Microsecond, + tz.as_ref().map(|tz| tz.as_str().into()), + ), + Self::TimestampNanos(tz) => DataType::Timestamp( + TimeUnit::Nanosecond, + tz.as_ref().map(|tz| tz.as_str().into()), + ), Self::Interval => DataType::Interval(IntervalUnit::MonthDayNano), Self::Fixed(size) => DataType::FixedSizeBinary(*size), Self::Decimal(precision, scale, _size) => { @@ -1112,12 +1153,15 @@ impl From<&Codec> for UnionFieldKind { Codec::Date32 => Self::Date, Codec::TimeMillis => Self::TimeMillis, Codec::TimeMicros => Self::TimeMicros, - Codec::TimestampMillis(true) => Self::TimestampMillisUtc, - Codec::TimestampMillis(false) => Self::TimestampMillisLocal, - Codec::TimestampMicros(true) => Self::TimestampMicrosUtc, - Codec::TimestampMicros(false) => Self::TimestampMicrosLocal, - Codec::TimestampNanos(true) => Self::TimestampNanosUtc, - Codec::TimestampNanos(false) => Self::TimestampNanosLocal, + Codec::TimestampMillis(Some(Tz::OffsetZero)) => Self::TimestampMillisUtc, + 
Codec::TimestampMillis(Some(Tz::Utc)) => Self::TimestampMillisUtc, + Codec::TimestampMillis(None) => Self::TimestampMillisLocal, + Codec::TimestampMicros(Some(Tz::OffsetZero)) => Self::TimestampMicrosUtc, + Codec::TimestampMicros(Some(Tz::Utc)) => Self::TimestampMicrosUtc, + Codec::TimestampMicros(None) => Self::TimestampMicrosLocal, + Codec::TimestampNanos(Some(Tz::OffsetZero)) => Self::TimestampNanosUtc, + Codec::TimestampNanos(Some(Tz::Utc)) => Self::TimestampNanosUtc, + Codec::TimestampNanos(None) => Self::TimestampNanosLocal, Codec::Interval => Self::Duration, Codec::Fixed(_) => Self::Fixed, Codec::Decimal(..) => Self::Decimal, @@ -1338,14 +1382,16 @@ struct Maker<'a> { resolver: Resolver<'a>, use_utf8view: bool, strict_mode: bool, + tz: Tz, } impl<'a> Maker<'a> { - fn new(use_utf8view: bool, strict_mode: bool) -> Self { + fn new(use_utf8view: bool, strict_mode: bool, tz: Tz) -> Self { Self { resolver: Default::default(), use_utf8view, strict_mode, + tz, } } @@ -1609,20 +1655,22 @@ impl<'a> Maker<'a> { (Some("time-millis"), c @ Codec::Int32) => *c = Codec::TimeMillis, (Some("time-micros"), c @ Codec::Int64) => *c = Codec::TimeMicros, (Some("timestamp-millis"), c @ Codec::Int64) => { - *c = Codec::TimestampMillis(true) + *c = Codec::TimestampMillis(Some(self.tz)) } (Some("timestamp-micros"), c @ Codec::Int64) => { - *c = Codec::TimestampMicros(true) + *c = Codec::TimestampMicros(Some(self.tz)) } (Some("local-timestamp-millis"), c @ Codec::Int64) => { - *c = Codec::TimestampMillis(false) + *c = Codec::TimestampMillis(None) } (Some("local-timestamp-micros"), c @ Codec::Int64) => { - *c = Codec::TimestampMicros(false) + *c = Codec::TimestampMicros(None) + } + (Some("timestamp-nanos"), c @ Codec::Int64) => { + *c = Codec::TimestampNanos(Some(self.tz)) } - (Some("timestamp-nanos"), c @ Codec::Int64) => *c = Codec::TimestampNanos(true), (Some("local-timestamp-nanos"), c @ Codec::Int64) => { - *c = Codec::TimestampNanos(false) + *c = Codec::TimestampNanos(None) } 
(Some("uuid"), c @ Codec::Utf8) => { // Map Avro string+logicalType=uuid into the UUID Codec, @@ -1721,7 +1769,7 @@ impl<'a> Maker<'a> { .and_then(|v| v.as_str()) { if unit == "nanosecond" { - field.codec = Codec::TimestampNanos(false); + field.codec = Codec::TimestampNanos(Some(self.tz)); } } } @@ -2345,7 +2393,7 @@ mod tests { fn resolve_promotion(writer: PrimitiveType, reader: PrimitiveType) -> AvroDataType { let writer_schema = Schema::TypeName(TypeName::Primitive(writer)); let reader_schema = Schema::TypeName(TypeName::Primitive(reader)); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Tz::default()); maker .make_data_type(&writer_schema, Some(&reader_schema), None) .expect("promotion should resolve") @@ -2362,7 +2410,7 @@ mod tests { fn test_date_logical_type() { let schema = create_schema_with_logical_type(PrimitiveType::Int, "date"); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Tz::default()); let result = maker.make_data_type(&schema, None, None).unwrap(); assert!(matches!(result.codec, Codec::Date32)); @@ -2372,7 +2420,7 @@ mod tests { fn test_time_millis_logical_type() { let schema = create_schema_with_logical_type(PrimitiveType::Int, "time-millis"); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Tz::default()); let result = maker.make_data_type(&schema, None, None).unwrap(); assert!(matches!(result.codec, Codec::TimeMillis)); @@ -2382,7 +2430,7 @@ mod tests { fn test_time_micros_logical_type() { let schema = create_schema_with_logical_type(PrimitiveType::Long, "time-micros"); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Tz::default()); let result = maker.make_data_type(&schema, None, None).unwrap(); assert!(matches!(result.codec, Codec::TimeMicros)); @@ -2390,42 +2438,77 @@ mod tests { #[test] fn test_timestamp_millis_logical_type() { - let schema = 
create_schema_with_logical_type(PrimitiveType::Long, "timestamp-millis"); + for tz in [Tz::OffsetZero, Tz::Utc] { + let schema = create_schema_with_logical_type(PrimitiveType::Long, "timestamp-millis"); - let mut maker = Maker::new(false, false); - let result = maker.make_data_type(&schema, None, None).unwrap(); + let mut maker = Maker::new(false, false, tz); + let result = maker.make_data_type(&schema, None, None).unwrap(); - assert!(matches!(result.codec, Codec::TimestampMillis(true))); + let Codec::TimestampMillis(Some(actual_tz)) = result.codec else { + panic!("Expected TimestampMillis codec"); + }; + assert_eq!(actual_tz, tz); + } } #[test] fn test_timestamp_micros_logical_type() { - let schema = create_schema_with_logical_type(PrimitiveType::Long, "timestamp-micros"); + for tz in [Tz::OffsetZero, Tz::Utc] { + let schema = create_schema_with_logical_type(PrimitiveType::Long, "timestamp-micros"); - let mut maker = Maker::new(false, false); - let result = maker.make_data_type(&schema, None, None).unwrap(); + let mut maker = Maker::new(false, false, tz); + let result = maker.make_data_type(&schema, None, None).unwrap(); + + let Codec::TimestampMicros(Some(actual_tz)) = result.codec else { + panic!("Expected TimestampMicros codec"); + }; + assert_eq!(actual_tz, tz); + } + } + + #[test] + fn test_timestamp_nanos_logical_type() { + for tz in [Tz::OffsetZero, Tz::Utc] { + let schema = create_schema_with_logical_type(PrimitiveType::Long, "timestamp-nanos"); + + let mut maker = Maker::new(false, false, tz); + let result = maker.make_data_type(&schema, None, None).unwrap(); - assert!(matches!(result.codec, Codec::TimestampMicros(true))); + let Codec::TimestampNanos(Some(actual_tz)) = result.codec else { + panic!("Expected TimestampNanos codec"); + }; + assert_eq!(actual_tz, tz); + } } #[test] fn test_local_timestamp_millis_logical_type() { let schema = create_schema_with_logical_type(PrimitiveType::Long, "local-timestamp-millis"); - let mut maker = Maker::new(false, 
false); + let mut maker = Maker::new(false, false, Tz::default()); let result = maker.make_data_type(&schema, None, None).unwrap(); - assert!(matches!(result.codec, Codec::TimestampMillis(false))); + assert!(matches!(result.codec, Codec::TimestampMillis(None))); } #[test] fn test_local_timestamp_micros_logical_type() { let schema = create_schema_with_logical_type(PrimitiveType::Long, "local-timestamp-micros"); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Tz::default()); + let result = maker.make_data_type(&schema, None, None).unwrap(); + + assert!(matches!(result.codec, Codec::TimestampMicros(None))); + } + + #[test] + fn test_local_timestamp_nanos_logical_type() { + let schema = create_schema_with_logical_type(PrimitiveType::Long, "local-timestamp-nanos"); + + let mut maker = Maker::new(false, false, Tz::default()); let result = maker.make_data_type(&schema, None, None).unwrap(); - assert!(matches!(result.codec, Codec::TimestampMicros(false))); + assert!(matches!(result.codec, Codec::TimestampNanos(None))); } #[test] @@ -2474,7 +2557,7 @@ mod tests { fn test_unknown_logical_type_added_to_metadata() { let schema = create_schema_with_logical_type(PrimitiveType::Int, "custom-type"); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Tz::default()); let result = maker.make_data_type(&schema, None, None).unwrap(); assert_eq!( @@ -2487,7 +2570,7 @@ mod tests { fn test_string_with_utf8view_enabled() { let schema = Schema::TypeName(TypeName::Primitive(PrimitiveType::String)); - let mut maker = Maker::new(true, false); + let mut maker = Maker::new(true, false, Tz::default()); let result = maker.make_data_type(&schema, None, None).unwrap(); assert!(matches!(result.codec, Codec::Utf8View)); @@ -2497,7 +2580,7 @@ mod tests { fn test_string_without_utf8view_enabled() { let schema = Schema::TypeName(TypeName::Primitive(PrimitiveType::String)); - let mut maker = Maker::new(false, false); + let mut 
maker = Maker::new(false, false, Tz::default()); let result = maker.make_data_type(&schema, None, None).unwrap(); assert!(matches!(result.codec, Codec::Utf8)); @@ -2526,7 +2609,7 @@ mod tests { let schema = Schema::Complex(ComplexType::Record(record)); - let mut maker = Maker::new(true, false); + let mut maker = Maker::new(true, false, Tz::default()); let result = maker.make_data_type(&schema, None, None).unwrap(); if let Codec::Struct(fields) = &result.codec { @@ -2544,7 +2627,7 @@ mod tests { Schema::TypeName(TypeName::Primitive(PrimitiveType::Null)), ]); - let mut maker = Maker::new(false, true); + let mut maker = Maker::new(false, true, Tz::default()); let result = maker.make_data_type(&schema, None, None); assert!(result.is_err()); @@ -2632,7 +2715,7 @@ mod tests { fn test_resolve_illegal_promotion_double_to_float_errors() { let writer_schema = Schema::TypeName(TypeName::Primitive(PrimitiveType::Double)); let reader_schema = Schema::TypeName(TypeName::Primitive(PrimitiveType::Float)); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Tz::default()); let result = maker.make_data_type(&writer_schema, Some(&reader_schema), None); assert!(result.is_err()); match result { @@ -2653,7 +2736,7 @@ mod tests { Schema::TypeName(TypeName::Primitive(PrimitiveType::Double)), Schema::TypeName(TypeName::Primitive(PrimitiveType::Null)), ]); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Tz::default()); let result = maker.make_data_type(&writer, Some(&reader), None).unwrap(); assert!(matches!(result.codec, Codec::Float64)); assert_eq!( @@ -2678,7 +2761,7 @@ mod tests { mk_primitive(PrimitiveType::Long), ]); let reader = mk_primitive(PrimitiveType::Bytes); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Tz::default()); let dt = maker.make_data_type(&writer, Some(&reader), None).unwrap(); assert!(matches!(dt.codec(), Codec::Binary)); let resolved = match 
dt.resolution { @@ -2702,7 +2785,7 @@ mod tests { mk_primitive(PrimitiveType::Long), mk_primitive(PrimitiveType::Double), ]); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Tz::default()); let dt = maker.make_data_type(&writer, Some(&reader), None).unwrap(); let resolved = match dt.resolution { Some(ResolutionInfo::Union(u)) => u, @@ -2723,7 +2806,7 @@ mod tests { mk_primitive(PrimitiveType::Long), mk_primitive(PrimitiveType::String), ]); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Tz::default()); let dt = maker.make_data_type(&writer, Some(&reader), None).unwrap(); let resolved = match dt.resolution { Some(ResolutionInfo::Union(u)) => u, @@ -2780,7 +2863,7 @@ mod tests { })), mk_primitive(PrimitiveType::String), ]); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Default::default()); let dt = maker .make_data_type(&writer, Some(&reader), None) .expect("resolution should succeed"); @@ -2877,7 +2960,7 @@ mod tests { })), mk_primitive(PrimitiveType::String), ]); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Default::default()); let dt = maker .make_data_type(&writer, Some(&reader), None) .expect("resolution should succeed"); @@ -2932,7 +3015,7 @@ mod tests { mk_primitive(PrimitiveType::String), mk_primitive(PrimitiveType::Null), ]); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Tz::default()); let dt = maker.make_data_type(&writer, Some(&reader), None).unwrap(); assert!(matches!(dt.codec(), Codec::Utf8)); assert_eq!(dt.nullability, Some(Nullability::NullFirst)); @@ -2960,7 +3043,7 @@ mod tests { mk_primitive(PrimitiveType::Double), mk_primitive(PrimitiveType::Null), ]); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Tz::default()); let dt = maker.make_data_type(&writer, Some(&reader), None).unwrap(); 
assert!(matches!(dt.codec(), Codec::Float64)); assert_eq!(dt.nullability, Some(Nullability::NullFirst)); @@ -2982,7 +3065,7 @@ mod tests { fn test_resolve_type_promotion() { let writer_schema = Schema::TypeName(TypeName::Primitive(PrimitiveType::Int)); let reader_schema = Schema::TypeName(TypeName::Primitive(PrimitiveType::Long)); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Tz::default()); let result = maker .make_data_type(&writer_schema, Some(&reader_schema), None) .unwrap(); @@ -3019,7 +3102,7 @@ mod tests { let schema: Schema = serde_json::from_str(schema_str).unwrap(); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Tz::default()); let avro_data_type = maker.make_data_type(&schema, None, None).unwrap(); if let Codec::Struct(fields) = avro_data_type.codec() { @@ -3099,7 +3182,7 @@ mod tests { let schema: Schema = serde_json::from_str(schema_str).unwrap(); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Tz::default()); let avro_data_type = maker.make_data_type(&schema, None, None).unwrap(); if let Codec::Struct(fields) = avro_data_type.codec() { @@ -3155,7 +3238,7 @@ mod tests { fn test_resolve_from_writer_and_reader_defaults_root_name_for_non_record_reader() { let writer_schema = Schema::TypeName(TypeName::Primitive(PrimitiveType::String)); let reader_schema = Schema::TypeName(TypeName::Primitive(PrimitiveType::String)); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Tz::default()); let data_type = maker .make_data_type(&writer_schema, Some(&reader_schema), None) .expect("resolution should succeed"); @@ -3289,13 +3372,12 @@ mod tests { .parse_and_store_default(&serde_json::json!(1_000_000)) .unwrap(); assert_eq!(ltm, AvroLiteral::Long(1_000_000)); - let mut dt_ts_milli = AvroDataType::new(Codec::TimestampMillis(true), HashMap::new(), None); + let mut dt_ts_milli = 
AvroDataType::new(Codec::TimestampMillis(None), HashMap::new(), None); let l1 = dt_ts_milli .parse_and_store_default(&serde_json::json!(123)) .unwrap(); assert_eq!(l1, AvroLiteral::Long(123)); - let mut dt_ts_micro = - AvroDataType::new(Codec::TimestampMicros(false), HashMap::new(), None); + let mut dt_ts_micro = AvroDataType::new(Codec::TimestampMicros(None), HashMap::new(), None); let l2 = dt_ts_micro .parse_and_store_default(&serde_json::json!(456)) .unwrap(); @@ -3534,7 +3616,7 @@ mod tests { additional: r_add, }, })); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Tz::default()); let dt = maker .make_data_type(&writer_schema, Some(&reader_schema), None) .unwrap(); @@ -3563,7 +3645,7 @@ mod tests { ])), attributes: Attributes::default(), })); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Tz::default()); let dt = maker .make_data_type(&writer_schema, Some(&reader_schema), None) .unwrap(); @@ -3595,7 +3677,7 @@ mod tests { size: 16, attributes: Attributes::default(), })); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Tz::default()); let dt = maker .make_data_type(&writer_schema, Some(&reader_schema), None) .unwrap(); @@ -3615,7 +3697,7 @@ mod tests { additional: Default::default(), }, })); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Default::default()); let dt = maker.make_data_type(&schema, None, None).unwrap(); assert!(matches!(dt.codec(), Codec::IntervalMonthDayNano)); assert_eq!( @@ -3696,7 +3778,7 @@ mod tests { ], attributes: Attributes::default(), })); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Tz::default()); let dt = maker .make_data_type(&writer, Some(&reader), None) .expect("record resolution"); @@ -3777,7 +3859,7 @@ mod tests { }; let writer_schema = Schema::Complex(ComplexType::Record(writer_record)); let reader_schema = 
Schema::Complex(ComplexType::Record(reader_record)); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Tz::default()); let result = maker .make_data_type(&writer_schema, Some(&reader_schema), None) .expect("record alias resolution should succeed"); @@ -3809,7 +3891,7 @@ mod tests { }; let writer_schema = Schema::Complex(ComplexType::Enum(writer_enum)); let reader_schema = Schema::Complex(ComplexType::Enum(reader_enum)); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Tz::default()); maker .make_data_type(&writer_schema, Some(&reader_schema), None) .expect("enum alias resolution should succeed"); @@ -3833,7 +3915,7 @@ mod tests { }; let writer_schema = Schema::Complex(ComplexType::Fixed(writer_fixed)); let reader_schema = Schema::Complex(ComplexType::Fixed(reader_fixed)); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Tz::default()); maker .make_data_type(&writer_schema, Some(&reader_schema), None) .expect("fixed alias resolution should succeed"); diff --git a/arrow-avro/src/reader/async_reader/builder.rs b/arrow-avro/src/reader/async_reader/builder.rs index 0f9a7abf1cd4..9e979c75669d 100644 --- a/arrow-avro/src/reader/async_reader/builder.rs +++ b/arrow-avro/src/reader/async_reader/builder.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-use crate::codec::AvroFieldBuilder; +use crate::codec::{AvroFieldBuilder, Tz}; use crate::errors::AvroError; use crate::reader::async_reader::ReaderState; use crate::reader::header::{Header, HeaderDecoder}; @@ -38,6 +38,7 @@ pub struct ReaderBuilder { header_size_hint: Option, utf8_view: bool, strict_mode: bool, + tz: Tz, } impl ReaderBuilder { @@ -52,6 +53,7 @@ impl ReaderBuilder { header_size_hint: None, utf8_view: false, strict_mode: false, + tz: Default::default(), } } @@ -107,6 +109,14 @@ impl ReaderBuilder { ..self } } + + /// Sets the timezone representation for Avro timestamp fields. + /// + /// The default is `Tz::OffsetZero`, meaning the "+00:00" time zone ID. + pub fn with_tz(mut self, tz: Tz) -> Self { + self.tz = tz; + self + } } impl ReaderBuilder { @@ -208,6 +218,7 @@ impl ReaderBuilder { builder .with_utf8view(self.utf8_view) .with_strict_mode(self.strict_mode) + .with_tz(self.tz) .build() }?; diff --git a/arrow-avro/src/reader/async_reader/mod.rs b/arrow-avro/src/reader/async_reader/mod.rs index 02c00a60e0ef..0aaa739eef85 100644 --- a/arrow-avro/src/reader/async_reader/mod.rs +++ b/arrow-avro/src/reader/async_reader/mod.rs @@ -541,6 +541,7 @@ impl Stream for AsyncAvroFileReader { #[cfg(all(test, feature = "object_store"))] mod tests { use super::*; + use crate::codec::Tz; use crate::schema::{ AVRO_NAME_METADATA_KEY, AVRO_NAMESPACE_METADATA_KEY, AvroSchema, SCHEMA_METADATA_KEY, }; @@ -562,6 +563,10 @@ mod tests { } fn get_alltypes_schema() -> SchemaRef { + get_alltypes_schema_with_tz("+00:00") + } + + fn get_alltypes_schema_with_tz(tz_id: &str) -> SchemaRef { let schema = Schema::new(vec![ Field::new("id", DataType::Int32, true), Field::new("bool_col", DataType::Boolean, true), @@ -575,7 +580,7 @@ mod tests { Field::new("string_col", DataType::Binary, true), Field::new( "timestamp_col", - DataType::Timestamp(TimeUnit::Microsecond, Some("+00:00".into())), + DataType::Timestamp(TimeUnit::Microsecond, Some(tz_id.into())), true, ), ]) @@ -1711,6 
+1716,42 @@ mod tests { assert_eq!(batch.num_columns(), 11); } + #[tokio::test] + async fn test_with_tz_utc() { + let file = arrow_test_data("avro/alltypes_plain.avro"); + let store: Arc = Arc::new(LocalFileSystem::new()); + let location = Path::from_filesystem_path(&file).unwrap(); + let file_size = store.head(&location).await.unwrap().size; + + let file_reader = AvroObjectReader::new(store, location); + let schema = get_alltypes_schema_with_tz("UTC"); + let reader_schema = AvroSchema::try_from(schema.as_ref()).unwrap(); + + // Specify the time zone ID of "UTC" for timestamp fields with time zone. + let reader = AsyncAvroFileReader::builder(file_reader, file_size, 1024) + .with_reader_schema(reader_schema) + .with_tz(Tz::Utc) + .try_build() + .await + .unwrap(); + + let batches: Vec = reader.try_collect().await.unwrap(); + let batch = &batches[0]; + + assert_eq!(batch.num_columns(), 11); + + let schema = batch.schema(); + let ts_field = schema.field_with_name("timestamp_col").unwrap(); + assert!( + matches!( + ts_field.data_type(), + DataType::Timestamp(TimeUnit::Microsecond, Some(tz)) if tz.as_ref() == "UTC" + ), + "expected Timestamp(Microsecond, Some(\"UTC\")), got {:?}", + ts_field.data_type() + ); + } + #[tokio::test] async fn test_with_utf8_view_enabled() { // Test that utf8_view produces StringViewArray instead of StringArray diff --git a/arrow-avro/src/reader/mod.rs b/arrow-avro/src/reader/mod.rs index 63b61b601e00..84d41cf9c6b1 100644 --- a/arrow-avro/src/reader/mod.rs +++ b/arrow-avro/src/reader/mod.rs @@ -478,7 +478,7 @@ //! descriptive error. Populate the store up front to avoid this. //! //! 
--- -use crate::codec::AvroFieldBuilder; +use crate::codec::{AvroFieldBuilder, Tz}; use crate::errors::AvroError; use crate::reader::header::read_header; use crate::schema::{ @@ -967,6 +967,7 @@ pub struct ReaderBuilder { batch_size: usize, strict_mode: bool, utf8_view: bool, + tz: Tz, reader_schema: Option, projection: Option>, writer_schema_store: Option, @@ -979,6 +980,7 @@ impl Default for ReaderBuilder { batch_size: 1024, strict_mode: false, utf8_view: false, + tz: Default::default(), reader_schema: None, projection: None, writer_schema_store: None, @@ -993,6 +995,7 @@ impl ReaderBuilder { /// * `batch_size = 1024` /// * `strict_mode = false` /// * `utf8_view = false` + /// * `tz = Tz::OffsetZero` /// * `reader_schema = None` /// * `projection = None` /// * `writer_schema_store = None` @@ -1013,6 +1016,7 @@ impl ReaderBuilder { let root = builder .with_utf8view(self.utf8_view) .with_strict_mode(self.strict_mode) + .with_tz(self.tz) .build()?; RecordDecoder::try_new_with_options(root.data_type()) } @@ -1173,6 +1177,14 @@ impl ReaderBuilder { self } + /// Sets the timezone representation for Avro timestamp fields. + /// + /// The default is `Tz::OffsetZero`, meaning the "+00:00" time zone ID. + pub fn with_tz(mut self, tz: Tz) -> Self { + self.tz = tz; + self + } + /// Sets the **reader schema** used during decoding. 
/// /// If not provided, the writer schema from the OCF header (for `Reader`) or the @@ -1400,7 +1412,7 @@ impl RecordBatchReader for Reader { #[cfg(test)] mod test { - use crate::codec::AvroFieldBuilder; + use crate::codec::{AvroFieldBuilder, Tz}; use crate::reader::header::HeaderDecoder; use crate::reader::record::RecordDecoder; use crate::reader::{Decoder, Reader, ReaderBuilder}; @@ -3129,6 +3141,43 @@ mod test { ); } + #[test] + fn test_timestamp_with_utc_tz() { + let path = arrow_test_data("avro/alltypes_plain.avro"); + let reader_schema = + make_reader_schema_with_selected_fields_in_order(&path, &["timestamp_col"]); + let file = File::open(path).unwrap(); + let reader = ReaderBuilder::new() + .with_batch_size(1024) + .with_utf8_view(false) + .with_reader_schema(reader_schema) + .with_tz(Tz::Utc) + .build(BufReader::new(file)) + .unwrap(); + let schema = reader.schema(); + let batches = reader.collect::, _>>().unwrap(); + let batch = arrow::compute::concat_batches(&schema, &batches).unwrap(); + let expected = RecordBatch::try_from_iter_with_nullable([( + "timestamp_col", + Arc::new( + TimestampMicrosecondArray::from_iter_values([ + 1235865600000000, // 2009-03-01T00:00:00.000 + 1235865660000000, // 2009-03-01T00:01:00.000 + 1238544000000000, // 2009-04-01T00:00:00.000 + 1238544060000000, // 2009-04-01T00:01:00.000 + 1233446400000000, // 2009-02-01T00:00:00.000 + 1233446460000000, // 2009-02-01T00:01:00.000 + 1230768000000000, // 2009-01-01T00:00:00.000 + 1230768060000000, // 2009-01-01T00:01:00.000 + ]) + .with_timezone("UTC"), + ) as _, + true, + )]) + .unwrap(); + assert_eq!(batch, expected); + } + #[test] // TODO: avoid requiring snappy for this file #[cfg(feature = "snappy")] diff --git a/arrow-avro/src/reader/record.rs b/arrow-avro/src/reader/record.rs index 605d29697392..97cdeed20fc6 100644 --- a/arrow-avro/src/reader/record.rs +++ b/arrow-avro/src/reader/record.rs @@ -19,7 +19,7 @@ use crate::codec::{ AvroDataType, AvroLiteral, Codec, EnumMapping, 
Promotion, ResolutionInfo, ResolvedField, - ResolvedRecord, ResolvedUnion, + ResolvedRecord, ResolvedUnion, Tz, }; use crate::errors::AvroError; use crate::reader::cursor::AvroCursor; @@ -227,9 +227,9 @@ enum Decoder { Date32(Vec), TimeMillis(Vec), TimeMicros(Vec), - TimestampMillis(bool, Vec), - TimestampMicros(bool, Vec), - TimestampNanos(bool, Vec), + TimestampMillis(Option, Vec), + TimestampMicros(Option, Vec), + TimestampNanos(Option, Vec), Int32ToInt64(Vec), Int32ToFloat32(Vec), Int32ToFloat64(Vec), @@ -347,14 +347,14 @@ impl Decoder { (Codec::Date32, _) => Self::Date32(Vec::with_capacity(DEFAULT_CAPACITY)), (Codec::TimeMillis, _) => Self::TimeMillis(Vec::with_capacity(DEFAULT_CAPACITY)), (Codec::TimeMicros, _) => Self::TimeMicros(Vec::with_capacity(DEFAULT_CAPACITY)), - (Codec::TimestampMillis(is_utc), _) => { - Self::TimestampMillis(*is_utc, Vec::with_capacity(DEFAULT_CAPACITY)) + (Codec::TimestampMillis(tz), _) => { + Self::TimestampMillis(*tz, Vec::with_capacity(DEFAULT_CAPACITY)) } - (Codec::TimestampMicros(is_utc), _) => { - Self::TimestampMicros(*is_utc, Vec::with_capacity(DEFAULT_CAPACITY)) + (Codec::TimestampMicros(tz), _) => { + Self::TimestampMicros(*tz, Vec::with_capacity(DEFAULT_CAPACITY)) } - (Codec::TimestampNanos(is_utc), _) => { - Self::TimestampNanos(*is_utc, Vec::with_capacity(DEFAULT_CAPACITY)) + (Codec::TimestampNanos(tz), _) => { + Self::TimestampNanos(*tz, Vec::with_capacity(DEFAULT_CAPACITY)) } #[cfg(feature = "avro_custom_types")] (Codec::DurationNanos, _) => { @@ -1487,17 +1487,17 @@ impl Decoder { Self::TimeMicros(values) => { Arc::new(flush_primitive::(values, nulls)) } - Self::TimestampMillis(is_utc, values) => Arc::new( + Self::TimestampMillis(tz, values) => Arc::new( flush_primitive::(values, nulls) - .with_timezone_opt(is_utc.then(|| "+00:00")), + .with_timezone_opt(tz.as_ref().map(|tz| tz.to_string())), ), - Self::TimestampMicros(is_utc, values) => Arc::new( + Self::TimestampMicros(tz, values) => Arc::new( 
flush_primitive::(values, nulls) - .with_timezone_opt(is_utc.then(|| "+00:00")), + .with_timezone_opt(tz.as_ref().map(|tz| tz.to_string())), ), - Self::TimestampNanos(is_utc, values) => Arc::new( + Self::TimestampNanos(tz, values) => Arc::new( flush_primitive::(values, nulls) - .with_timezone_opt(is_utc.then(|| "+00:00")), + .with_timezone_opt(tz.as_ref().map(|tz| tz.to_string())), ), #[cfg(feature = "avro_custom_types")] Self::DurationSecond(values) => { @@ -5498,8 +5498,8 @@ mod tests { } #[test] - fn test_timestamp_nanos_decoding_utc() { - let avro_type = avro_from_codec(Codec::TimestampNanos(true)); + fn test_timestamp_nanos_decoding_offset_zero() { + let avro_type = avro_from_codec(Codec::TimestampNanos(Some(Tz::OffsetZero))); let mut decoder = Decoder::try_new(&avro_type).expect("create TimestampNanos decoder"); let mut data = Vec::new(); for v in [0_i64, 1_i64, -1_i64, 1_234_567_890_i64] { @@ -5523,9 +5523,35 @@ mod tests { } } + #[test] + fn test_timestamp_nanos_decoding_utc() { + let avro_type = avro_from_codec(Codec::TimestampNanos(Some(Tz::Utc))); + let mut decoder = Decoder::try_new(&avro_type).expect("create TimestampNanos decoder"); + let mut data = Vec::new(); + for v in [0_i64, 1_i64, -1_i64, 1_234_567_890_i64] { + data.extend_from_slice(&encode_avro_long(v)); + } + let mut cur = AvroCursor::new(&data); + for _ in 0..4 { + decoder.decode(&mut cur).expect("decode nanos ts"); + } + let array = decoder.flush(None).expect("flush nanos ts"); + let ts = array + .as_any() + .downcast_ref::() + .expect("TimestampNanosecondArray"); + assert_eq!(ts.values(), &[0, 1, -1, 1_234_567_890]); + match ts.data_type() { + DataType::Timestamp(arrow_schema::TimeUnit::Nanosecond, tz) => { + assert_eq!(tz.as_deref(), Some("UTC")); + } + other => panic!("expected Timestamp(Nanosecond, Some(\"UTC\")), got {other:?}"), + } + } + #[test] fn test_timestamp_nanos_decoding_local() { - let avro_type = avro_from_codec(Codec::TimestampNanos(false)); + let avro_type = 
avro_from_codec(Codec::TimestampNanos(None)); let mut decoder = Decoder::try_new(&avro_type).expect("create TimestampNanos decoder"); let mut data = Vec::new(); for v in [10_i64, 20_i64, -30_i64] { @@ -5552,7 +5578,7 @@ mod tests { #[test] fn test_timestamp_nanos_decoding_with_nulls() { let avro_type = AvroDataType::new( - Codec::TimestampNanos(false), + Codec::TimestampNanos(None), Default::default(), Some(Nullability::NullFirst), ); From d42610711d12a03b810bf0297d38a36029093304 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Wed, 18 Mar 2026 00:31:25 -0500 Subject: [PATCH 43/80] Add has_true() and has_false() to BooleanArray (#9511) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Motivation When working with `BooleanArray`, a common pattern is checking whether *any* true or false value exists — e.g. `arr.true_count() > 0` or `arr.false_count() == 0`. This currently requires `true_count()` / `false_count()`, which scan the **entire** bitmap to count every set bit (via `popcount`), even though we only need to know if at least one exists. This PR adds `has_true()` and `has_false()` methods that short-circuit as soon as they find a matching value, providing both: 1. **Better performance** — faster on large arrays in the best case 2. 
**More ergonomic API** — `arr.has_true()` expresses intent more clearly than `arr.true_count() > 0` ## Callsites in DataFusion There are several places in DataFusion that would benefit from these methods: - **`datafusion/functions-nested/src/array_has.rs`** — `eq_array.true_count() > 0` → `eq_array.has_true()` - **`datafusion/physical-plan/src/topk/mod.rs`** — `filter.true_count() == 0` check → `!filter.has_true()` - **`datafusion/datasource-parquet/src/metadata.rs`** — `exactness.true_count() == 0` and `combined_mask.true_count() > 0` - **`datafusion/physical-plan/src/joins/nested_loop_join.rs`** — `bitmap.true_count() == 0` checks - **`datafusion/physical-expr-common/src/physical_expr.rs`** — `selection_count == 0` from `selection.true_count()` - **`datafusion/physical-expr/src/expressions/binary.rs`** — short-circuit checks for AND/OR ## Benchmark Results ``` Scenario true_count has_true has_false Speedup (best) ───────────────────────────────────────────────────────────────────────────────────────────── all_true, 64 4.32 ns 4.08 ns 4.76 ns ~1.1x all_false, 64 4.30 ns 4.25 ns 4.52 ns ~1.0x all_true, 1024 5.15 ns 4.52 ns 4.99 ns ~1.1x all_false, 1024 5.17 ns 4.55 ns 5.00 ns ~1.1x mixed_early, 1024 5.22 ns — 5.04 ns ~1.0x nulls_all_true, 1024 12.84 ns 4.10 ns 12.92 ns ~3.1x all_true, 65536 100.06 ns 5.96 ns 49.70 ns ~16.8x (has_true) all_false, 65536 99.33 ns 49.30 ns 6.19 ns ~16.0x (has_false) mixed_early, 65536 100.10 ns — 6.20 ns ~16.1x (has_false) nulls_all_true, 65536 522.94 ns 4.05 ns 521.82 ns ~129x (has_true) ``` The key wins are on larger arrays (65,536 elements), where `has_true`/`has_false` are **up to 16-129x faster** than `true_count()` in best-case scenarios (early short-circuit). Even in worst case (must scan entire array), performance is comparable to `true_count`. ## Implementation The implementation processes bits in 64-bit chunks using `UnalignedBitChunk`, which handles arbitrary bit offsets and aligns data for SIMD-friendly processing. 
- **`has_true` (no nulls):** OR-folds 64-bit chunks, short-circuits when any bit is set - **`has_false` (no nulls):** AND-folds 64-bit chunks, short-circuits when any bit is unset (with padding bits masked to 1) - **With nulls:** Iterates paired `(null, value)` chunks, checking `null & value != 0` (has_true) or `null & !value != 0` (has_false) ### Alternatives considered 1. **Fully vectorized (no early stopping):** Would process the entire bitmap like `true_count()` but with simpler bitwise ops instead of popcount. Marginally faster than `true_count()` but misses the main optimization opportunity. 2. **Per-element iteration with early stopping:** `self.iter().any(|v| v == Some(true))`. Simple but processes one bit at a time, missing SIMD vectorization of the inner loop. Our approach processes 64 bits at a time while still supporting early exit. The chosen approach balances SIMD-friendly bulk processing (64 bits per iteration) with early termination, giving the best of both worlds. ## Test Plan - Unit tests covering: all-true, all-false, mixed, empty, nullable (all-valid-true, all-valid-false, all-null), non-aligned lengths (65 elements, 64+1 with trailing false) - Criterion benchmarks comparing `has_true`/`has_false` vs `true_count` across sizes (64, 1024, 65536) and data distributions 🤖 Generated with [Claude Code](https://claude.com/claude-code) --------- Co-authored-by: Claude Opus 4.6 --- arrow-array/Cargo.toml | 4 + arrow-array/benches/boolean_array.rs | 77 +++++++++ arrow-array/src/array/boolean_array.rs | 209 ++++++++++++++++++++++++- 3 files changed, 288 insertions(+), 2 deletions(-) create mode 100644 arrow-array/benches/boolean_array.rs diff --git a/arrow-array/Cargo.toml b/arrow-array/Cargo.toml index 6be5a6daab56..da8ef98a1084 100644 --- a/arrow-array/Cargo.toml +++ b/arrow-array/Cargo.toml @@ -92,3 +92,7 @@ harness = false [[bench]] name = "record_batch" harness = false + +[[bench]] +name = "boolean_array" +harness = false diff --git
a/arrow-array/benches/boolean_array.rs b/arrow-array/benches/boolean_array.rs new file mode 100644 index 000000000000..03b601075bb8 --- /dev/null +++ b/arrow-array/benches/boolean_array.rs @@ -0,0 +1,77 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use arrow_array::BooleanArray; +use criterion::*; +use std::hint; + +fn criterion_benchmark(c: &mut Criterion) { + for len in [64, 1024, 65536] { + // All true (no nulls) + let all_true = BooleanArray::from(vec![true; len]); + c.bench_function(&format!("true_count(all_true, {len})"), |b| { + b.iter(|| hint::black_box(&all_true).true_count()); + }); + c.bench_function(&format!("has_true(all_true, {len})"), |b| { + b.iter(|| hint::black_box(&all_true).has_true()); + }); + c.bench_function(&format!("has_false(all_true, {len})"), |b| { + b.iter(|| hint::black_box(&all_true).has_false()); + }); + + // All false (no nulls) + let all_false = BooleanArray::from(vec![false; len]); + c.bench_function(&format!("true_count(all_false, {len})"), |b| { + b.iter(|| hint::black_box(&all_false).true_count()); + }); + c.bench_function(&format!("has_true(all_false, {len})"), |b| { + b.iter(|| hint::black_box(&all_false).has_true()); + }); + c.bench_function(&format!("has_false(all_false, {len})"), |b| { + b.iter(|| hint::black_box(&all_false).has_false()); + }); + + // Mixed: first element differs (best-case short-circuit) + let mut mixed_early: Vec<bool> = vec![true; len]; + mixed_early[0] = false; + let mixed_early = BooleanArray::from(mixed_early); + c.bench_function(&format!("true_count(mixed_early, {len})"), |b| { + b.iter(|| hint::black_box(&mixed_early).true_count()); + }); + c.bench_function(&format!("has_false(mixed_early, {len})"), |b| { + b.iter(|| hint::black_box(&mixed_early).has_false()); + }); + + // With nulls: all valid values true + let with_nulls: Vec<Option<bool>> = (0..len) + .map(|i| if i % 10 == 0 { None } else { Some(true) }) + .collect(); + let with_nulls = BooleanArray::from(with_nulls); + c.bench_function(&format!("true_count(nulls_all_true, {len})"), |b| { + b.iter(|| hint::black_box(&with_nulls).true_count()); + }); + c.bench_function(&format!("has_true(nulls_all_true, {len})"), |b| { + b.iter(|| hint::black_box(&with_nulls).has_true()); + }); + 
c.bench_function(&format!("has_false(nulls_all_true, {len})"), |b| { + b.iter(|| hint::black_box(&with_nulls).has_false()); + }); + } +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/arrow-array/src/array/boolean_array.rs b/arrow-array/src/array/boolean_array.rs index 582627b24396..1a2dd986ad25 100644 --- a/arrow-array/src/array/boolean_array.rs +++ b/arrow-array/src/array/boolean_array.rs @@ -19,6 +19,7 @@ use crate::array::print_long_array; use crate::builder::BooleanBuilder; use crate::iterator::BooleanIter; use crate::{Array, ArrayAccessor, ArrayRef, Scalar}; +use arrow_buffer::bit_chunk_iterator::UnalignedBitChunk; use arrow_buffer::{BooleanBuffer, Buffer, MutableBuffer, NullBuffer, bit_util}; use arrow_data::{ArrayData, ArrayDataBuilder}; use arrow_schema::DataType; @@ -156,7 +157,18 @@ impl BooleanArray { &self.values } - /// Returns the number of non null, true values within this array + /// Block size for chunked fold operations in [`Self::has_true`] and [`Self::has_false`]. + /// Folding this many u64 chunks at a time allows the compiler to autovectorize + /// the inner loop while still enabling short-circuit exits. + const CHUNK_FOLD_BLOCK_SIZE: usize = 64; + + /// Returns an [`UnalignedBitChunk`] over this array's values. + fn unaligned_bit_chunks(&self) -> UnalignedBitChunk<'_> { + UnalignedBitChunk::new(self.values().values(), self.values().offset(), self.len()) + } + + /// Returns the number of non null, true values within this array. + /// If you only need to check if there is at least one true value, consider using `has_true()` which can short-circuit and be more efficient. pub fn true_count(&self) -> usize { match self.nulls() { Some(nulls) => { @@ -171,11 +183,80 @@ impl BooleanArray { } } - /// Returns the number of non null, false values within this array + /// Returns the number of non null, false values within this array. 
+ /// If you only need to check if there is at least one false value, consider using `has_false()` which can short-circuit and be more efficient. pub fn false_count(&self) -> usize { self.len() - self.null_count() - self.true_count() } + /// Returns whether there is at least one non-null `true` value in this array. + /// + /// This is more efficient than `true_count() > 0` because it can short-circuit + /// as soon as a `true` value is found, without counting all set bits. + /// + /// Null values are not counted as `true`. Returns `false` for empty arrays. + pub fn has_true(&self) -> bool { + match self.nulls() { + Some(nulls) => { + let null_chunks = nulls.inner().bit_chunks().iter_padded(); + let value_chunks = self.values().bit_chunks().iter_padded(); + null_chunks.zip(value_chunks).any(|(n, v)| (n & v) != 0) + } + None => { + let bit_chunks = self.unaligned_bit_chunks(); + bit_chunks.prefix().unwrap_or(0) != 0 + || bit_chunks + .chunks() + .chunks(Self::CHUNK_FOLD_BLOCK_SIZE) + .any(|block| block.iter().fold(0u64, |acc, &c| acc | c) != 0) + || bit_chunks.suffix().unwrap_or(0) != 0 + } + } + } + + /// Returns whether there is at least one non-null `false` value in this array. + /// + /// This is more efficient than `false_count() > 0` because it can short-circuit + /// as soon as a `false` value is found, without counting all set bits. + /// + /// Null values are not counted as `false`. Returns `false` for empty arrays. + pub fn has_false(&self) -> bool { + match self.nulls() { + Some(nulls) => { + let null_chunks = nulls.inner().bit_chunks().iter_padded(); + let value_chunks = self.values().bit_chunks().iter_padded(); + null_chunks.zip(value_chunks).any(|(n, v)| (n & !v) != 0) + } + None => { + let bit_chunks = self.unaligned_bit_chunks(); + // UnalignedBitChunk zeros padding bits; fill them with 1s so + // they don't appear as false values. 
+ let lead_mask = !((1u64 << bit_chunks.lead_padding()) - 1); + let trail_mask = if bit_chunks.trailing_padding() == 0 { + u64::MAX + } else { + (1u64 << (64 - bit_chunks.trailing_padding())) - 1 + }; + let (prefix_fill, suffix_fill) = match (bit_chunks.prefix(), bit_chunks.suffix()) { + (Some(_), Some(_)) => (!lead_mask, !trail_mask), + (Some(_), None) => (!lead_mask | !trail_mask, 0), + (None, Some(_)) => (0, !trail_mask), + (None, None) => (0, 0), + }; + bit_chunks + .prefix() + .is_some_and(|v| (v | prefix_fill) != u64::MAX) + || bit_chunks + .chunks() + .chunks(Self::CHUNK_FOLD_BLOCK_SIZE) + .any(|block| block.iter().fold(u64::MAX, |acc, &c| acc & c) != u64::MAX) + || bit_chunks + .suffix() + .is_some_and(|v| (v | suffix_fill) != u64::MAX) + } + } + } + /// Returns the boolean value at index `i`. /// /// Note: This method does not check for nulls and the value is arbitrary @@ -854,4 +935,128 @@ mod tests { assert!(sliced.is_valid(1)); assert!(!sliced.value(1)); } + + #[test] + fn test_has_true_has_false_all_true() { + let arr = BooleanArray::from(vec![true, true, true]); + assert!(arr.has_true()); + assert!(!arr.has_false()); + } + + #[test] + fn test_has_true_has_false_all_false() { + let arr = BooleanArray::from(vec![false, false, false]); + assert!(!arr.has_true()); + assert!(arr.has_false()); + } + + #[test] + fn test_has_true_has_false_mixed() { + let arr = BooleanArray::from(vec![true, false, true]); + assert!(arr.has_true()); + assert!(arr.has_false()); + } + + #[test] + fn test_has_true_has_false_empty() { + let arr = BooleanArray::from(Vec::::new()); + assert!(!arr.has_true()); + assert!(!arr.has_false()); + } + + #[test] + fn test_has_true_has_false_nulls_all_valid_true() { + let arr = BooleanArray::from(vec![Some(true), None, Some(true)]); + assert!(arr.has_true()); + assert!(!arr.has_false()); + } + + #[test] + fn test_has_true_has_false_nulls_all_valid_false() { + let arr = BooleanArray::from(vec![Some(false), None, Some(false)]); + 
assert!(!arr.has_true()); + assert!(arr.has_false()); + } + + #[test] + fn test_has_true_has_false_all_null() { + let arr = BooleanArray::new_null(5); + assert!(!arr.has_true()); + assert!(!arr.has_false()); + } + + #[test] + fn test_has_false_aligned_suffix_all_true() { + let arr = BooleanArray::from(vec![true; 129]); + assert!(arr.has_true()); + assert!(!arr.has_false()); + } + + #[test] + fn test_has_false_non_aligned_all_true() { + // 65 elements: exercises the remainder path in has_false + let arr = BooleanArray::from(vec![true; 65]); + assert!(arr.has_true()); + assert!(!arr.has_false()); + } + + #[test] + fn test_has_false_non_aligned_last_false() { + // 64 trues + 1 false: remainder path should find the false + let mut values = vec![true; 64]; + values.push(false); + let arr = BooleanArray::from(values); + assert!(arr.has_true()); + assert!(arr.has_false()); + } + + #[test] + fn test_has_false_exact_64_all_true() { + // Exactly 64 elements, no remainder + let arr = BooleanArray::from(vec![true; 64]); + assert!(arr.has_true()); + assert!(!arr.has_false()); + } + + #[test] + fn test_has_true_has_false_unaligned_slices() { + let cases = [ + (1, 129, true, false), + (3, 130, true, false), + (5, 65, true, false), + (7, 64, true, false), + ]; + + let base = BooleanArray::from(vec![true; 300]); + + for (offset, len, expected_has_true, expected_has_false) in cases { + let arr = base.slice(offset, len); + assert_eq!( + arr.has_true(), + expected_has_true, + "offset={offset} len={len}" + ); + assert_eq!( + arr.has_false(), + expected_has_false, + "offset={offset} len={len}" + ); + } + } + + #[test] + fn test_has_true_has_false_exact_multiples_of_64() { + let cases = [ + (64, true, false), + (128, true, false), + (192, true, false), + (256, true, false), + ]; + + for (len, expected_has_true, expected_has_false) in cases { + let arr = BooleanArray::from(vec![true; len]); + assert_eq!(arr.has_true(), expected_has_true, "len={len}"); + assert_eq!(arr.has_false(), 
expected_has_false, "len={len}"); + } + } } From 19889a33f63427c4b22ab3b7fcb62b77dbe9ddec Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Wed, 18 Mar 2026 02:58:01 -0500 Subject: [PATCH 44/80] Use chunks_exact for has_true/has_false to enable compiler unrolling (#9570) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary - Replace `.chunks(64)` with `.chunks_exact(16)` in `has_true()` and `has_false()` as suggested in https://github.com/apache/arrow-rs/pull/9511#discussion_r2950942579 - With `chunks_exact`, the compiler can fully unroll the inner fold (guaranteed size, no inner branch/loop), allowing a smaller block size for more frequent short-circuit exits without regressing the full-scan path ## Benchmark results (block size 16 vs baseline) - Full-scan worst case (65536): No regression (~49ns both) - Early-exit cases (65536): ~27% faster (6.0ns → 4.4ns) - Small arrays (64, 1024): Unchanged ## Test plan - [x] All 13 existing `test_has` tests pass run benchmarks boolean_array @DanDanDan Would appreciate your review! 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-authored-by: Claude Opus 4.6 (1M context) --- arrow-array/src/array/boolean_array.rs | 29 ++++++++++++++------------ 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/arrow-array/src/array/boolean_array.rs b/arrow-array/src/array/boolean_array.rs index 1a2dd986ad25..ee3413e1833d 100644 --- a/arrow-array/src/array/boolean_array.rs +++ b/arrow-array/src/array/boolean_array.rs @@ -158,9 +158,9 @@ impl BooleanArray { } /// Block size for chunked fold operations in [`Self::has_true`] and [`Self::has_false`]. - /// Folding this many u64 chunks at a time allows the compiler to autovectorize - /// the inner loop while still enabling short-circuit exits. 
- const CHUNK_FOLD_BLOCK_SIZE: usize = 64; + /// Using `chunks_exact` with this size lets the compiler fully unroll the inner + /// fold (no inner branch/loop), enabling short-circuit exits every N chunks. + const CHUNK_FOLD_BLOCK_SIZE: usize = 16; /// Returns an [`UnalignedBitChunk`] over this array's values. fn unaligned_bit_chunks(&self) -> UnalignedBitChunk<'_> { @@ -204,11 +204,12 @@ impl BooleanArray { } None => { let bit_chunks = self.unaligned_bit_chunks(); - bit_chunks.prefix().unwrap_or(0) != 0 - || bit_chunks - .chunks() - .chunks(Self::CHUNK_FOLD_BLOCK_SIZE) - .any(|block| block.iter().fold(0u64, |acc, &c| acc | c) != 0) + let chunks = bit_chunks.chunks(); + let mut exact = chunks.chunks_exact(Self::CHUNK_FOLD_BLOCK_SIZE); + let found = bit_chunks.prefix().unwrap_or(0) != 0 + || exact.any(|block| block.iter().fold(0u64, |acc, &c| acc | c) != 0); + found + || exact.remainder().iter().any(|&c| c != 0) || bit_chunks.suffix().unwrap_or(0) != 0 } } @@ -243,13 +244,15 @@ impl BooleanArray { (None, Some(_)) => (0, !trail_mask), (None, None) => (0, 0), }; - bit_chunks + let chunks = bit_chunks.chunks(); + let mut exact = chunks.chunks_exact(Self::CHUNK_FOLD_BLOCK_SIZE); + let found = bit_chunks .prefix() .is_some_and(|v| (v | prefix_fill) != u64::MAX) - || bit_chunks - .chunks() - .chunks(Self::CHUNK_FOLD_BLOCK_SIZE) - .any(|block| block.iter().fold(u64::MAX, |acc, &c| acc & c) != u64::MAX) + || exact + .any(|block| block.iter().fold(u64::MAX, |acc, &c| acc & c) != u64::MAX); + found + || exact.remainder().iter().any(|&c| c != u64::MAX) || bit_chunks .suffix() .is_some_and(|v| (v | suffix_fill) != u64::MAX) From e3926a96b7b807e54cb303791a3d31cd9591357b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Heres?= Date: Wed, 18 Mar 2026 14:04:03 +0100 Subject: [PATCH 45/80] Add mutable operations to BooleanBuffer (Bit*Assign) (#9567) # Which issue does this PR close? - Closes #NNN. 
# Rationale for this change I want to avoid allocating a new buffer when doing `&`. We can use `&=` this way. # What changes are included in this PR? # Are these changes tested? # Are there any user-facing changes? --------- Co-authored-by: Andrew Lamb --- arrow-buffer/src/buffer/boolean.rs | 125 ++++++++++++++++++++++++++++- 1 file changed, 124 insertions(+), 1 deletion(-) diff --git a/arrow-buffer/src/buffer/boolean.rs b/arrow-buffer/src/buffer/boolean.rs index c1c7529e0a2d..bae083b3b208 100644 --- a/arrow-buffer/src/buffer/boolean.rs +++ b/arrow-buffer/src/buffer/boolean.rs @@ -23,7 +23,7 @@ use crate::{ buffer_bin_xor, }; -use std::ops::{BitAnd, BitOr, BitXor, Not}; +use std::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, Not}; /// A slice-able [`Buffer`] containing bit-packed booleans /// @@ -67,6 +67,27 @@ use std::ops::{BitAnd, BitOr, BitXor, Not}; /// Note that the bits marked `?` are not logically part of the mask and may /// contain either `0` or `1` /// +/// # Bitwise Operations +/// +/// `BooleanBuffer` implements the standard bitwise traits for creating a new +/// buffer ([`BitAnd`], [`BitOr`], [`BitXor`], [`Not`]) as well as the assign variants +/// for updating an existing buffer in place when possible ([`BitAndAssign`], +/// [`BitOrAssign`], [`BitXorAssign`]). 
+/// +/// ``` +/// # use arrow_buffer::BooleanBuffer; +/// let mut left = BooleanBuffer::from(&[true, false, true, true] as &[bool]); +/// let right = BooleanBuffer::from(&[true, true, false, true] as &[bool]); +/// +/// // Create a new buffer by applying bitwise AND +/// let anded = &left & &right; +/// assert_eq!(anded, BooleanBuffer::from(&[true, false, false, true] as &[bool])); +/// +/// // Update `left` in place by applying bitwise AND in place +/// left &= &right; +/// assert_eq!(left, BooleanBuffer::from(&[true, false, false, true] as &[bool])); +/// ``` +/// /// # See Also /// * [`BooleanBufferBuilder`] for building [`BooleanBuffer`] instances /// * [`NullBuffer`] for representing null values in Arrow arrays @@ -497,6 +518,49 @@ impl BooleanBuffer { self.buffer.claim(pool); } + /// Apply a bitwise binary operation to `self`. + /// + /// If the underlying buffer is uniquely owned, reuses the allocation + /// and updates the bytes in place. If the underlying buffer is shared, + /// returns a newly allocated buffer. + /// + /// # API Notes + /// + /// If the buffer is reused, the result preserves the existing offset, which + /// may be non-zero. 
+ fn bitwise_bin_op_assign<F>(&mut self, rhs: &BooleanBuffer, op: F) + where + F: FnMut(u64, u64) -> u64, + { + assert_eq!(self.bit_len, rhs.bit_len); + // Try to mutate in place if the buffer is uniquely owned + let buffer = std::mem::take(&mut self.buffer); + match buffer.into_mutable() { + Ok(mut buf) => { + bit_util::apply_bitwise_binary_op( + &mut buf, + self.bit_offset, + &rhs.buffer, + rhs.bit_offset, + self.bit_len, + op, + ); + self.buffer = buf.into(); + } + Err(buf) => { + self.buffer = buf; + *self = BooleanBuffer::from_bitwise_binary_op( + self.values(), + self.bit_offset, + rhs.values(), + rhs.bit_offset, + self.bit_len, + op, + ); + } + } + } + /// Returns an iterator over the bits in this [`BooleanBuffer`] pub fn iter(&self) -> BitIterator<'_> { self.into_iter() @@ -583,6 +647,24 @@ impl BitXor<&BooleanBuffer> for &BooleanBuffer { } } +impl BitAndAssign<&BooleanBuffer> for BooleanBuffer { + fn bitand_assign(&mut self, rhs: &BooleanBuffer) { + self.bitwise_bin_op_assign(rhs, |a, b| a & b); + } +} + +impl BitOrAssign<&BooleanBuffer> for BooleanBuffer { + fn bitor_assign(&mut self, rhs: &BooleanBuffer) { + self.bitwise_bin_op_assign(rhs, |a, b| a | b); + } +} + +impl BitXorAssign<&BooleanBuffer> for BooleanBuffer { + fn bitxor_assign(&mut self, rhs: &BooleanBuffer) { + self.bitwise_bin_op_assign(rhs, |a, b| a ^ b); + } +} + impl<'a> IntoIterator for &'a BooleanBuffer { type Item = bool; type IntoIter = BitIterator<'a>; @@ -729,6 +811,47 @@ mod tests { assert_eq!(boolean_buf1 ^ boolean_buf2, expected); } + #[test] + fn test_boolean_bitand_assign_shared_and_unshared() { + let rhs = BooleanBuffer::from(&[true, true, false, true, false, true][..]); + let original = BooleanBuffer::from(&[true, false, true, true, true, false][..]); + + let mut unshared = BooleanBuffer::from(&[true, false, true, true, true, false][..]); + unshared &= &rhs; + + let mut shared = original.clone(); + let _shared_owner = shared.clone(); + shared &= &rhs; + + let expected = &original 
& &rhs; + assert_eq!(unshared, expected); + assert_eq!(shared, expected); + } + + #[test] + fn test_boolean_bitor_assign() { + let rhs = BooleanBuffer::from(&[true, true, false, true, false, true][..]); + let original = BooleanBuffer::from(&[true, false, true, true, true, false][..]); + + let mut actual = original.clone(); + actual |= &rhs; + + let expected = &original | &rhs; + assert_eq!(actual, expected); + } + + #[test] + fn test_boolean_bitxor_assign() { + let rhs = BooleanBuffer::from(&[true, true, false, true, false, true][..]); + let original = BooleanBuffer::from(&[true, false, true, true, true, false][..]); + + let mut actual = original.clone(); + actual ^= &rhs; + + let expected = &original ^ &rhs; + assert_eq!(actual, expected); + } + #[test] fn test_boolean_not() { let offset = 0; From edc3cb78b4e2b9bf6a21e4c522d0f9e90fa10532 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alfonso=20Subiotto=20Marqu=C3=A9s?= Date: Wed, 18 Mar 2026 14:25:02 +0100 Subject: [PATCH 46/80] arrow-select: fix MutableArrayData interleave for ListView (#9560) The previous code did not extend child data buffers. I'm preparing a PR for an optimized listview interleave, but wanted to make sure the fallback path was correct before comparing benchmarks. # Which issue does this PR close? - Closes #9559 - Closes https://github.com/apache/arrow-rs/pull/9562 - https://github.com/apache/arrow-rs/issues/9561 # Rationale for this change Fix a bug # What changes are included in this PR? Bugfix and test # Are these changes tested? Yes # Are there any user-facing changes? ListView interleaves did not succeed previously. 
Signed-off-by: Alfonso Subiotto Marques --- arrow-data/src/transform/list_view.rs | 23 ++++++----- arrow-select/src/interleave.rs | 55 +++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 12 deletions(-) diff --git a/arrow-data/src/transform/list_view.rs b/arrow-data/src/transform/list_view.rs index 9b66a6a6abb1..f01e14b978c9 100644 --- a/arrow-data/src/transform/list_view.rs +++ b/arrow-data/src/transform/list_view.rs @@ -27,21 +27,20 @@ pub(super) fn build_extend( let offsets = array.buffer::(0); let sizes = array.buffer::(1); Box::new( - move |mutable: &mut _MutableArrayData, _index: usize, start: usize, len: usize| { - let offset_buffer = &mut mutable.buffer1; - let sizes_buffer = &mut mutable.buffer2; + move |mutable: &mut _MutableArrayData, index: usize, start: usize, len: usize| { + let mut new_offset = T::usize_as(mutable.child_data[0].len()); - for &offset in &offsets[start..start + len] { - offset_buffer.push(offset); - } + for i in start..start + len { + mutable.buffer1.push(new_offset); + mutable.buffer2.push(sizes[i]); + new_offset = new_offset.checked_add(&sizes[i]).expect("offset overflow"); - // sizes - for &size in &sizes[start..start + len] { - sizes_buffer.push(size); + let size = sizes[i].as_usize(); + if size > 0 { + let child_start = offsets[i].as_usize(); + mutable.child_data[0].extend(index, child_start, child_start + size); + } } - - // the beauty of views is that we don't need to copy child_data, we just splat - // the offsets and sizes. 
}, ) } diff --git a/arrow-select/src/interleave.rs b/arrow-select/src/interleave.rs index 711e816f70d5..f5904bc171ee 100644 --- a/arrow-select/src/interleave.rs +++ b/arrow-select/src/interleave.rs @@ -582,6 +582,7 @@ mod tests { use arrow_array::Int32RunArray; use arrow_array::builder::{GenericListBuilder, Int32Builder, PrimitiveRunBuilder}; use arrow_array::types::Int8Type; + use arrow_buffer::ScalarBuffer; use arrow_schema::Field; #[test] @@ -1489,4 +1490,58 @@ mod tests { Err(ArrowError::OffsetOverflowError(_)) )); } + + #[test] + fn test_interleave_list_view() { + // `interleave` for ListView falls through to `interleave_fallback`, which uses + // `MutableArrayData`. `list_view::build_extend` copies offsets/sizes but never + // extends the child array, so the result contains offsets/sizes that reference + // positions in the now-absent original child arrays while the child is empty. + // + // lv_a: [[1, 2], [3]] (values=[1,2,3], offsets=[0,2], sizes=[2,1]) + // lv_b: [[4, 5, 6]] (values=[4,5,6], offsets=[0], sizes=[3]) + // interleave at [(0,0), (1,0), (0,1)] should produce [[1, 2], [4, 5, 6], [3]] + let field = Arc::new(Field::new_list_field(DataType::Int64, false)); + + let lv_a = ListViewArray::new( + Arc::clone(&field), + ScalarBuffer::from(vec![0i32, 2]), + ScalarBuffer::from(vec![2i32, 1]), + Arc::new(Int64Array::from(vec![1_i64, 2, 3])), + None, + ); + let lv_b = ListViewArray::new( + field, + ScalarBuffer::from(vec![0i32]), + ScalarBuffer::from(vec![3i32]), + Arc::new(Int64Array::from(vec![4_i64, 5, 6])), + None, + ); + + let result = interleave( + &[&lv_a as &dyn Array, &lv_b as &dyn Array], + &[(0, 0), (1, 0), (0, 1)], + ) + .unwrap(); + + result + .to_data() + .validate_full() + .expect("interleaved ListViewArray must be internally consistent"); + + let result_lv = result.as_list_view::(); + assert_eq!(result_lv.len(), 3); + assert_eq!( + result_lv.value(0).as_primitive::().values(), + &[1, 2] + ); + assert_eq!( + 
result_lv.value(1).as_primitive::().values(), + &[4, 5, 6] + ); + assert_eq!( + result_lv.value(2).as_primitive::().values(), + &[3] + ); + } } From 00ad7fca2fc5e09c0da5f56f87edc3a454eec576 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Burak=20=C5=9Een?= Date: Wed, 18 Mar 2026 16:25:34 +0300 Subject: [PATCH 47/80] chore: extend record_batch macro to support variables and expressions (#9522) # Which issue does this PR close? - Closes #9245. # Rationale for this change Currently record_batch! macro supports only literal values. In datafusion repository there is also a record_batch! macro that supports this. https://github.com/apache/datafusion/issues/13037 can be closed after Datafusion repository upgrades version # What changes are included in this PR? Extend record_batch! macro to support datafusion equivalent added in: # Are these changes tested? I've actually ported datafusion logic to here. I was not sure if it makes sense to add unit tests for this macro but I can if requested # Are there any user-facing changes? No breaking changes to downstream since this only extends macro --------- Co-authored-by: Andrew Lamb --- arrow-array/src/record_batch.rs | 65 +++++++++++++++++++++++++++++---- 1 file changed, 58 insertions(+), 7 deletions(-) diff --git a/arrow-array/src/record_batch.rs b/arrow-array/src/record_batch.rs index cfec969165a9..780e14fd4fbf 100644 --- a/arrow-array/src/record_batch.rs +++ b/arrow-array/src/record_batch.rs @@ -135,6 +135,18 @@ macro_rules! 
create_array { ($ty: tt, [$($values: expr),*]) => { std::sync::Arc::new(<$crate::create_array!(@from $ty)>::from(vec![$($values),*])) }; + + (Binary, $values: expr) => { + std::sync::Arc::new($crate::BinaryArray::from_vec($values)) + }; + + (LargeBinary, $values: expr) => { + std::sync::Arc::new($crate::LargeBinaryArray::from_vec($values)) + }; + + ($ty: tt, $values: expr) => { + std::sync::Arc::new(<$crate::create_array!(@from $ty)>::from($values)) + }; } /// Creates a record batch from literal slice of values, suitable for rapid @@ -152,10 +164,22 @@ macro_rules! create_array { /// ("c", Utf8, ["alpha", "beta", "gamma"]) /// ); /// ``` +/// +/// Variables and expressions are also supported: +/// +/// ```rust +/// use arrow_array::record_batch; +/// +/// let values = vec![1, 2, 3]; +/// let batch = record_batch!( +/// ("a", Int32, values), +/// ("b", Float64, vec![Some(4.0), None, Some(5.0)]) +/// ); +/// ``` /// Due to limitation of [`create_array!`] macro, support for limited data types is available. #[macro_export] macro_rules! record_batch { - ($(($name: expr, $type: ident, [$($values: expr),*])),*) => { + ($(($name: expr, $type: ident, $($values: tt)+)),*) => { { let schema = std::sync::Arc::new(arrow_schema::Schema::new(vec![ $( @@ -163,16 +187,14 @@ macro_rules! 
record_batch { )* ])); - let batch = $crate::RecordBatch::try_new( + $crate::RecordBatch::try_new( schema, vec![$( - $crate::create_array!($type, [$($values),*]), + $crate::create_array!($type, $($values)+), )*] - ); - - batch + ) } - } + }; } /// A two-dimensional batch of column-oriented data with a defined @@ -981,6 +1003,35 @@ mod tests { assert_eq!(5, record_batch.column(1).len()); } + #[test] + fn create_binary_record_batch_from_variables() { + let binary_values = vec![b"a".as_slice()]; + let large_binary_values = vec![b"xxx".as_slice()]; + + let record_batch = record_batch!( + ("a", Binary, binary_values), + ("b", LargeBinary, large_binary_values) + ) + .unwrap(); + + assert_eq!(1, record_batch.num_rows()); + assert_eq!(2, record_batch.num_columns()); + assert_eq!( + &DataType::Binary, + record_batch.schema().field(0).data_type() + ); + assert_eq!( + &DataType::LargeBinary, + record_batch.schema().field(1).data_type() + ); + + let binary = record_batch.column(0).as_binary::(); + assert_eq!(b"a", binary.value(0)); + + let large_binary = record_batch.column(1).as_binary::(); + assert_eq!(b"xxx", large_binary.value(0)); + } + #[test] fn byte_size_should_not_regress() { let schema = Schema::new(vec![ From c4b43bb916aa91d366d17013d867992d931aae70 Mon Sep 17 00:00:00 2001 From: Mikhail Zabaluev Date: Wed, 18 Mar 2026 16:11:40 +0200 Subject: [PATCH 48/80] feat(arrow-avro): `HeaderInfo` to expose OCF header (#9548) # Which issue does this PR close? - Closes #9460. # Rationale for this change Rework of #9462 along the lines proposed in https://github.com/apache/arrow-rs/pull/9462#issuecomment-3995541243. # What changes are included in this PR? Add `HeaderInfo` as a cheaply cloneable value to expose header information parsed from an Avro OCF file. Add `read_header_info` function to the `reader` module, and its async counterpart to the `reader::async_reader` module, to read the header from the file reader and return `HeaderInfo`. 
Add `build_with_header` method to async reader builder to enable reuse of the header with multiple readers. # Are these changes tested? Added a test for the async reader. # Are there any user-facing changes? New API in arrow-avro: * `reader::HeaderInfo` * `reader::read_header_info` and `reader::async_reader::read_header_info` * `build_with_header` method of `AvroAsyncFileReader`'s builder. --------- Co-authored-by: Connor Sanders <170039284+jecsand838@users.noreply.github.com> --- arrow-avro/src/reader/async_reader/builder.rs | 135 +++++++++++------- arrow-avro/src/reader/async_reader/mod.rs | 45 +++++- arrow-avro/src/reader/header.rs | 67 ++++++++- arrow-avro/src/reader/mod.rs | 8 +- 4 files changed, 195 insertions(+), 60 deletions(-) diff --git a/arrow-avro/src/reader/async_reader/builder.rs b/arrow-avro/src/reader/async_reader/builder.rs index 9e979c75669d..d3cca70425de 100644 --- a/arrow-avro/src/reader/async_reader/builder.rs +++ b/arrow-avro/src/reader/async_reader/builder.rs @@ -18,10 +18,10 @@ use crate::codec::{AvroFieldBuilder, Tz}; use crate::errors::AvroError; use crate::reader::async_reader::ReaderState; -use crate::reader::header::{Header, HeaderDecoder}; +use crate::reader::header::{Header, HeaderDecoder, HeaderInfo}; use crate::reader::record::RecordDecoder; use crate::reader::{AsyncAvroFileReader, AsyncFileReader, Decoder}; -use crate::schema::{AvroSchema, FingerprintAlgorithm, SCHEMA_METADATA_KEY}; +use crate::schema::{AvroSchema, FingerprintAlgorithm}; use indexmap::IndexMap; use std::ops::Range; @@ -119,50 +119,71 @@ impl ReaderBuilder { } } -impl ReaderBuilder { - async fn read_header(&mut self) -> Result<(Header, u64), AvroError> { - let mut decoder = HeaderDecoder::default(); - let mut position = 0; - loop { - let range_to_fetch = position - ..(position + self.header_size_hint.unwrap_or(DEFAULT_HEADER_SIZE_HINT)) - .min(self.file_size); +/// Reads the Avro file header (magic, metadata, sync marker) asynchronously from `reader`. 
+/// +/// On success, returns the parsed [`HeaderInfo`] containing the header and its length in bytes. +pub async fn read_header_info<R>( + reader: &mut R, + file_size: u64, + header_size_hint: Option<u64>, +) -> Result<HeaderInfo, AvroError> +where + R: AsyncFileReader, +{ + read_header(reader, file_size, header_size_hint) + .await + .map(|(header, header_len)| HeaderInfo::new(header, header_len)) +} - // Maybe EOF after the header, no actual data - if range_to_fetch.is_empty() { - break; - } +async fn read_header<R>( + reader: &mut R, + file_size: u64, + header_size_hint: Option<u64>, +) -> Result<(Header, u64), AvroError> +where + R: AsyncFileReader, +{ + let mut decoder = HeaderDecoder::default(); + let mut position = 0; + loop { + let range_to_fetch = position + ..(position + header_size_hint.unwrap_or(DEFAULT_HEADER_SIZE_HINT)).min(file_size); - let current_data = self - .reader - .get_bytes(range_to_fetch.clone()) - .await - .map_err(|err| { - AvroError::General(format!( - "Error fetching Avro header from file reader: {err}" - )) - })?; - if current_data.is_empty() { - return Err(AvroError::EOF( - "Unexpected EOF while fetching header data".into(), - )); - } + // Maybe EOF after the header, no actual data + if range_to_fetch.is_empty() { + break; + } - let read = current_data.len(); - let decoded = decoder.decode(&current_data)?; - if decoded != read { - position += decoded as u64; - break; - } - position += read as u64; + let current_data = reader + .get_bytes(range_to_fetch.clone()) + .await + .map_err(|err| { + AvroError::General(format!( + "Error fetching Avro header from file reader: {err}" + )) + })?; + if current_data.is_empty() { + return Err(AvroError::EOF( + "Unexpected EOF while fetching header data".into(), + )); } - decoder - .flush() - .map(|header| (header, position)) - .ok_or_else(|| AvroError::EOF("Unexpected EOF while reading Avro header".into())) + let read = current_data.len(); + let decoded = decoder.decode(&current_data)?; + if decoded != read { + position += decoded as u64; + break; + 
} + position += read as u64; } + decoder + .flush() + .map(|header| (header, position)) + .ok_or_else(|| AvroError::EOF("Unexpected EOF while reading Avro header".into())) +} + +impl ReaderBuilder { /// Build the asynchronous Avro reader with the provided parameters. /// This reads the header first to initialize the reader state. pub async fn try_build(mut self) -> Result, AvroError> { @@ -172,18 +193,24 @@ impl ReaderBuilder { // Start by reading the header from the beginning of the avro file // take the writer schema from the header - let (header, header_len) = self.read_header().await?; - let writer_schema = { - let raw = header.get(SCHEMA_METADATA_KEY).ok_or_else(|| { - AvroError::ParseError("No Avro schema present in file header".to_string()) - })?; - let json_string = std::str::from_utf8(raw) - .map_err(|e| { - AvroError::ParseError(format!("Invalid UTF-8 in Avro schema header: {e}")) - })? - .to_string(); - AvroSchema::new(json_string) - }; + let header_info = + read_header_info(&mut self.reader, self.file_size, self.header_size_hint).await?; + + self.build_with_header(header_info) + } + + /// Build the asynchronous Avro reader with the provided header. + /// + /// This allows initializing the reader with pre-parsed header information. + /// Note that this method is not async because it does not need to perform any I/O operations. + /// + /// Note: Any `header_size_hint` set via [`Self::with_header_size_hint`] is not used + /// when building with a pre-parsed header, since no header fetching occurs. 
+ pub fn build_with_header( + self, + header_info: HeaderInfo, + ) -> Result, AvroError> { + let writer_schema = header_info.writer_schema()?; // If projection exists, project the reader schema, // if no reader schema is provided, parse it from the header(get the raw writer schema), and project that @@ -230,6 +257,7 @@ impl ReaderBuilder { IndexMap::new(), FingerprintAlgorithm::Rabin, ); + let header_len = header_info.header_len(); let range = match self.range { Some(r) => { // If this PartitionedFile's range starts at 0, we need to skip the header bytes. @@ -252,8 +280,9 @@ impl ReaderBuilder { reader: self.reader, } }; - let codec = header.compression()?; - let sync_marker = header.sync(); + + let codec = header_info.compression()?; + let sync_marker = header_info.sync(); Ok(AsyncAvroFileReader::new( range, diff --git a/arrow-avro/src/reader/async_reader/mod.rs b/arrow-avro/src/reader/async_reader/mod.rs index 0aaa739eef85..c034411edb03 100644 --- a/arrow-avro/src/reader/async_reader/mod.rs +++ b/arrow-avro/src/reader/async_reader/mod.rs @@ -15,6 +15,11 @@ // specific language governing permissions and limitations // under the License. +//! Asynchronous implementation of Avro file reader. +//! +//! This module provides [`AsyncAvroFileReader`], which supports reading and decoding +//! the Avro OCF format from any source that implements [`AsyncFileReader`]. 
+ use crate::compression::CompressionCodec; use crate::reader::Decoder; use crate::reader::block::{BlockDecoder, BlockDecoderState}; @@ -32,7 +37,7 @@ mod async_file_reader; mod builder; pub use async_file_reader::AsyncFileReader; -pub use builder::ReaderBuilder; +pub use builder::{ReaderBuilder, read_header_info}; #[cfg(feature = "object_store")] mod store; @@ -1286,6 +1291,44 @@ mod tests { assert_eq!(batch.num_rows(), 8); } + #[tokio::test] + async fn test_builder_with_header_info() { + let file = arrow_test_data("avro/alltypes_plain.avro"); + let store = Arc::new(LocalFileSystem::new()); + let location = Path::from_filesystem_path(&file).unwrap(); + + let file_size = store.head(&location).await.unwrap().size; + + let mut file_reader = AvroObjectReader::new(store, location); + + let header_info = read_header_info(&mut file_reader, file_size, None) + .await + .unwrap(); + + assert_eq!(header_info.header_len(), 675); + + let writer_schema = header_info.writer_schema().unwrap(); + let expected_avro_json: serde_json::Value = serde_json::from_str( + get_alltypes_schema() + .metadata() + .get(SCHEMA_METADATA_KEY) + .unwrap(), + ) + .unwrap(); + let actual_avro_json: serde_json::Value = + serde_json::from_str(&writer_schema.json_string).unwrap(); + assert_eq!(actual_avro_json, expected_avro_json); + + let reader = AsyncAvroFileReader::builder(file_reader, file_size, 1024) + .build_with_header(header_info) + .unwrap(); + + let batches: Vec = reader.try_collect().await.unwrap(); + + let batch = &batches[0]; + assert_eq!(batch.num_rows(), 8) + } + #[tokio::test] async fn test_roundtrip_write_then_async_read() { use crate::writer::AvroWriter; diff --git a/arrow-avro/src/reader/header.rs b/arrow-avro/src/reader/header.rs index b5efd8bcdb65..c5593ba0ad70 100644 --- a/arrow-avro/src/reader/header.rs +++ b/arrow-avro/src/reader/header.rs @@ -20,12 +20,17 @@ use crate::compression::{CODEC_METADATA_KEY, CompressionCodec}; use crate::errors::AvroError; use 
crate::reader::vlq::VLQDecoder; -use crate::schema::{SCHEMA_METADATA_KEY, Schema}; +use crate::schema::{AvroSchema, SCHEMA_METADATA_KEY, Schema}; use std::io::BufRead; +use std::str; +use std::sync::Arc; /// Read the Avro file header (magic, metadata, sync marker) from `reader`. -pub(crate) fn read_header(mut reader: R) -> Result { +/// +/// On success, returns the parsed [`Header`] and the number of bytes read from `reader`. +pub(crate) fn read_header(mut reader: R) -> Result<(Header, u64), AvroError> { let mut decoder = HeaderDecoder::default(); + let mut position = 0; loop { let buf = reader.fill_buf()?; if buf.is_empty() { @@ -34,12 +39,14 @@ pub(crate) fn read_header(mut reader: R) -> Result); + +struct HeaderInfoInner { + header: Header, + header_len: u64, +} + +/// Reads the Avro file header (magic, metadata, sync marker) from `reader`. +/// +/// On success, returns the parsed [`HeaderInfo`] containing the header and its length in bytes. +pub fn read_header_info(reader: R) -> Result { + let (header, header_len) = read_header(reader)?; + Ok(HeaderInfo::new(header, header_len)) +} + +impl HeaderInfo { + pub(crate) fn new(header: Header, header_len: u64) -> Self { + Self(Arc::new(HeaderInfoInner { header, header_len })) + } + + /// Returns the writer schema for this file. + pub fn writer_schema(&self) -> Result { + let raw = self.0.header.get(SCHEMA_METADATA_KEY).ok_or_else(|| { + AvroError::ParseError("No Avro schema present in file header".to_string()) + })?; + let json_string = str::from_utf8(raw) + .map_err(|e| { + AvroError::ParseError(format!("Invalid UTF-8 in Avro schema header: {e}")) + })? + .to_string(); + Ok(AvroSchema::new(json_string)) + } + + /// Returns the [`CompressionCodec`] if any + pub fn compression(&self) -> Result, AvroError> { + self.0.header.compression() + } + + /// Returns the length of the header in bytes. + pub fn header_len(&self) -> u64 { + self.0.header_len + } + + /// Returns the sync token for this file. 
+ pub fn sync(&self) -> [u8; 16] { + self.0.header.sync() + } +} + /// A decoder for [`Header`] /// /// The avro file format does not encode the length of the header, and so it @@ -315,7 +376,7 @@ mod test { fn decode_file(file: &str) -> Header { let file = File::open(file).unwrap(); - read_header(BufReader::with_capacity(1000, file)).unwrap() + read_header(BufReader::with_capacity(1000, file)).unwrap().0 } #[test] diff --git a/arrow-avro/src/reader/mod.rs b/arrow-avro/src/reader/mod.rs index 84d41cf9c6b1..070204f2bcfb 100644 --- a/arrow-avro/src/reader/mod.rs +++ b/arrow-avro/src/reader/mod.rs @@ -500,7 +500,9 @@ mod record; mod vlq; #[cfg(feature = "async")] -mod async_reader; +pub mod async_reader; + +pub use header::{HeaderInfo, read_header_info}; #[cfg(feature = "object_store")] pub use async_reader::AvroObjectReader; @@ -1285,7 +1287,7 @@ impl ReaderBuilder { /// the discovered writer (and optional reader) schema, and prepares to iterate blocks, /// decompressing if necessary. pub fn build(self, mut reader: R) -> Result, ArrowError> { - let header = read_header(&mut reader)?; + let (header, _) = read_header(&mut reader)?; let decoder = self.make_decoder(Some(&header), self.reader_schema.as_ref())?; Ok(Reader { reader, @@ -1644,7 +1646,7 @@ mod test { fn load_writer_schema_json(path: &str) -> Value { let file = File::open(path).unwrap(); - let header = super::read_header(BufReader::new(file)).unwrap(); + let (header, _) = super::read_header(BufReader::new(file)).unwrap(); let schema = header.schema().unwrap().unwrap(); serde_json::to_value(&schema).unwrap() } From 66313ae9a18bd5479c5be97aaaf926fd5f64cdb9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 18 Mar 2026 10:12:12 -0400 Subject: [PATCH 49/80] Bump actions/download-artifact from 7 to 8 (#9488) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps 
[actions/download-artifact](https://github.com/actions/download-artifact) from 7 to 8.
Release notes

Sourced from actions/download-artifact's releases.

v8.0.0

v8 - What's new

Direct downloads

To support direct uploads in actions/upload-artifact, the action will no longer attempt to unzip all downloaded files. Instead, the action checks the Content-Type header ahead of unzipping and skips non-zipped files. Callers wishing to download a zipped file as-is can also set the new skip-decompress parameter to true.

Enforced checks (breaking)

A previous release introduced digest checks on the download. If a download hash didn't match the expected hash from the server, the action would log a warning. Callers can now configure the behavior on mismatch with the digest-mismatch parameter. To be secure by default, we are now defaulting the behavior to error which will fail the workflow run.

ESM

To support new versions of the @actions/* packages, we've upgraded the package to ESM.

What's Changed

Full Changelog: https://github.com/actions/download-artifact/compare/v7...v8.0.0

Commits
  • 70fc10c Merge pull request #461 from actions/danwkennedy/digest-mismatch-behavior
  • f258da9 Add change docs
  • ccc058e Fix linting issues
  • bd7976b Add a setting to specify what to do on hash mismatch and default it to error
  • ac21fcf Merge pull request #460 from actions/danwkennedy/download-no-unzip
  • 15999bf Add note about package bumps
  • 974686e Bump the version to v8 and add release notes
  • fbe48b1 Update test names to make it clearer what they do
  • 96bf374 One more test fix
  • b8c4819 Fix skip decompress test
  • Additional commits viewable in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=actions/download-artifact&package-manager=github_actions&previous-version=7&new-version=8)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 12e22abce06d..5762ba1ffce3 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -71,7 +71,7 @@ jobs: steps: - uses: actions/checkout@v6 - name: Download crate docs - uses: actions/download-artifact@v7 + uses: actions/download-artifact@v8 with: name: crate-docs path: website/build From 3b6179658203dc1b1610b67c1777d5b8beb137fc Mon Sep 17 00:00:00 2001 From: Raz Luvaton <16746759+rluvaton@users.noreply.github.com> Date: Wed, 18 Mar 2026 16:27:12 +0200 Subject: [PATCH 50/80] fix: first next_back() on new RowsIter panics (#9505) # Which issue does this PR close? N/A # Rationale for this change it should not panic # What changes are included in this PR? correctly use last row in `next_back` # Are these changes tested? yes # Are there any user-facing changes? 
they can now use `next_back` --- arrow-row/src/lib.rs | 43 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/arrow-row/src/lib.rs b/arrow-row/src/lib.rs index 9679c89b4807..078c4574775d 100644 --- a/arrow-row/src/lib.rs +++ b/arrow-row/src/lib.rs @@ -1414,9 +1414,12 @@ impl DoubleEndedIterator for RowsIter<'_> { if self.end == self.start { return None; } - // Safety: We have checked that `start` is less than `end` - let row = unsafe { self.rows.row_unchecked(self.end) }; + self.end -= 1; + + // Safety: By construction we create `end >= start`, so if `end` is not equal to `start` it cannot be less than `start` + // therefore `end - 1` is within range + let row = unsafe { self.rows.row_unchecked(self.end) }; Some(row) } } @@ -5651,4 +5654,40 @@ mod tests { .contains("not yet implemented") ); } + + #[test] + fn empty_row_iter_next_back() { + let rows = RowConverter::new(vec![SortField::new(DataType::UInt8)]) + .unwrap() + .empty_rows(0, 0); + let mut rows_iter = rows.iter(); + assert_eq!(rows_iter.next_back(), None); + assert_eq!(rows_iter.next_back(), None); + assert_eq!(rows_iter.next_back(), None); + } + + #[test] + fn row_iter_next_back() { + let row_converter = RowConverter::new(vec![SortField::new(DataType::UInt8)]).unwrap(); + let mut rng = StdRng::seed_from_u64(42); + let array = generate_primitive_array::(&mut rng, 100, 0.8); + let rows = row_converter.convert_columns(&[Arc::new(array)]).unwrap(); + + let mut rows_iter = rows.iter(); + let mut bytes: Vec = vec![]; + + while let Some(row) = rows_iter.next_back() { + bytes.extend(row.data.iter().rev()); + } + + bytes.reverse(); + + assert_eq!( + bytes, + &rows.buffer.as_slice()[..*rows.offsets.last().unwrap()] + ); + + assert_eq!(rows_iter.next_back(), None); + assert_eq!(rows_iter.next(), None); + } } From c50ea6eaaf484620d4895896400ab0e2ced731ce Mon Sep 17 00:00:00 2001 From: Ed Seidl Date: Wed, 18 Mar 2026 11:47:48 -0700 Subject: [PATCH 51/80] Optimize 
delta binary decoder in the case where bitwidth=0 (#9477) # Which issue does this PR close? - Closes #9476. # Rationale for this change Explore if we can achieve the speedups seen in arrow-cpp (https://github.com/apache/arrow/pull/49296). # What changes are included in this PR? Adds special cases to the delta binary packed decoder when bitwidth for a miniblock is 0. The optimization avoids relying on previous values to decode current ones. # Are these changes tested? Yes, tests have been added, as well as new benchmarks. # Are there any user-facing changes? No --- parquet/src/encodings/decoding.rs | 146 +++++++++++++++++++++++++++--- 1 file changed, 135 insertions(+), 11 deletions(-) diff --git a/parquet/src/encodings/decoding.rs b/parquet/src/encodings/decoding.rs index 58430820a9b6..7da21e6dd091 100644 --- a/parquet/src/encodings/decoding.rs +++ b/parquet/src/encodings/decoding.rs @@ -770,15 +770,48 @@ where // At this point we have read the deltas to `buffer` we now need to offset // these to get back to the original values that were encoded - for v in &mut buffer[read..read + batch_read] { + // + // Optimization: if the bit_width for the miniblock is 0, then we can employ + // a faster decoding method than setting `value[i] = value[i-1] + value[i] + min_delta`. + // Where min_delta is 0 (all values in the miniblock are the same), we can simply + // set all values to `self.last_value`. In the case of non-zero min_delta (values + // in the mini-block form an arithmetic progression) each value can be computed via + // `value[i] = (i + 1) * min_delta + last_value`. In both cases we remove the + // dependence on the preceding value. 
+ // Kudos to @pitrou for the idea https://github.com/apache/arrow/pull/49296 + let min_delta = self.min_delta.as_i64()?; + if bit_width == 0 { + if min_delta == 0 { + buffer[read..read + batch_read].fill(self.last_value); + } else { + // the c++ version multiplies min_delta by the iter index, but doing + // wrapping_mul through T::T was a bit slower. this is still + // faster than before. + let mut delta = self.min_delta; + for v in &mut buffer[read..read + batch_read] { + *v = self.last_value.wrapping_add(&delta); + delta = delta.wrapping_add(&self.min_delta); + } + + self.last_value = buffer[read + batch_read - 1]; + } + } else { // It is OK for deltas to contain "overflowed" values after encoding, // e.g. i64::MAX - i64::MIN, so we use `wrapping_add` to "overflow" again and // restore original value. - *v = v - .wrapping_add(&self.min_delta) - .wrapping_add(&self.last_value); - - self.last_value = *v; + if min_delta == 0 { + for v in &mut buffer[read..read + batch_read] { + *v = v.wrapping_add(&self.last_value); + self.last_value = *v; + } + } else { + for v in &mut buffer[read..read + batch_read] { + *v = v + .wrapping_add(&self.min_delta) + .wrapping_add(&self.last_value); + self.last_value = *v; + } + } } read += batch_read; @@ -840,12 +873,33 @@ where )); } - for v in &mut skip_buffer[0..skip_count] { - *v = v - .wrapping_add(&self.min_delta) - .wrapping_add(&self.last_value); + // see commentary in self.get() above regarding optimizations + let min_delta = self.min_delta.as_i64()?; + if bit_width == 0 { + // if min_delta == 0, there's nothing to do. 
self.last_value is unchanged + if min_delta != 0 { + let mut delta = self.min_delta; + for v in &mut skip_buffer[0..skip_count] { + *v = self.last_value.wrapping_add(&delta); + delta = delta.wrapping_add(&self.min_delta); + } + + self.last_value = skip_buffer[skip_count - 1]; + } + } else if min_delta == 0 { + for v in &mut skip_buffer[0..skip_count] { + *v = v.wrapping_add(&self.last_value); + + self.last_value = *v; + } + } else { + for v in &mut skip_buffer[0..skip_count] { + *v = v + .wrapping_add(&self.min_delta) + .wrapping_add(&self.last_value); - self.last_value = *v; + self.last_value = *v; + } } skip += mini_block_should_skip; @@ -1802,6 +1856,76 @@ mod tests { ); } + #[test] + fn test_delta_bit_packed_int32_single_value_large() { + let block_data = vec![3; 10240]; + test_delta_bit_packed_decode::(vec![block_data]); + } + + #[test] + fn test_delta_bit_packed_int32_single_value_skip_large() { + let block_data = vec![3; 10240]; + test_skip::(block_data.clone(), Encoding::DELTA_BINARY_PACKED, 50); + test_skip::(block_data, Encoding::DELTA_BINARY_PACKED, 5000); + } + + #[test] + fn test_delta_bit_packed_int32_increasing_value_large() { + let block_data = (0i32..10240).collect(); + test_delta_bit_packed_decode::(vec![block_data]); + } + + #[test] + fn test_delta_bit_packed_int32_increasing_value_skip_large() { + let block_data = (0i32..10240).collect::>(); + test_skip::(block_data.clone(), Encoding::DELTA_BINARY_PACKED, 50); + test_skip::(block_data, Encoding::DELTA_BINARY_PACKED, 5000); + } + + #[test] + fn test_delta_bit_packed_int32_stepped_value_large() { + let block_data = (0i32..10240).map(|i| i / 2).collect(); + test_delta_bit_packed_decode::(vec![block_data]); + } + + #[test] + fn test_delta_bit_packed_int32_stepped_value_skip_large() { + let block_data = (0i32..10240).map(|i| i / 2).collect::>(); + test_skip::(block_data.clone(), Encoding::DELTA_BINARY_PACKED, 50); + test_skip::(block_data, Encoding::DELTA_BINARY_PACKED, 5000); + } + + #[test] + fn 
test_delta_bit_packed_int32_mixed_large() { + // should be enough for 4 mini-blocks plus a little so we get some + // mixed mini-blocks + const BLOCK_SIZE: i32 = 133; + let block1_data = (0..BLOCK_SIZE).map(|i| (i * 7) % 11).collect(); + let block2_data = vec![3; BLOCK_SIZE as usize]; + let block3_data = (0..BLOCK_SIZE).map(|i| (i * 5) % 13).collect(); + let block4_data = (0..BLOCK_SIZE).collect(); + let block5_data = (0..BLOCK_SIZE).map(|i| (i * 3) % 17).collect(); + test_delta_bit_packed_decode::(vec![ + block1_data, + block2_data, + block3_data, + block4_data, + block5_data, + ]); + } + + #[test] + fn test_delta_bit_packed_int64_single_value_large() { + let block_data = vec![5; 10240]; + test_delta_bit_packed_decode::(vec![block_data]); + } + + #[test] + fn test_delta_bit_packed_int64_increasing_value_large() { + let block_data = (0i64..10240).collect(); + test_delta_bit_packed_decode::(vec![block_data]); + } + #[test] fn test_delta_byte_array_same_arrays() { let data = vec![ From ea3c0509bcee34e1e85152db56d085c19ae05e9c Mon Sep 17 00:00:00 2001 From: Peter L Date: Thu, 19 Mar 2026 05:44:35 +1030 Subject: [PATCH 52/80] Add `claim` method to recordbatch for memory accounting (#9433) # Which issue does this PR close? None specifically but aligns with some of the changes in https://github.com/apache/arrow-rs/issues/8137 # Rationale for this change It should be easy to claim a `RecordBatch` in totality with an arrow memory pool # What changes are included in this PR? Adds a few methods to bubble up the `claim` to `RecordBatch` level if the `pool` feature is enabled. # Are these changes tested? Yes & new tests added # Are there any user-facing changes? 
If `pool` feature is added, a new `claim` method on `RecordBatch` and associated structs --- arrow-array/src/record_batch.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arrow-array/src/record_batch.rs b/arrow-array/src/record_batch.rs index 780e14fd4fbf..f400ac4d0de9 100644 --- a/arrow-array/src/record_batch.rs +++ b/arrow-array/src/record_batch.rs @@ -795,6 +795,20 @@ impl RecordBatch { RecordBatch::try_new(schema, columns) } + /// Registers all buffers in this record batch with the provided [`MemoryPool`]. + /// + /// This claims memory for all columns in the batch by calling [`Array::claim`] + /// on each column. + /// + /// [`MemoryPool`]: arrow_buffer::MemoryPool + /// [`Array::claim`]: crate::Array::claim + #[cfg(feature = "pool")] + pub fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) { + for column in self.columns() { + column.claim(pool); + } + } + /// Returns the total number of bytes of memory occupied physically by this batch. /// /// Note that this does not always correspond to the exact memory usage of a From 8745c3560ba6b688e3cb8e1599e4da82b4168be4 Mon Sep 17 00:00:00 2001 From: Alexander Rafferty Date: Thu, 19 Mar 2026 06:18:33 +1100 Subject: [PATCH 53/80] Move `ValueIter` into own module, and add public `record_count` function (#9557) # Which issue does this PR close? Another smaller PR extracted from #9494. # Rationale for this change I've moved `ValueIter` into its own module because it's already self-contained, and because that will make it easier to review the changes I have made to `arrow-json/src/reader/schema.rs`. I've also added a public `record_count` function to `ValueIter` - which can be used to simplify consuming code in Datafusion which is currently tracking it separately. # What changes are included in this PR? * Moved `ValueIter` into own module * Added `record_count` method to `ValueIter` # Are these changes tested? Yes. # Are there any user-facing changes? 
Addition of one new public method, `ValueIter::record_count`. --- arrow-json/src/reader/mod.rs | 2 + arrow-json/src/reader/schema.rs | 80 +-------------------- arrow-json/src/reader/value_iter.rs | 103 ++++++++++++++++++++++++++++ 3 files changed, 107 insertions(+), 78 deletions(-) create mode 100644 arrow-json/src/reader/value_iter.rs diff --git a/arrow-json/src/reader/mod.rs b/arrow-json/src/reader/mod.rs index 786cf9212d04..04271368a4aa 100644 --- a/arrow-json/src/reader/mod.rs +++ b/arrow-json/src/reader/mod.rs @@ -150,6 +150,7 @@ use arrow_array::{RecordBatch, RecordBatchReader, StructArray, downcast_integer, use arrow_data::ArrayData; use arrow_schema::{ArrowError, DataType, FieldRef, Schema, SchemaRef, TimeUnit}; pub use schema::*; +pub use value_iter::ValueIter; use crate::reader::boolean_array::BooleanArrayDecoder; use crate::reader::decimal_array::DecimalArrayDecoder; @@ -179,6 +180,7 @@ mod string_view_array; mod struct_array; mod tape; mod timestamp_array; +mod value_iter; /// A builder for [`Reader`] and [`Decoder`] pub struct ReaderBuilder { diff --git a/arrow-json/src/reader/schema.rs b/arrow-json/src/reader/schema.rs index fb7d93a85e12..524e6b2aa560 100644 --- a/arrow-json/src/reader/schema.rs +++ b/arrow-json/src/reader/schema.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. 
+use super::ValueIter; use arrow_schema::{ArrowError, DataType, Field, Fields, Schema}; use indexmap::map::IndexMap as HashMap; use indexmap::set::IndexSet as HashSet; @@ -127,83 +128,6 @@ fn generate_schema(spec: HashMap) -> Result { - reader: R, - max_read_records: Option, - record_count: usize, - // reuse line buffer to avoid allocation on each record - line_buf: String, -} - -impl ValueIter { - /// Creates a new `ValueIter` - pub fn new(reader: R, max_read_records: Option) -> Self { - Self { - reader, - max_read_records, - record_count: 0, - line_buf: String::new(), - } - } -} - -impl Iterator for ValueIter { - type Item = Result; - - fn next(&mut self) -> Option { - if let Some(max) = self.max_read_records { - if self.record_count >= max { - return None; - } - } - - loop { - self.line_buf.truncate(0); - match self.reader.read_line(&mut self.line_buf) { - Ok(0) => { - // read_line returns 0 when stream reached EOF - return None; - } - Err(e) => { - return Some(Err(ArrowError::JsonError(format!( - "Failed to read JSON record: {e}" - )))); - } - _ => { - let trimmed_s = self.line_buf.trim(); - if trimmed_s.is_empty() { - // ignore empty lines - continue; - } - - self.record_count += 1; - return Some( - serde_json::from_str(trimmed_s) - .map_err(|e| ArrowError::JsonError(format!("Not valid JSON: {e}"))), - ); - } - } - } - } -} - /// Infer the fields of a JSON file by reading the first n records of the file, with /// `max_read_records` controlling the maximum number of records to read. 
/// @@ -282,7 +206,7 @@ pub fn infer_json_schema( ) -> Result<(Schema, usize), ArrowError> { let mut values = ValueIter::new(reader, max_read_records); let schema = infer_json_schema_from_iterator(&mut values)?; - Ok((schema, values.record_count)) + Ok((schema, values.record_count())) } fn set_object_scalar_field_type( diff --git a/arrow-json/src/reader/value_iter.rs b/arrow-json/src/reader/value_iter.rs new file mode 100644 index 000000000000..f70b893f52a0 --- /dev/null +++ b/arrow-json/src/reader/value_iter.rs @@ -0,0 +1,103 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use std::io::BufRead; + +use arrow_schema::ArrowError; +use serde_json::Value; + +/// JSON file reader that produces a serde_json::Value iterator from a Read trait +/// +/// # Example +/// +/// ``` +/// use std::fs::File; +/// use std::io::BufReader; +/// use arrow_json::reader::ValueIter; +/// +/// let mut reader = +/// BufReader::new(File::open("test/data/mixed_arrays.json").unwrap()); +/// let mut value_reader = ValueIter::new(&mut reader, None); +/// for value in value_reader { +/// println!("JSON value: {}", value.unwrap()); +/// } +/// ``` +#[derive(Debug)] +pub struct ValueIter { + reader: R, + max_read_records: Option, + record_count: usize, + // reuse line buffer to avoid allocation on each record + line_buf: String, +} + +impl ValueIter { + /// Creates a new `ValueIter` + pub fn new(reader: R, max_read_records: Option) -> Self { + Self { + reader, + max_read_records, + record_count: 0, + line_buf: String::new(), + } + } + + /// Returns the number of records this iterator has consumed + pub fn record_count(&self) -> usize { + self.record_count + } +} + +impl Iterator for ValueIter { + type Item = Result; + + fn next(&mut self) -> Option { + if let Some(max) = self.max_read_records { + if self.record_count >= max { + return None; + } + } + + loop { + self.line_buf.truncate(0); + match self.reader.read_line(&mut self.line_buf) { + Ok(0) => { + // read_line returns 0 when stream reached EOF + return None; + } + Err(e) => { + return Some(Err(ArrowError::JsonError(format!( + "Failed to read JSON record: {e}" + )))); + } + _ => { + let trimmed_s = self.line_buf.trim(); + if trimmed_s.is_empty() { + // ignore empty lines + continue; + } + + self.record_count += 1; + return Some( + serde_json::from_str(trimmed_s) + .map_err(|e| ArrowError::JsonError(format!("Not valid JSON: {e}"))), + ); + } + } + } + } +} From f4ab49e9f3621e72f875b5da26c0dffae880249c Mon Sep 17 00:00:00 2001 From: Konstantin Tarasov <33369833+sdf-jkl@users.noreply.github.com> Date: Wed, 18 Mar 
2026 15:23:24 -0400 Subject: [PATCH 54/80] [Variant] clean up `variant_get` tests (#9518) # Which issue does this PR close? - closes #9517. # Rationale for this change check issue # What changes are included in this PR? - Use `variant_shred` in test macros - Use `VariantArray::from_parts` instead of using `StructArrayBuilder` # Are these changes tested? yes, changes pass same tests # Are there any user-facing changes? no --- parquet-variant-compute/src/shred_variant.rs | 2 - .../src/type_conversion.rs | 2 +- parquet-variant-compute/src/variant_get.rs | 827 ++++++++---------- 3 files changed, 370 insertions(+), 461 deletions(-) diff --git a/parquet-variant-compute/src/shred_variant.rs b/parquet-variant-compute/src/shred_variant.rs index d0087fd2c7ea..6520ea700b0c 100644 --- a/parquet-variant-compute/src/shred_variant.rs +++ b/parquet-variant-compute/src/shred_variant.rs @@ -1147,7 +1147,6 @@ mod tests { } #[test] - // TODO(#9518): Drop this once variant_get tests build shredded fixtures via shred_variant. fn test_largeutf8_shredding() { let input = VariantArray::from_iter(vec![ Some(Variant::from("hello")), @@ -1204,7 +1203,6 @@ mod tests { } #[test] - // TODO(#9518): Drop this once variant_get tests build shredded fixtures via shred_variant. 
fn test_largebinary_shredding() { let input = VariantArray::from_iter(vec![ Some(Variant::from(&b"\x00\x01\x02"[..])), diff --git a/parquet-variant-compute/src/type_conversion.rs b/parquet-variant-compute/src/type_conversion.rs index 42bac5727aa5..4086a2410792 100644 --- a/parquet-variant-compute/src/type_conversion.rs +++ b/parquet-variant-compute/src/type_conversion.rs @@ -94,7 +94,7 @@ impl_primitive_from_variant!(datatypes::Time32MillisecondType, as_time_utc, |v| } }); impl_primitive_from_variant!(datatypes::Time64MicrosecondType, as_time_utc, |v| { - Some((v.num_seconds_from_midnight() * 1_000_000 + v.nanosecond() / 1_000) as i64) + Some(v.num_seconds_from_midnight() as i64 * 1_000_000 + v.nanosecond() as i64 / 1_000) }); impl_primitive_from_variant!(datatypes::Time64NanosecondType, as_time_utc, |v| { // convert micro to nano seconds diff --git a/parquet-variant-compute/src/variant_get.rs b/parquet-variant-compute/src/variant_get.rs index e02518057be1..a155d04e4720 100644 --- a/parquet-variant-compute/src/variant_get.rs +++ b/parquet-variant-compute/src/variant_get.rs @@ -334,7 +334,9 @@ mod test { use super::{GetOptions, variant_get}; use crate::variant_array::{ShreddedVariantFieldArray, StructArrayBuilder}; - use crate::{VariantArray, VariantArrayBuilder, json_to_variant}; + use crate::{ + VariantArray, VariantArrayBuilder, cast_to_variant, json_to_variant, shred_variant, + }; use arrow::array::{ Array, ArrayRef, AsArray, BinaryArray, BinaryViewArray, BooleanArray, Date32Array, Date64Array, Decimal32Array, Decimal64Array, Decimal128Array, Decimal256Array, @@ -463,51 +465,96 @@ mod test { }; } + /// Build a mixed input [typed, null, fallback, typed] and let shred_variant + /// generate the shredded fixture for the requested type. macro_rules! 
partially_shredded_variant_array_gen { ($func_name:ident, $typed_value_array_gen: expr) => { + partially_shredded_variant_array_gen!( + $func_name, + $typed_value_array_gen, + Variant::from("n/a") + ); + }; + ($func_name:ident, $typed_value_array_gen: expr, $fallback_variant:expr) => { fn $func_name() -> ArrayRef { - // At the time of writing, the `VariantArrayBuilder` does not support shredding. - // so we must construct the array manually. see https://github.com/apache/arrow-rs/issues/7895 - let (metadata, string_value) = { - let mut builder = parquet_variant::VariantBuilder::new(); - builder.append_value("n/a"); - builder.finish() - }; - - let nulls = NullBuffer::from(vec![ - true, // row 0 non null - false, // row 1 is null - true, // row 2 non null - true, // row 3 non null - ]); - - // metadata is the same for all rows - let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 4)); - - // See https://docs.google.com/document/d/1pw0AWoMQY3SjD7R4LgbPvMjG_xSCtXp3rZHkVp9jpZ4/edit?disco=AAABml8WQrY - // about why row1 is an empty but non null, value. - let values = BinaryViewArray::from(vec![ - None, // row 0 is shredded, so no value - Some(b"" as &[u8]), // row 1 is null, so empty value (why?) 
- Some(&string_value), // copy the string value "N/A" - None, // row 3 is shredded, so no value - ]); - - let typed_value = $typed_value_array_gen(); - - let struct_array = StructArrayBuilder::new() - .with_field("metadata", Arc::new(metadata), false) - .with_field("typed_value", Arc::new(typed_value), true) - .with_field("value", Arc::new(values), true) - .with_nulls(nulls) - .build(); - ArrayRef::from( - VariantArray::try_new(&struct_array).expect("should create variant array"), - ) + let typed_value: ArrayRef = Arc::new($typed_value_array_gen()); + let typed_as_variant = cast_to_variant(typed_value.as_ref()) + .expect("should cast typed array to variant"); + let mut input_builder = VariantArrayBuilder::new(typed_as_variant.len()); + input_builder.append_variant(typed_as_variant.value(0)); + input_builder.append_null(); + input_builder.append_variant($fallback_variant); + input_builder.append_variant(typed_as_variant.value(3)); + + let variant_array = shred_variant(&input_builder.build(), typed_value.data_type()) + .expect("should shred variant array"); + ArrayRef::from(variant_array) } }; } + // Fixture definitions grouped with the partially-shredded tests. + macro_rules! 
numeric_partially_shredded_variant_array_fn { + ($func:ident, $array_type:ident, $primitive_type:ty) => { + partially_shredded_variant_array_gen!($func, || $array_type::from(vec![ + Some(<$primitive_type>::try_from(34u8).unwrap()), + None, + None, + Some(<$primitive_type>::try_from(100u8).unwrap()), + ])); + }; + } + + numeric_partially_shredded_variant_array_fn!( + partially_shredded_int8_variant_array, + Int8Array, + i8 + ); + numeric_partially_shredded_variant_array_fn!( + partially_shredded_int16_variant_array, + Int16Array, + i16 + ); + numeric_partially_shredded_variant_array_fn!( + partially_shredded_int32_variant_array, + Int32Array, + i32 + ); + numeric_partially_shredded_variant_array_fn!( + partially_shredded_int64_variant_array, + Int64Array, + i64 + ); + numeric_partially_shredded_variant_array_fn!( + partially_shredded_float32_variant_array, + Float32Array, + f32 + ); + numeric_partially_shredded_variant_array_fn!( + partially_shredded_float64_variant_array, + Float64Array, + f64 + ); + + partially_shredded_variant_array_gen!(partially_shredded_bool_variant_array, || { + arrow::array::BooleanArray::from(vec![Some(true), None, None, Some(false)]) + }); + + partially_shredded_variant_array_gen!( + partially_shredded_utf8_variant_array, + || { StringArray::from(vec![Some("hello"), None, None, Some("world")]) }, + Variant::from(42i32) + ); + + partially_shredded_variant_array_gen!(partially_shredded_date32_variant_array, || { + Date32Array::from(vec![ + Some(20348), // 2025-09-17 + None, + None, + Some(20340), // 2025-09-09 + ]) + }); + #[test] fn get_variant_partially_shredded_int8_as_variant() { numeric_partially_shredded_test!(i8, partially_shredded_int8_variant_array); @@ -568,7 +615,7 @@ mod test { // Expect the values are the same as the original values assert_eq!(result.value(0), Variant::from("hello")); assert!(!result.is_valid(1)); - assert_eq!(result.value(2), Variant::from("n/a")); + assert_eq!(result.value(2), Variant::from(42i32)); 
assert_eq!(result.value(3), Variant::from("world")); } @@ -618,6 +665,153 @@ mod test { assert_eq!(result.value(3), Variant::from(&[4u8, 5u8, 6u8][..])); } + // Timestamp partially-shredded tests grouped with the other partially-shredded cases. + macro_rules! assert_variant_get_as_variant_array_with_default_option { + ($variant_array: expr, $array_expected: expr) => {{ + let options = GetOptions::new(); + let array = $variant_array; + let result = variant_get(&array, options).unwrap(); + let result = VariantArray::try_new(&result).unwrap(); + + assert_eq!(result.len(), $array_expected.len()); + + for (idx, item) in $array_expected.into_iter().enumerate() { + match item { + Some(item) => assert_eq!(result.value(idx), item), + None => assert!(result.is_null(idx)), + } + } + }}; + } + + partially_shredded_variant_array_gen!( + partially_shredded_timestamp_micro_ntz_variant_array, + || { + arrow::array::TimestampMicrosecondArray::from(vec![ + Some(-456000), + None, + None, + Some(1758602096000000), + ]) + } + ); + + #[test] + fn get_variant_partial_shredded_timestamp_micro_ntz_as_variant() { + let array = partially_shredded_timestamp_micro_ntz_variant_array(); + assert_variant_get_as_variant_array_with_default_option!( + array, + vec![ + Some(Variant::from( + DateTime::from_timestamp_micros(-456000i64) + .unwrap() + .naive_utc(), + )), + None, + Some(Variant::from("n/a")), + Some(Variant::from( + DateTime::parse_from_rfc3339("2025-09-23T12:34:56+08:00") + .unwrap() + .naive_utc(), + )), + ] + ) + } + + partially_shredded_variant_array_gen!(partially_shredded_timestamp_micro_variant_array, || { + arrow::array::TimestampMicrosecondArray::from(vec![ + Some(-456000), + None, + None, + Some(1758602096000000), + ]) + .with_timezone("+00:00") + }); + + #[test] + fn get_variant_partial_shredded_timestamp_micro_as_variant() { + let array = partially_shredded_timestamp_micro_variant_array(); + assert_variant_get_as_variant_array_with_default_option!( + array, + vec![ + 
Some(Variant::from( + DateTime::from_timestamp_micros(-456000i64) + .unwrap() + .to_utc(), + )), + None, + Some(Variant::from("n/a")), + Some(Variant::from( + DateTime::parse_from_rfc3339("2025-09-23T12:34:56+08:00") + .unwrap() + .to_utc(), + )), + ] + ) + } + + partially_shredded_variant_array_gen!( + partially_shredded_timestamp_nano_ntz_variant_array, + || { + arrow::array::TimestampNanosecondArray::from(vec![ + Some(-4999999561), + None, + None, + Some(1758602096000000000), + ]) + } + ); + + #[test] + fn get_variant_partial_shredded_timestamp_nano_ntz_as_variant() { + let array = partially_shredded_timestamp_nano_ntz_variant_array(); + assert_variant_get_as_variant_array_with_default_option!( + array, + vec![ + Some(Variant::from( + DateTime::from_timestamp(-5, 439).unwrap().naive_utc() + )), + None, + Some(Variant::from("n/a")), + Some(Variant::from( + DateTime::parse_from_rfc3339("2025-09-23T12:34:56+08:00") + .unwrap() + .naive_utc() + )), + ] + ) + } + + partially_shredded_variant_array_gen!(partially_shredded_timestamp_nano_variant_array, || { + arrow::array::TimestampNanosecondArray::from(vec![ + Some(-4999999561), + None, + None, + Some(1758602096000000000), + ]) + .with_timezone("+00:00") + }); + + #[test] + fn get_variant_partial_shredded_timestamp_nano_as_variant() { + let array = partially_shredded_timestamp_nano_variant_array(); + assert_variant_get_as_variant_array_with_default_option!( + array, + vec![ + Some(Variant::from( + DateTime::from_timestamp(-5, 439).unwrap().to_utc() + )), + None, + Some(Variant::from("n/a")), + Some(Variant::from( + DateTime::parse_from_rfc3339("2025-09-23T12:34:56+08:00") + .unwrap() + .to_utc() + )), + ] + ) + } + /// Shredding: extract a value as an Int32Array #[test] fn get_variant_shredded_int32_as_int32_safe_cast() { @@ -836,22 +1030,21 @@ mod test { macro_rules! 
perfectly_shredded_variant_array_fn { ($func:ident, $typed_value_gen:expr) => { fn $func() -> ArrayRef { - // At the time of writing, the `VariantArrayBuilder` does not support shredding. - // so we must construct the array manually. see https://github.com/apache/arrow-rs/issues/7895 + // Prefer producing fixtures with shred_variant from unshredded input. + // Fall back for remaining non-shreddable test-only Arrow types (currently Null). + let typed_value: ArrayRef = Arc::new($typed_value_gen()); + if let Some(shredded) = cast_to_variant(typed_value.as_ref()) + .ok() + .and_then(|unshredded| shred_variant(&unshredded, typed_value.data_type()).ok()) + { + return shredded.into(); + } + let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n( EMPTY_VARIANT_METADATA_BYTES, - 3, + typed_value.len(), )); - let typed_value = $typed_value_gen(); - - let struct_array = StructArrayBuilder::new() - .with_field("metadata", Arc::new(metadata), false) - .with_field("typed_value", Arc::new(typed_value), true) - .build(); - - VariantArray::try_new(&struct_array) - .expect("should create variant array") - .into() + VariantArray::from_parts(metadata, None, Some(typed_value), None).into() } }; } @@ -1435,156 +1628,6 @@ mod test { .unwrap() ); - macro_rules! 
assert_variant_get_as_variant_array_with_default_option { - ($variant_array: expr, $array_expected: expr) => {{ - let options = GetOptions::new(); - let array = $variant_array; - let result = variant_get(&array, options).unwrap(); - - // expect the result is a VariantArray - let result = VariantArray::try_new(&result).unwrap(); - - assert_eq!(result.len(), $array_expected.len()); - - for (idx, item) in $array_expected.into_iter().enumerate() { - match item { - Some(item) => assert_eq!(result.value(idx), item), - None => assert!(result.is_null(idx)), - } - } - }}; - } - - partially_shredded_variant_array_gen!( - partially_shredded_timestamp_micro_ntz_variant_array, - || { - arrow::array::TimestampMicrosecondArray::from(vec![ - Some(-456000), - None, - None, - Some(1758602096000000), - ]) - } - ); - - #[test] - fn get_variant_partial_shredded_timestamp_micro_ntz_as_variant() { - let array = partially_shredded_timestamp_micro_ntz_variant_array(); - assert_variant_get_as_variant_array_with_default_option!( - array, - vec![ - Some(Variant::from( - DateTime::from_timestamp_micros(-456000i64) - .unwrap() - .naive_utc(), - )), - None, - Some(Variant::from("n/a")), - Some(Variant::from( - DateTime::parse_from_rfc3339("2025-09-23T12:34:56+08:00") - .unwrap() - .naive_utc(), - )), - ] - ) - } - - partially_shredded_variant_array_gen!(partially_shredded_timestamp_micro_variant_array, || { - arrow::array::TimestampMicrosecondArray::from(vec![ - Some(-456000), - None, - None, - Some(1758602096000000), - ]) - .with_timezone("+00:00") - }); - - #[test] - fn get_variant_partial_shredded_timestamp_micro_as_variant() { - let array = partially_shredded_timestamp_micro_variant_array(); - assert_variant_get_as_variant_array_with_default_option!( - array, - vec![ - Some(Variant::from( - DateTime::from_timestamp_micros(-456000i64) - .unwrap() - .to_utc(), - )), - None, - Some(Variant::from("n/a")), - Some(Variant::from( - DateTime::parse_from_rfc3339("2025-09-23T12:34:56+08:00") - 
.unwrap() - .to_utc(), - )), - ] - ) - } - - partially_shredded_variant_array_gen!( - partially_shredded_timestamp_nano_ntz_variant_array, - || { - arrow::array::TimestampNanosecondArray::from(vec![ - Some(-4999999561), - None, - None, - Some(1758602096000000000), - ]) - } - ); - - #[test] - fn get_variant_partial_shredded_timestamp_nano_ntz_as_variant() { - let array = partially_shredded_timestamp_nano_ntz_variant_array(); - - assert_variant_get_as_variant_array_with_default_option!( - array, - vec![ - Some(Variant::from( - DateTime::from_timestamp(-5, 439).unwrap().naive_utc() - )), - None, - Some(Variant::from("n/a")), - Some(Variant::from( - DateTime::parse_from_rfc3339("2025-09-23T12:34:56+08:00") - .unwrap() - .naive_utc() - )), - ] - ) - } - - partially_shredded_variant_array_gen!(partially_shredded_timestamp_nano_variant_array, || { - arrow::array::TimestampNanosecondArray::from(vec![ - Some(-4999999561), - None, - None, - Some(1758602096000000000), - ]) - .with_timezone("+00:00") - }); - - #[test] - fn get_variant_partial_shredded_timestamp_nano_as_variant() { - let array = partially_shredded_timestamp_nano_variant_array(); - - assert_variant_get_as_variant_array_with_default_option!( - array, - vec![ - Some(Variant::from( - DateTime::from_timestamp(-5, 439).unwrap().to_utc() - )), - None, - Some(Variant::from("n/a")), - Some(Variant::from( - DateTime::parse_from_rfc3339("2025-09-23T12:34:56+08:00") - .unwrap() - .to_utc() - )), - ] - ) - } - perfectly_shredded_variant_array_fn!(perfectly_shredded_binary_variant_array, || { BinaryArray::from(vec![ Some(b"Apache" as &[u8]), @@ -1642,98 +1685,6 @@ mod test { ]) ); - /// Return a VariantArray that represents a normal "shredded" variant - /// for the following example - /// - /// Based on the example from [the doc] - /// - /// [the doc]: https://docs.google.com/document/d/1pw0AWoMQY3SjD7R4LgbPvMjG_xSCtXp3rZHkVp9jpZ4/edit?tab=t.0 - /// - /// ```text - /// 34 - /// null (an Arrow NULL, not a Variant::Null) - /// 
"n/a" (a string) - /// 100 - /// ``` - /// - /// The schema of the corresponding `StructArray` would look like this: - /// - /// ```text - /// StructArray { - /// metadata: BinaryViewArray, - /// value: BinaryViewArray, - /// typed_value: Int32Array, - /// } - /// ``` - macro_rules! numeric_partially_shredded_variant_array_fn { - ($func:ident, $array_type:ident, $primitive_type:ty) => { - partially_shredded_variant_array_gen!($func, || $array_type::from(vec![ - Some(<$primitive_type>::try_from(34u8).unwrap()), // row 0 is shredded, so it has a value - None, // row 1 is null, so no value - None, // row 2 is a string, so no typed value - Some(<$primitive_type>::try_from(100u8).unwrap()), // row 3 is shredded, so it has a value - ])); - }; - } - - numeric_partially_shredded_variant_array_fn!( - partially_shredded_int8_variant_array, - Int8Array, - i8 - ); - numeric_partially_shredded_variant_array_fn!( - partially_shredded_int16_variant_array, - Int16Array, - i16 - ); - numeric_partially_shredded_variant_array_fn!( - partially_shredded_int32_variant_array, - Int32Array, - i32 - ); - numeric_partially_shredded_variant_array_fn!( - partially_shredded_int64_variant_array, - Int64Array, - i64 - ); - numeric_partially_shredded_variant_array_fn!( - partially_shredded_float32_variant_array, - Float32Array, - f32 - ); - numeric_partially_shredded_variant_array_fn!( - partially_shredded_float64_variant_array, - Float64Array, - f64 - ); - - partially_shredded_variant_array_gen!(partially_shredded_bool_variant_array, || { - arrow::array::BooleanArray::from(vec![ - Some(true), // row 0 is shredded, so it has a value - None, // row 1 is null, so no value - None, // row 2 is a string, so no typed value - Some(false), // row 3 is shredded, so it has a value - ]) - }); - - partially_shredded_variant_array_gen!(partially_shredded_utf8_variant_array, || { - StringArray::from(vec![ - Some("hello"), // row 0 is shredded - None, // row 1 is null - None, // row 2 is a string - 
Some("world"), // row 3 is shredded - ]) - }); - - partially_shredded_variant_array_gen!(partially_shredded_date32_variant_array, || { - Date32Array::from(vec![ - Some(20348), // row 0 is shredded, 2025-09-17 - None, // row 1 is null - None, // row 2 is a string, not a date - Some(20340), // row 3 is shredded, 2025-09-09 - ]) - }); - /// Return a VariantArray that represents an "all null" variant /// for the following example (3 null values): /// @@ -1761,12 +1712,7 @@ mod test { let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n(EMPTY_VARIANT_METADATA_BYTES, 3)); - let struct_array = StructArrayBuilder::new() - .with_field("metadata", Arc::new(metadata), false) - .with_nulls(nulls) - .build(); - - Arc::new(struct_array) + ArrayRef::from(VariantArray::from_parts(metadata, None, None, Some(nulls))) } /// This test manually constructs a shredded variant array representing objects /// like {"x": 1, "y": "foo"} and {"x": 42} and tests extracting the "x" field @@ -1851,13 +1797,11 @@ mod test { let x_field_typed_value = Int32Array::from(vec![Some(1), Some(42)]); // For perfect shredding of the x field, no "value" column, only typed_value - let x_field_struct = StructArrayBuilder::new() - .with_field("typed_value", Arc::new(x_field_typed_value), true) - .build(); - - // Wrap the x field struct in a ShreddedVariantFieldArray - let x_field_shredded = ShreddedVariantFieldArray::try_new(&x_field_struct) - .expect("should create ShreddedVariantFieldArray"); + let x_field_shredded = ShreddedVariantFieldArray::from_parts( + None, + Some(Arc::new(x_field_typed_value) as ArrayRef), + None, + ); // Create the main typed_value as a struct containing the "x" field let typed_value_fields = Fields::from(vec![Field::new( @@ -1873,13 +1817,12 @@ mod test { .unwrap(); // Create the main VariantArray - let main_struct = StructArrayBuilder::new() - .with_field("metadata", Arc::new(metadata_array), false) - .with_field("value", Arc::new(value_array), true) - 
.with_field("typed_value", Arc::new(typed_value_struct), true) - .build(); - - Arc::new(main_struct) + ArrayRef::from(VariantArray::from_parts( + metadata_array, + Some(value_array), + Some(Arc::new(typed_value_struct)), + None, + )) } /// Simple test to check if nested paths are supported by current implementation @@ -2231,12 +2174,11 @@ mod test { let x_field_typed_value = Int32Array::from(vec![Some(42), None]); // For the x field, only typed_value (perfect shredding when possible) - let x_field_struct = StructArrayBuilder::new() - .with_field("typed_value", Arc::new(x_field_typed_value), true) - .build(); - - let x_field_shredded = ShreddedVariantFieldArray::try_new(&x_field_struct) - .expect("should create ShreddedVariantFieldArray"); + let x_field_shredded = ShreddedVariantFieldArray::from_parts( + None, + Some(Arc::new(x_field_typed_value) as ArrayRef), + None, + ); // Create the main typed_value as a struct containing the "x" field let typed_value_fields = Fields::from(vec![Field::new( @@ -2252,13 +2194,12 @@ mod test { .unwrap(); // Build final VariantArray - let struct_array = StructArrayBuilder::new() - .with_field("metadata", Arc::new(metadata_array), false) - .with_field("value", Arc::new(value_array), true) - .with_field("typed_value", Arc::new(typed_value_struct), true) - .build(); - - Arc::new(struct_array) + ArrayRef::from(VariantArray::from_parts( + metadata_array, + Some(value_array), + Some(Arc::new(typed_value_struct)), + None, + )) } /// Create working depth 1 shredded test data based on the existing working pattern @@ -2313,11 +2254,11 @@ mod test { // Create the nested shredded structure // Level 2: x field (the deepest level) let x_typed_value = Int32Array::from(vec![Some(55), None]); - let x_field_struct = StructArrayBuilder::new() - .with_field("typed_value", Arc::new(x_typed_value), true) - .build(); - let x_field_shredded = ShreddedVariantFieldArray::try_new(&x_field_struct) - .expect("should create ShreddedVariantFieldArray for x"); + 
let x_field_shredded = ShreddedVariantFieldArray::from_parts( + None, + Some(Arc::new(x_typed_value) as ArrayRef), + None, + ); // Level 1: a field containing x field + value field for fallbacks // The "a" field needs both typed_value (for shredded x) and value (for fallback cases) @@ -2340,23 +2281,15 @@ mod test { x_field_shredded.data_type().clone(), true, )]); - let a_inner_struct = StructArrayBuilder::new() - .with_field( - "typed_value", - Arc::new( - StructArray::try_new( - a_inner_fields, - vec![ArrayRef::from(x_field_shredded)], - None, - ) - .unwrap(), - ), - true, - ) - .with_field("value", Arc::new(a_value_array), true) - .build(); - let a_field_shredded = ShreddedVariantFieldArray::try_new(&a_inner_struct) - .expect("should create ShreddedVariantFieldArray for a"); + let a_inner_typed_value = Arc::new( + StructArray::try_new(a_inner_fields, vec![ArrayRef::from(x_field_shredded)], None) + .unwrap(), + ) as ArrayRef; + let a_field_shredded = ShreddedVariantFieldArray::from_parts( + Some(a_value_array), + Some(a_inner_typed_value), + None, + ); // Level 0: main typed_value struct containing a field let typed_value_fields = Fields::from(vec![Field::new( @@ -2372,13 +2305,12 @@ mod test { .unwrap(); // Build final VariantArray - let struct_array = StructArrayBuilder::new() - .with_field("metadata", Arc::new(metadata_array), false) - .with_field("value", Arc::new(value_array), true) - .with_field("typed_value", Arc::new(typed_value_struct), true) - .build(); - - Arc::new(struct_array) + ArrayRef::from(VariantArray::from_parts( + metadata_array, + Some(value_array), + Some(Arc::new(typed_value_struct)), + None, + )) } /// Create working depth 2 shredded test data for "a.b.x" paths @@ -2426,11 +2358,11 @@ mod test { // Level 3: x field (deepest level) let x_typed_value = Int32Array::from(vec![Some(100), None, None]); - let x_field_struct = StructArrayBuilder::new() - .with_field("typed_value", Arc::new(x_typed_value), true) - .build(); - let x_field_shredded = 
ShreddedVariantFieldArray::try_new(&x_field_struct) - .expect("should create ShreddedVariantFieldArray for x"); + let x_field_shredded = ShreddedVariantFieldArray::from_parts( + None, + Some(Arc::new(x_typed_value) as ArrayRef), + None, + ); // Level 2: b field containing x field + value field let b_value_data = { @@ -2451,23 +2383,15 @@ mod test { x_field_shredded.data_type().clone(), true, )]); - let b_inner_struct = StructArrayBuilder::new() - .with_field( - "typed_value", - Arc::new( - StructArray::try_new( - b_inner_fields, - vec![ArrayRef::from(x_field_shredded)], - None, - ) - .unwrap(), - ), - true, - ) - .with_field("value", Arc::new(b_value_array), true) - .build(); - let b_field_shredded = ShreddedVariantFieldArray::try_new(&b_inner_struct) - .expect("should create ShreddedVariantFieldArray for b"); + let b_inner_typed_value = Arc::new( + StructArray::try_new(b_inner_fields, vec![ArrayRef::from(x_field_shredded)], None) + .unwrap(), + ) as ArrayRef; + let b_field_shredded = ShreddedVariantFieldArray::from_parts( + Some(b_value_array), + Some(b_inner_typed_value), + None, + ); // Level 1: a field containing b field + value field let a_value_data = { @@ -2488,23 +2412,15 @@ mod test { b_field_shredded.data_type().clone(), true, )]); - let a_inner_struct = StructArrayBuilder::new() - .with_field( - "typed_value", - Arc::new( - StructArray::try_new( - a_inner_fields, - vec![ArrayRef::from(b_field_shredded)], - None, - ) - .unwrap(), - ), - true, - ) - .with_field("value", Arc::new(a_value_array), true) - .build(); - let a_field_shredded = ShreddedVariantFieldArray::try_new(&a_inner_struct) - .expect("should create ShreddedVariantFieldArray for a"); + let a_inner_typed_value = Arc::new( + StructArray::try_new(a_inner_fields, vec![ArrayRef::from(b_field_shredded)], None) + .unwrap(), + ) as ArrayRef; + let a_field_shredded = ShreddedVariantFieldArray::from_parts( + Some(a_value_array), + Some(a_inner_typed_value), + None, + ); // Level 0: main typed_value 
struct containing a field let typed_value_fields = Fields::from(vec![Field::new( @@ -2520,13 +2436,12 @@ mod test { .unwrap(); // Build final VariantArray - let struct_array = StructArrayBuilder::new() - .with_field("metadata", Arc::new(metadata_array), false) - .with_field("value", Arc::new(value_array), true) - .with_field("typed_value", Arc::new(typed_value_struct), true) - .build(); - - Arc::new(struct_array) + ArrayRef::from(VariantArray::from_parts( + metadata_array, + Some(value_array), + Some(Arc::new(typed_value_struct)), + None, + )) } #[test] @@ -3212,27 +3127,27 @@ mod test { // Create shredded fields with different null patterns // Field "a": present in rows 0,3 (missing in rows 1,2,4) let a_field_typed_value = Int32Array::from(vec![Some(1), None, None, Some(1), None]); - let a_field_struct = StructArrayBuilder::new() - .with_field("typed_value", Arc::new(a_field_typed_value), true) - .build(); - let a_field_shredded = ShreddedVariantFieldArray::try_new(&a_field_struct) - .expect("should create ShreddedVariantFieldArray for a"); + let a_field_shredded = ShreddedVariantFieldArray::from_parts( + None, + Some(Arc::new(a_field_typed_value) as ArrayRef), + None, + ); // Field "b": present in rows 0,2 (missing in rows 1,3,4) let b_field_typed_value = Int32Array::from(vec![Some(2), None, Some(2), None, None]); - let b_field_struct = StructArrayBuilder::new() - .with_field("typed_value", Arc::new(b_field_typed_value), true) - .build(); - let b_field_shredded = ShreddedVariantFieldArray::try_new(&b_field_struct) - .expect("should create ShreddedVariantFieldArray for b"); + let b_field_shredded = ShreddedVariantFieldArray::from_parts( + None, + Some(Arc::new(b_field_typed_value) as ArrayRef), + None, + ); // Field "c": present in row 0 only (missing in all other rows) let c_field_typed_value = Int32Array::from(vec![Some(3), None, None, None, None]); - let c_field_struct = StructArrayBuilder::new() - .with_field("typed_value", Arc::new(c_field_typed_value), true) 
- .build(); - let c_field_shredded = ShreddedVariantFieldArray::try_new(&c_field_struct) - .expect("should create ShreddedVariantFieldArray for c"); + let c_field_shredded = ShreddedVariantFieldArray::from_parts( + None, + Some(Arc::new(c_field_typed_value) as ArrayRef), + None, + ); // Create main typed_value struct let typed_value_fields = Fields::from(vec![ @@ -3252,13 +3167,12 @@ mod test { .unwrap(); // Build final VariantArray with top-level nulls - let struct_array = StructArrayBuilder::new() - .with_field("metadata", Arc::new(metadata_array), false) - .with_field("typed_value", Arc::new(typed_value_struct), true) - .with_nulls(nulls) - .build(); - - Arc::new(struct_array) + ArrayRef::from(VariantArray::from_parts( + metadata_array, + None, + Some(Arc::new(typed_value_struct)), + Some(nulls), + )) } /// Create comprehensive nested shredded variant with diverse null patterns @@ -3269,10 +3183,11 @@ mod test { // Create the inner level: contains typed_value with Int32 values // Row 0: has value 42, Row 1: inner null, Row 2: outer null, Row 3: top-level null let inner_typed_value = Int32Array::from(vec![Some(42), None, None, None]); // dummy value for row 2 - let inner = StructArrayBuilder::new() - .with_field("typed_value", Arc::new(inner_typed_value), true) - .build(); - let inner = ShreddedVariantFieldArray::try_new(&inner).unwrap(); + let inner = ShreddedVariantFieldArray::from_parts( + None, + Some(Arc::new(inner_typed_value) as ArrayRef), + None, + ); let outer_typed_value_nulls = NullBuffer::from(vec![ true, // row 0: inner struct exists with typed_value=42 @@ -3285,10 +3200,11 @@ mod test { .with_nulls(outer_typed_value_nulls) .build(); - let outer = StructArrayBuilder::new() - .with_field("typed_value", Arc::new(outer_typed_value), true) - .build(); - let outer = ShreddedVariantFieldArray::try_new(&outer).unwrap(); + let outer = ShreddedVariantFieldArray::from_parts( + None, + Some(Arc::new(outer_typed_value) as ArrayRef), + None, + ); let 
typed_value_nulls = NullBuffer::from(vec![ true, // row 0: inner struct exists with typed_value=42 @@ -3310,13 +3226,12 @@ mod test { true, // row 2: outer field NULL false, // row 3: top-level NULL ]); - let struct_array = StructArrayBuilder::new() - .with_field("metadata", Arc::new(metadata_array), false) - .with_field("typed_value", Arc::new(typed_value), true) - .with_nulls(nulls) - .build(); - - Arc::new(struct_array) + ArrayRef::from(VariantArray::from_parts( + metadata_array, + None, + Some(Arc::new(typed_value)), + Some(nulls), + )) } /// Create variant with mixed shredding (spec-compliant) including null scenarios @@ -3366,11 +3281,11 @@ mod test { // Create shredded field "x" (globally shredded - never appears in value field) // For top-level null row, the field still needs valid content (not null) let x_field_typed_value = Int32Array::from(vec![Some(1), Some(2), Some(3), Some(0)]); - let x_field_struct = StructArrayBuilder::new() - .with_field("typed_value", Arc::new(x_field_typed_value), true) - .build(); - let x_field_shredded = ShreddedVariantFieldArray::try_new(&x_field_struct) - .expect("should create ShreddedVariantFieldArray for x"); + let x_field_shredded = ShreddedVariantFieldArray::from_parts( + None, + Some(Arc::new(x_field_typed_value) as ArrayRef), + None, + ); // Create main typed_value struct (only contains shredded fields) let typed_value_struct = StructArrayBuilder::new() @@ -3380,14 +3295,12 @@ mod test { // Build VariantArray with both value and typed_value (PartiallyShredded) // Top-level null is encoded in the main StructArray's null mask let variant_nulls = NullBuffer::from(vec![true, true, true, false]); // Row 3 is top-level null - let struct_array = StructArrayBuilder::new() - .with_field("metadata", Arc::new(metadata_array), false) - .with_field("value", Arc::new(value_array), true) - .with_field("typed_value", Arc::new(typed_value_struct), true) - .with_nulls(variant_nulls) - .build(); - - Arc::new(struct_array) + 
ArrayRef::from(VariantArray::from_parts( + metadata_array, + Some(value_array), + Some(Arc::new(typed_value_struct)), + Some(variant_nulls), + )) } #[test] @@ -4061,11 +3974,9 @@ mod test { EMPTY_VARIANT_METADATA_BYTES, all_nulls_values.len(), )); - let variant_struct = StructArrayBuilder::new() - .with_field("metadata", Arc::new(metadata), false) - .with_field("typed_value", Arc::new(typed_value_struct), true) - .build(); - let variant_array: ArrayRef = VariantArray::try_new(&variant_struct).unwrap().into(); + let variant_array: ArrayRef = + VariantArray::from_parts(metadata, None, Some(Arc::new(typed_value_struct)), None) + .into(); // Case 1: all-null primitive column should reuse the typed_value Arc directly let all_nulls_field_ref = FieldRef::from(Field::new("result", DataType::Int32, true)); From 7ea7cdc55a20162346e2e006ac4589a30f7bfdbb Mon Sep 17 00:00:00 2001 From: Tobias Schwarzinger Date: Wed, 18 Mar 2026 20:36:23 +0100 Subject: [PATCH 55/80] Optimize `take_fixed_size_binary` For Predefined Value Lengths (#9535) # Which issue does this PR close? - Related to https://github.com/apache/arrow-rs/issues/279 # Rationale for this change The `take` kernel is very important for many operations (e.g., `HashJoin` in DataFusion IIRC). Currently, there is a gap between the performance of the take kernel for primitive arrays (e.g., `DataType::UInt32`) and fixed size binary arrays of the same length (e.g., `FixedSizeBinary<4>`). In our case this led to a performance reduction when moving from an integer-based id column to a fixed-size-binary-based id column. This PR aims to address parts of this gap. The 16-bytes case would especially benefit operations on UUID columns. # What changes are included in this PR? - Add `take_fixed_size` that can be called for a set of predefined fsb-lengths that we want to support. This is a "flat buffer" version of the `take_native` kernel. # Are these changes tested?
I've added another test that still exercises the non-optimized code path. # Are there any user-facing changes? No --- arrow-select/src/take.rs | 173 ++++++++++++++++++++++++++++------ arrow/benches/take_kernels.rs | 16 +++- 2 files changed, 156 insertions(+), 33 deletions(-) diff --git a/arrow-select/src/take.rs b/arrow-select/src/take.rs index 43c13e66fb0e..ee813f5353c2 100644 --- a/arrow-select/src/take.rs +++ b/arrow-select/src/take.rs @@ -18,6 +18,7 @@ //! Defines take kernel for [Array] use std::fmt::Display; +use std::mem::ManuallyDrop; use std::sync::Arc; use arrow_array::builder::{BufferBuilder, UInt32Builder}; @@ -723,46 +724,127 @@ fn take_fixed_size_binary( ArrowError::InvalidArgumentError(format!("Cannot convert size '{}' to usize", size)) })?; - let values_buffer = values.values().as_slice(); - let mut values_buffer_builder = BufferBuilder::new(indices.len() * size_usize); - - if indices.null_count() == 0 { - let array_iter = indices.values().iter().map(|idx| { - let offset = idx.as_usize() * size_usize; - &values_buffer[offset..offset + size_usize] - }); - for slice in array_iter { - values_buffer_builder.append_slice(slice); - } - } else { - // The indices nullability cannot be ignored here because the values buffer may contain - // nulls which should not cause a panic. 
- let array_iter = indices.iter().map(|idx| { - idx.map(|idx| { - let offset = idx.as_usize() * size_usize; - &values_buffer[offset..offset + size_usize] - }) - }); - for slice in array_iter { - match slice { - None => values_buffer_builder.append_n(size_usize, 0), - Some(slice) => values_buffer_builder.append_slice(slice), - } - } - } + let result_buffer = match size_usize { + 1 => take_fixed_size::(values.values(), indices), + 2 => take_fixed_size::(values.values(), indices), + 4 => take_fixed_size::(values.values(), indices), + 8 => take_fixed_size::(values.values(), indices), + 16 => take_fixed_size::(values.values(), indices), + _ => take_fixed_size_binary_buffer_dynamic_length(values, indices, size_usize), + }; - let values_buffer = values_buffer_builder.finish(); let value_nulls = take_nulls(values.nulls(), indices); let final_nulls = NullBuffer::union(value_nulls.as_ref(), indices.nulls()); - let array_data = ArrayDataBuilder::new(DataType::FixedSizeBinary(size)) .len(indices.len()) .nulls(final_nulls) .offset(0) - .add_buffer(values_buffer) + .add_buffer(result_buffer) .build()?; - Ok(FixedSizeBinaryArray::from(array_data)) + return Ok(FixedSizeBinaryArray::from(array_data)); + + /// Implementation of the take kernel for fixed size binary arrays. + #[inline(never)] + fn take_fixed_size_binary_buffer_dynamic_length( + values: &FixedSizeBinaryArray, + indices: &PrimitiveArray, + size_usize: usize, + ) -> Buffer { + let values_buffer = values.values().as_slice(); + let mut values_buffer_builder = BufferBuilder::new(indices.len() * size_usize); + + if indices.null_count() == 0 { + let array_iter = indices.values().iter().map(|idx| { + let offset = idx.as_usize() * size_usize; + &values_buffer[offset..offset + size_usize] + }); + for slice in array_iter { + values_buffer_builder.append_slice(slice); + } + } else { + // The indices nullability cannot be ignored here because the values buffer may contain + // nulls which should not cause a panic. 
+ let array_iter = indices.iter().map(|idx| { + idx.map(|idx| { + let offset = idx.as_usize() * size_usize; + &values_buffer[offset..offset + size_usize] + }) + }); + for slice in array_iter { + match slice { + None => values_buffer_builder.append_n(size_usize, 0), + Some(slice) => values_buffer_builder.append_slice(slice), + } + } + } + + values_buffer_builder.finish() + } +} + +/// Implements the take kernel semantics over a flat [`Buffer`], interpreting it as a slice of +/// `&[[u8; N]]`, where `N` is a compile-time constant. The usage of a flat [`Buffer`] allows using +/// this kernel without an available [`ArrowPrimitiveType`] (e.g., for `[u8; 5]`). +/// +/// # Using This Function in the Primitive Take Kernel +/// +/// This function is basically the same as [`take_native`] but just on a flat [`Buffer`] instead of +/// the primitive [`ScalarBuffer`]. Ideally, the [`take_primitive`] kernel should just use this +/// more general function. However, the "idiomatic code" requires the +/// [feature(generic_const_exprs)](https://github.com/rust-lang/rust/issues/76560) for calling +/// `take_fixed_size () } >(...)`. Once this feature has been stabilized, +/// we can use this function also in the primitive kernels. +fn take_fixed_size( + buffer: &Buffer, + indices: &PrimitiveArray, +) -> Buffer { + assert_eq!( + buffer.len() % N, + 0, + "Invalid array length in take_fixed_size" + ); + + let ptr = buffer.as_ptr(); + let chunk_ptr = ptr.cast::<[u8; N]>(); + let chunk_len = buffer.len() / N; + let buffer: &[[u8; N]] = unsafe { + // SAFETY: interpret an already valid slice as a slice of N-byte chunks. N divides buffer + // length without remainder. 
+ std::slice::from_raw_parts(chunk_ptr, chunk_len) + }; + + let result_buffer = match indices.nulls().filter(|n| n.null_count() > 0) { + Some(n) => indices + .values() + .iter() + .enumerate() + .map(|(idx, index)| match buffer.get(index.as_usize()) { + Some(v) => *v, + // SAFETY: idx match unsafe { n.inner().value_unchecked(idx) } { + false => [0u8; N], + true => panic!("Out-of-bounds index {index:?}"), + }, + }) + .collect::>(), + None => indices + .values() + .iter() + .map(|index| buffer[index.as_usize()]) + .collect::>(), + }; + + let mut vec = ManuallyDrop::new(result_buffer); // Prevent de-allocation + let ptr = vec.as_mut_ptr(); + let len = vec.len(); + let cap = vec.capacity(); + let result_buffer = unsafe { + // SAFETY: flattening an already valid Vec. + Vec::from_raw_parts(ptr.cast::(), len * N, cap * N) + }; + + Buffer::from_vec(result_buffer) } /// `take` implementation for dictionary arrays @@ -2150,6 +2232,35 @@ mod tests { ); } + /// The [`take_fixed_size_binary`] kernel contains optimizations that provide a faster + /// implementation for commonly-used value lengths. This test uses a value length that is not + /// optimized to test both code paths. + #[test] + fn test_take_fixed_size_binary_with_nulls_indices_not_optimized_length() { + let fsb = FixedSizeBinaryArray::try_from_sparse_iter_with_size( + [ + Some(vec![0x01, 0x01, 0x01, 0x01, 0x01]), + Some(vec![0x02, 0x02, 0x02, 0x02, 0x01]), + Some(vec![0x03, 0x03, 0x03, 0x03, 0x01]), + Some(vec![0x04, 0x04, 0x04, 0x04, 0x01]), + ] + .into_iter(), + 5, + ) + .unwrap(); + + // The two middle indices are null -> Should be null in the output. 
+ let indices = UInt32Array::from(vec![Some(0), None, None, Some(3)]); + + let result = take_fixed_size_binary(&fsb, &indices, 5).unwrap(); + assert_eq!(result.len(), 4); + assert_eq!(result.null_count(), 2); + assert_eq!( + result.nulls().unwrap().iter().collect::>(), + vec![true, false, false, true] + ); + } + #[test] #[should_panic(expected = "index out of bounds: the len is 4 but the index is 1000")] fn test_take_list_out_of_bounds() { diff --git a/arrow/benches/take_kernels.rs b/arrow/benches/take_kernels.rs index 37b83a5e33ed..fb231771681c 100644 --- a/arrow/benches/take_kernels.rs +++ b/arrow/benches/take_kernels.rs @@ -195,14 +195,26 @@ fn add_benchmark(c: &mut Criterion) { let values = create_fsb_array(1024, 0.0, 12); let indices = create_random_index(1024, 0.0); - c.bench_function("take primitive fsb value len: 12, indices: 1024", |b| { + c.bench_function("take fsb value len: 12, indices: 1024", |b| { b.iter(|| bench_take(&values, &indices)) }); let values = create_fsb_array(1024, 0.5, 12); let indices = create_random_index(1024, 0.0); + c.bench_function("take fsb value len: 12, null values, indices: 1024", |b| { + b.iter(|| bench_take(&values, &indices)) + }); + + let values = create_fsb_array(1024, 0.0, 16); + let indices = create_random_index(1024, 0.0); + c.bench_function("take fsb value optimized len: 16, indices: 1024", |b| { + b.iter(|| bench_take(&values, &indices)) + }); + + let values = create_fsb_array(1024, 0.5, 16); + let indices = create_random_index(1024, 0.0); c.bench_function( - "take primitive fsb value len: 12, null values, indices: 1024", + "take fsb value optimized len: 16, null values, indices: 1024", |b| b.iter(|| bench_take(&values, &indices)), ); } From 88422cbdcbfa8f4e2411d66578dd3582fafbf2a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alfonso=20Subiotto=20Marqu=C3=A9s?= Date: Wed, 18 Mar 2026 20:47:38 +0100 Subject: [PATCH 56/80] arrow-flight: generate dict_ids for dicts nested inside complex types (#9556) Some cases were missing. 
# Which issue does this PR close? - Closes #9555 . # Rationale for this change Fix flight encoding panic # What changes are included in this PR? Assigning dict ids properly to nested dicts # Are these changes tested? Yes. The same tests fail on main. # Are there any user-facing changes? Signed-off-by: Alfonso Subiotto Marques --- arrow-flight/src/encode.rs | 272 +++++++++++++++++++++++++++++++++---- arrow-ipc/src/writer.rs | 7 +- 2 files changed, 248 insertions(+), 31 deletions(-) diff --git a/arrow-flight/src/encode.rs b/arrow-flight/src/encode.rs index 187de400f6c0..191da024136f 100644 --- a/arrow-flight/src/encode.rs +++ b/arrow-flight/src/encode.rs @@ -528,13 +528,28 @@ fn prepare_field_for_flight( } DataType::Dictionary(_, value_type) => { if !send_dictionaries { - Field::new( + // Recurse into value type to handle nested dicts being stripped + let value_field = Field::new( field.name(), value_type.as_ref().clone(), field.is_nullable(), + ); + prepare_field_for_flight( + &Arc::new(value_field), + dictionary_tracker, + send_dictionaries, ) .with_metadata(field.metadata().clone()) } else { + // Recurse into value type BEFORE registering this dict's id, + // matching the depth-first order of encode_dictionaries in the + // IPC writer which processes nested dicts before the parent. 
+ let value_field = Field::new("values", value_type.as_ref().clone(), true); + prepare_field_for_flight( + &Arc::new(value_field), + dictionary_tracker, + send_dictionaries, + ); dictionary_tracker.next_dict_id(); #[allow(deprecated)] Field::new_dict( @@ -547,6 +562,44 @@ fn prepare_field_for_flight( .with_metadata(field.metadata().clone()) } } + DataType::ListView(inner) | DataType::LargeListView(inner) => { + let prepared = prepare_field_for_flight(inner, dictionary_tracker, send_dictionaries); + Field::new( + field.name(), + match field.data_type() { + DataType::ListView(_) => DataType::ListView(Arc::new(prepared)), + _ => DataType::LargeListView(Arc::new(prepared)), + }, + field.is_nullable(), + ) + .with_metadata(field.metadata().clone()) + } + DataType::FixedSizeList(inner, size) => Field::new( + field.name(), + DataType::FixedSizeList( + Arc::new(prepare_field_for_flight( + inner, + dictionary_tracker, + send_dictionaries, + )), + *size, + ), + field.is_nullable(), + ) + .with_metadata(field.metadata().clone()), + DataType::RunEndEncoded(run_ends, values) => Field::new( + field.name(), + DataType::RunEndEncoded( + run_ends.clone(), + Arc::new(prepare_field_for_flight( + values, + dictionary_tracker, + send_dictionaries, + )), + ), + field.is_nullable(), + ) + .with_metadata(field.metadata().clone()), DataType::Map(inner, sorted) => Field::new( field.name(), DataType::Map( @@ -556,7 +609,37 @@ fn prepare_field_for_flight( field.is_nullable(), ) .with_metadata(field.metadata().clone()), - _ => field.as_ref().clone(), + DataType::Null + | DataType::Boolean + | DataType::Int8 + | DataType::Int16 + | DataType::Int32 + | DataType::Int64 + | DataType::UInt8 + | DataType::UInt16 + | DataType::UInt32 + | DataType::UInt64 + | DataType::Float16 + | DataType::Float32 + | DataType::Float64 + | DataType::Timestamp(_, _) + | DataType::Date32 + | DataType::Date64 + | DataType::Time32(_) + | DataType::Time64(_) + | DataType::Duration(_) + | DataType::Interval(_) + | 
DataType::Binary + | DataType::FixedSizeBinary(_) + | DataType::LargeBinary + | DataType::BinaryView + | DataType::Utf8 + | DataType::LargeUtf8 + | DataType::Utf8View + | DataType::Decimal32(_, _) + | DataType::Decimal64(_, _) + | DataType::Decimal128(_, _) + | DataType::Decimal256(_, _) => field.as_ref().clone(), } } @@ -573,33 +656,7 @@ fn prepare_schema_for_flight( let fields: Fields = schema .fields() .iter() - .map(|field| match field.data_type() { - DataType::Dictionary(_, value_type) => { - if !send_dictionaries { - Field::new( - field.name(), - value_type.as_ref().clone(), - field.is_nullable(), - ) - .with_metadata(field.metadata().clone()) - } else { - dictionary_tracker.next_dict_id(); - #[allow(deprecated)] - Field::new_dict( - field.name(), - field.data_type().clone(), - field.is_nullable(), - 0, - field.dict_is_ordered().unwrap_or_default(), - ) - .with_metadata(field.metadata().clone()) - } - } - tpe if tpe.is_nested() => { - prepare_field_for_flight(field, dictionary_tracker, send_dictionaries) - } - _ => field.as_ref().clone(), - }) + .map(|field| prepare_field_for_flight(field, dictionary_tracker, send_dictionaries)) .collect(); Schema::new(fields).with_metadata(schema.metadata().clone()) @@ -729,7 +786,8 @@ fn hydrate_dictionary(array: &ArrayRef, data_type: &DataType) -> Result>(); + let run_ends1 = Int32Array::from(vec![1, 2, 3]); + let arr1 = RunArray::try_new(&run_ends1, &dict_values1).unwrap(); + + let dict_values2 = vec![Some("c"), Some("a")] + .into_iter() + .collect::>(); + let run_ends2 = Int32Array::from(vec![1, 2]); + let arr2 = RunArray::try_new(&run_ends2, &dict_values2).unwrap(); + + let schema = Arc::new(Schema::new(vec![Field::new( + "ree", + arr1.data_type().clone(), + true, + )])); + + let batch1 = RecordBatch::try_new(schema.clone(), vec![Arc::new(arr1)]).unwrap(); + let batch2 = RecordBatch::try_new(schema, vec![Arc::new(arr2)]).unwrap(); + + verify_flight_round_trip(vec![batch1, batch2]).await; + } + + #[tokio::test] + async 
fn test_dictionary_of_struct_of_dict_resend() { + // Dict(Int8, Struct { dict: Dict(Int32, Utf8), int: Int32 }) + // This exercises the Dictionary branch recursing into its value type + // before assigning its own dict_id (depth-first ordering). + let struct_fields: Vec = vec![ + Field::new_dictionary("dict", DataType::Int32, DataType::Utf8, true), + Field::new("int", DataType::Int32, false), + ]; + + let inner_values = + StringArray::from(vec![Some("alpha"), None, Some("beta"), Some("gamma")]); + let inner_keys = Int32Array::from_iter_values([0, 1, 2, 3, 0]); + let inner_dict = DictionaryArray::new(inner_keys, Arc::new(inner_values)); + let int_array = Int32Array::from(vec![10, 20, 30, 40, 50]); + + let struct_array = StructArray::from(vec![ + ( + Arc::new(struct_fields[0].clone()), + Arc::new(inner_dict) as ArrayRef, + ), + ( + Arc::new(struct_fields[1].clone()), + Arc::new(int_array) as ArrayRef, + ), + ]); + + let outer_keys = Int8Array::from_iter_values([0, 0, 1, 2]); + let arr1 = DictionaryArray::new(outer_keys, Arc::new(struct_array)); + + let inner_values2 = StringArray::from(vec![Some("x"), Some("y")]); + let inner_keys2 = Int32Array::from_iter_values([0, 1, 0]); + let inner_dict2 = DictionaryArray::new(inner_keys2, Arc::new(inner_values2)); + let int_array2 = Int32Array::from(vec![100, 200, 300]); + + let struct_array2 = StructArray::from(vec![ + ( + Arc::new(struct_fields[0].clone()), + Arc::new(inner_dict2) as ArrayRef, + ), + ( + Arc::new(struct_fields[1].clone()), + Arc::new(int_array2) as ArrayRef, + ), + ]); + + let outer_keys2 = Int8Array::from_iter_values([0, 1]); + let arr2 = DictionaryArray::new(outer_keys2, Arc::new(struct_array2)); + + let schema = Arc::new(Schema::new(vec![Field::new( + "dict_struct", + arr1.data_type().clone(), + false, + )])); + + let batch1 = RecordBatch::try_new(schema.clone(), vec![Arc::new(arr1)]).unwrap(); + let batch2 = RecordBatch::try_new(schema, vec![Arc::new(arr2)]).unwrap(); + + 
verify_flight_round_trip(vec![batch1, batch2]).await; + } + + async fn verify_dictionary_list_view_resend() { + let mut builder = + GenericListViewBuilder::::new(StringDictionaryBuilder::::new()); + + builder.append_value(vec![Some("a"), None, Some("b")]); + let arr1 = builder.finish(); + + builder.append_value(vec![Some("c"), None, Some("d")]); + let arr2 = builder.finish(); + + let inner = Arc::new(Field::new_dictionary( + "item", + DataType::UInt16, + DataType::Utf8, + true, + )); + let dt = if O::IS_LARGE { + DataType::LargeListView(inner) + } else { + DataType::ListView(inner) + }; + let schema = Arc::new(Schema::new(vec![Field::new("dict_list_view", dt, true)])); + + let batch1 = RecordBatch::try_new(schema.clone(), vec![Arc::new(arr1)]).unwrap(); + let batch2 = RecordBatch::try_new(schema, vec![Arc::new(arr2)]).unwrap(); + + verify_flight_round_trip(vec![batch1, batch2]).await; + } + + #[tokio::test] + async fn test_dictionary_list_view_resend() { + verify_dictionary_list_view_resend::().await; + } + + #[tokio::test] + async fn test_dictionary_large_list_view_resend() { + verify_dictionary_list_view_resend::().await; + } + + #[tokio::test] + async fn test_dictionary_fixed_size_list_resend() { + let mut builder = + FixedSizeListBuilder::new(StringDictionaryBuilder::::new(), 2); + + builder.values().append_value("a"); + builder.values().append_value("b"); + builder.append(true); + let arr1 = builder.finish(); + + builder.values().append_value("c"); + builder.values().append_value("d"); + builder.append(true); + let arr2 = builder.finish(); + + let schema = Arc::new(Schema::new(vec![Field::new_fixed_size_list( + "dict_fsl", + Field::new_dictionary("item", DataType::UInt16, DataType::Utf8, true), + 2, + true, + )])); + + let batch1 = RecordBatch::try_new(schema.clone(), vec![Arc::new(arr1)]).unwrap(); + let batch2 = RecordBatch::try_new(schema, vec![Arc::new(arr2)]).unwrap(); + + verify_flight_round_trip(vec![batch1, batch2]).await; + } + async fn 
verify_flight_round_trip(mut batches: Vec) { let expected_schema = batches.first().unwrap().schema(); diff --git a/arrow-ipc/src/writer.rs b/arrow-ipc/src/writer.rs index 23217fec6dfe..9d3fca293874 100644 --- a/arrow-ipc/src/writer.rs +++ b/arrow-ipc/src/writer.rs @@ -417,7 +417,12 @@ impl IpcDataGenerator { // sequence is assigned depth-first, so we need to first encode children and have // them take their assigned dict IDs before we take the dict ID for this field. let dict_id = dict_id_seq.next().ok_or_else(|| { - ArrowError::IpcError(format!("no dict id for field {}", field.name())) + ArrowError::IpcError(format!( + "no dict id for field {:?}: field.data_type={:?}, column.data_type={:?}", + field.name(), + field.data_type(), + column.data_type() + )) })?; match dictionary_tracker.insert_column( From 42ab0bcef7c2257772dfb7de77b04051350e18cb Mon Sep 17 00:00:00 2001 From: Ed Seidl Date: Thu, 19 Mar 2026 10:39:35 -0700 Subject: [PATCH 57/80] fix: Used `checked_add` for bounds checks to avoid UB (#9568) # Which issue does this PR close? - Closes #9543. # Rationale for this change See issue, but it is possible to construct arguments to `arrow_buffer::bit_util::bit_mask::set_bits` that overflow the bounds checking protecting unsafe code. # What changes are included in this PR? Use `checked_add` when doing the bounds checking and panic when an overflow occurs. # Are these changes tested? Yes # Are there any user-facing changes? 
No --- arrow-buffer/src/util/bit_mask.rs | 50 +++++++++++++++++++++++++++++-- 1 file changed, 48 insertions(+), 2 deletions(-) diff --git a/arrow-buffer/src/util/bit_mask.rs b/arrow-buffer/src/util/bit_mask.rs index a8ae1a765414..e3897e67542d 100644 --- a/arrow-buffer/src/util/bit_mask.rs +++ b/arrow-buffer/src/util/bit_mask.rs @@ -32,8 +32,18 @@ pub fn set_bits( offset_read: usize, len: usize, ) -> usize { - assert!(offset_write + len <= write_data.len() * 8); - assert!(offset_read + len <= data.len() * 8); + assert!( + offset_write + .checked_add(len) + .expect("operation will overflow write buffer") + <= write_data.len() * 8 + ); + assert!( + offset_read + .checked_add(len) + .expect("operation will overflow read buffer") + <= data.len() * 8 + ); let mut null_count = 0; let mut acc = 0; while len > acc { @@ -427,4 +437,40 @@ mod tests { assert_eq!(len_set, 1); assert_eq!(write_data, &[0b00000010]); } + + #[test] + #[should_panic(expected = "operation will overflow read buffer")] + fn test_overflow_read_buffer_bounds() { + // Tiny buffers so any huge computed index is out-of-bounds. + let data = [0u8; 1]; + let mut write_data = [0u8; 1]; + + // Choose values so (offset_read + len) wraps to a small number in release builds. + // offset_read = usize::MAX - 7, len = 8 => wraps to 0. + // This can bypass `assert!(offset_read + len <= data.len() * 8)`. + let offset_write: usize = 0; + let offset_read: usize = usize::MAX - 7; + let len: usize = 8; + + // should panic on bounds check overflow + let _nulls = set_bits(&mut write_data, &data, offset_write, offset_read, len); + } + + #[test] + #[should_panic(expected = "operation will overflow write buffer")] + fn test_overflow_write_buffer_bounds() { + // Tiny buffers so any huge computed index is out-of-bounds. + let data = [0u8; 1]; + let mut write_data = [0u8; 1]; + + // Choose values so (offset_write + len) wraps to a small number in release builds. + // offset_write = usize::MAX - 7, len = 8 => wraps to 0. 
+ // This can bypass `assert!(offset_write + len <= write_data.len() * 8)`. + let offset_write: usize = usize::MAX - 7; + let offset_read: usize = 0; + let len: usize = 8; + + // should panic on bounds check overflow + let _nulls = set_bits(&mut write_data, &data, offset_write, offset_read, len); + } } From 55a7768bbb95976e1dac29facb2ea337aa4d89b6 Mon Sep 17 00:00:00 2001 From: Konstantin Tarasov <33369833+sdf-jkl@users.noreply.github.com> Date: Thu, 19 Mar 2026 13:41:47 -0400 Subject: [PATCH 58/80] [Variant] Add `variant_to_arrow` `Struct` type support (#9572) # Which issue does this PR close? - Closes #9529 . # Rationale for this change - In a follow-up PR, we can fix the `variant_get` TODO: https://github.com/apache/arrow-rs/blob/3b6179658203dc1b1610b67c1777d5b8beb137fc/parquet-variant-compute/src/variant_get.rs#L89-L92 - When we know that a Struct VariantArray is not shredded, we can reuse `shred_basic_variant` # What changes are included in this PR? - Added `StructVariantToArrowRowBuilder` builder. - Moved `make_variant_to_arrow_row_builder` logic to `make_typed_variant_to_arrow_row_builder` so that it can be reused for the inner fields of `Struct` arrays. - Changed a `variant_get` test to show that it now handles unshredded `Struct` `VariantArray` # Are these changes tested? - Yes, added `test_struct_row_builder_handles_unshredded_nested_structs` - Everything else still works. # Are there any user-facing changes?
No --------- Co-authored-by: Ryan Johnson --- parquet-variant-compute/src/variant_get.rs | 26 ++-- .../src/variant_to_arrow.rs | 133 +++++++++++++++--- 2 files changed, 130 insertions(+), 29 deletions(-) diff --git a/parquet-variant-compute/src/variant_get.rs b/parquet-variant-compute/src/variant_get.rs index a155d04e4720..9204dcf70856 100644 --- a/parquet-variant-compute/src/variant_get.rs +++ b/parquet-variant-compute/src/variant_get.rs @@ -3070,10 +3070,8 @@ mod test { assert!(struct_result.is_null(3)); } - /// Test that demonstrates the actual struct row builder gap - /// This test should fail because it hits unshredded nested structs #[test] - fn test_struct_row_builder_gap_demonstration() { + fn test_struct_row_builder_handles_unshredded_nested_structs() { // Create completely unshredded JSON variant (no typed_value at all) let json_strings = vec![ r#"{"outer": {"inner": 42}}"#, @@ -3082,7 +3080,7 @@ mod test { let string_array: Arc = Arc::new(StringArray::from(json_strings)); let variant_array = json_to_variant(&string_array).unwrap(); - // Request nested struct - this should fail at the row builder level + // Request nested struct let inner_fields = Fields::from(vec![Field::new("inner", DataType::Int32, true)]); let inner_struct_type = DataType::Struct(inner_fields); let outer_fields = Fields::from(vec![Field::new("outer", inner_struct_type, true)]); @@ -3095,12 +3093,22 @@ mod test { }; let variant_array_ref = ArrayRef::from(variant_array); - let result = variant_get(&variant_array_ref, options); + let result = variant_get(&variant_array_ref, options).unwrap(); - // Should fail with NotYetImplemented when the row builder tries to handle struct type - assert!(result.is_err()); - let error = result.unwrap_err(); - assert!(error.to_string().contains("Not yet implemented")); + let outer_struct = result.as_struct(); + assert_eq!(outer_struct.len(), 2); + assert_eq!(outer_struct.num_columns(), 1); + + let inner_struct = outer_struct.column(0).as_struct(); + 
assert_eq!(inner_struct.num_columns(), 1); + + let inner_values = inner_struct + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(inner_values.value(0), 42); + assert_eq!(inner_values.value(1), 100); } /// Create comprehensive shredded variant with diverse null patterns and empty objects diff --git a/parquet-variant-compute/src/variant_to_arrow.rs b/parquet-variant-compute/src/variant_to_arrow.rs index 106e8915beb8..dc8fbcd223d2 100644 --- a/parquet-variant-compute/src/variant_to_arrow.rs +++ b/parquet-variant-compute/src/variant_to_arrow.rs @@ -28,12 +28,13 @@ use arrow::array::{ BinaryViewBuilder, BooleanBuilder, FixedSizeBinaryBuilder, GenericListArray, GenericListViewArray, LargeBinaryBuilder, LargeStringBuilder, NullArray, NullBufferBuilder, OffsetSizeTrait, PrimitiveBuilder, StringBuilder, StringLikeArrayBuilder, StringViewBuilder, + StructArray, }; use arrow::buffer::{OffsetBuffer, ScalarBuffer}; use arrow::compute::{CastOptions, DecimalCast}; use arrow::datatypes::{self, DataType, DecimalType}; use arrow::error::{ArrowError, Result}; -use arrow_schema::{FieldRef, TimeUnit}; +use arrow_schema::{FieldRef, Fields, TimeUnit}; use parquet_variant::{Variant, VariantPath}; use std::sync::Arc; @@ -44,6 +45,7 @@ use std::sync::Arc; pub(crate) enum VariantToArrowRowBuilder<'a> { Primitive(PrimitiveVariantToArrowRowBuilder<'a>), Array(ArrayVariantToArrowRowBuilder<'a>), + Struct(StructVariantToArrowRowBuilder<'a>), BinaryVariant(VariantToBinaryVariantArrowRowBuilder), // Path extraction wrapper - contains a boxed enum for any of the above @@ -56,6 +58,7 @@ impl<'a> VariantToArrowRowBuilder<'a> { match self { Primitive(b) => b.append_null(), Array(b) => b.append_null(), + Struct(b) => b.append_null(), BinaryVariant(b) => b.append_null(), WithPath(path_builder) => path_builder.append_null(), } @@ -66,6 +69,7 @@ impl<'a> VariantToArrowRowBuilder<'a> { match self { Primitive(b) => b.append_value(&value), Array(b) => b.append_value(&value), + Struct(b) 
=> b.append_value(&value), BinaryVariant(b) => b.append_value(value), WithPath(path_builder) => path_builder.append_value(value), } @@ -76,12 +80,42 @@ impl<'a> VariantToArrowRowBuilder<'a> { match self { Primitive(b) => b.finish(), Array(b) => b.finish(), + Struct(b) => b.finish(), BinaryVariant(b) => b.finish(), WithPath(path_builder) => path_builder.finish(), } } } +fn make_typed_variant_to_arrow_row_builder<'a>( + data_type: &'a DataType, + cast_options: &'a CastOptions, + capacity: usize, +) -> Result> { + use VariantToArrowRowBuilder::*; + + match data_type { + DataType::Struct(fields) => { + let builder = StructVariantToArrowRowBuilder::try_new(fields, cast_options, capacity)?; + Ok(Struct(builder)) + } + data_type @ (DataType::List(_) + | DataType::LargeList(_) + | DataType::ListView(_) + | DataType::LargeListView(_) + | DataType::FixedSizeList(..)) => { + let builder = + ArrayVariantToArrowRowBuilder::try_new(data_type, cast_options, capacity)?; + Ok(Array(builder)) + } + data_type => { + let builder = + make_primitive_variant_to_arrow_row_builder(data_type, cast_options, capacity)?; + Ok(Primitive(builder)) + } + } +} + pub(crate) fn make_variant_to_arrow_row_builder<'a>( metadata: &BinaryViewArray, path: VariantPath<'a>, @@ -97,26 +131,8 @@ pub(crate) fn make_variant_to_arrow_row_builder<'a>( metadata.clone(), capacity, )), - Some(DataType::Struct(_)) => { - return Err(ArrowError::NotYetImplemented( - "Converting unshredded variant objects to arrow structs".to_string(), - )); - } - Some( - data_type @ (DataType::List(_) - | DataType::LargeList(_) - | DataType::ListView(_) - | DataType::LargeListView(_) - | DataType::FixedSizeList(..)), - ) => { - let builder = - ArrayVariantToArrowRowBuilder::try_new(data_type, cast_options, capacity)?; - Array(builder) - } Some(data_type) => { - let builder = - make_primitive_variant_to_arrow_row_builder(data_type, cast_options, capacity)?; - Primitive(builder) + make_typed_variant_to_arrow_row_builder(data_type, 
cast_options, capacity)? } }; @@ -491,6 +507,83 @@ pub(crate) enum ArrayVariantToArrowRowBuilder<'a> { LargeListView(VariantToListArrowRowBuilder<'a, i64, true>), } +pub(crate) struct StructVariantToArrowRowBuilder<'a> { + fields: &'a Fields, + field_builders: Vec>, + nulls: NullBufferBuilder, + cast_options: &'a CastOptions<'a>, +} + +impl<'a> StructVariantToArrowRowBuilder<'a> { + fn try_new( + fields: &'a Fields, + cast_options: &'a CastOptions<'a>, + capacity: usize, + ) -> Result { + let mut field_builders = Vec::with_capacity(fields.len()); + for field in fields.iter() { + field_builders.push(make_typed_variant_to_arrow_row_builder( + field.data_type(), + cast_options, + capacity, + )?); + } + Ok(Self { + fields, + field_builders, + nulls: NullBufferBuilder::new(capacity), + cast_options, + }) + } + + fn append_null(&mut self) -> Result<()> { + for builder in &mut self.field_builders { + builder.append_null()?; + } + self.nulls.append_null(); + Ok(()) + } + + fn append_value(&mut self, value: &Variant<'_, '_>) -> Result { + let Variant::Object(obj) = value else { + if self.cast_options.safe { + self.append_null()?; + return Ok(false); + } + return Err(ArrowError::CastError(format!( + "Failed to extract struct from variant {:?}", + value + ))); + }; + + for (index, field) in self.fields.iter().enumerate() { + match obj.get(field.name()) { + Some(field_value) => { + self.field_builders[index].append_value(field_value)?; + } + None => { + self.field_builders[index].append_null()?; + } + } + } + + self.nulls.append_non_null(); + Ok(true) + } + + fn finish(mut self) -> Result { + let mut children = Vec::with_capacity(self.field_builders.len()); + for builder in self.field_builders { + children.push(builder.finish()?); + } + Ok(Arc::new(StructArray::try_new( + self.fields.clone(), + children, + self.nulls.finish(), + )?)) + } +} + impl<'a> ArrayVariantToArrowRowBuilder<'a> { pub(crate) fn try_new( data_type: &'a DataType, From 
14f1eb97fbf017dbd0faef749f62f6cd9389a451 Mon Sep 17 00:00:00 2001 From: Thomas Tanon Date: Thu, 19 Mar 2026 18:42:07 +0100 Subject: [PATCH 59/80] pyarrow: Cache the imported classes to avoid importing them each time (#9439) # Which issue does this PR close? - Closes #9438. # Rationale for this change Speed up conversion by only importing `pyarrow` once. # What changes are included in this PR? - Use `PyOnceLock::import` to import the types. - Remove some not useful `.extract::()?` (the `Display` implementation already does something similar) # Are these changes tested? Covered by existing tests. It would be nice to add benchmark but it might require to: - either add a dependency to a python benchmark runner - write some hacky code to import `pyarrow` from criterion tests (likely by running `pip`/`uv` from the Rust benchmark code) # Are there any user-facing changes? No --- arrow-pyarrow/src/lib.rs | 98 ++++++++++++++++++++++++---------------- 1 file changed, 59 insertions(+), 39 deletions(-) diff --git a/arrow-pyarrow/src/lib.rs b/arrow-pyarrow/src/lib.rs index 15951f8dcfbf..e396711f873d 100644 --- a/arrow-pyarrow/src/lib.rs +++ b/arrow-pyarrow/src/lib.rs @@ -75,10 +75,10 @@ use arrow_data::ArrayData; use arrow_schema::{ArrowError, DataType, Field, Schema, SchemaRef}; use pyo3::exceptions::{PyTypeError, PyValueError}; use pyo3::ffi::Py_uintptr_t; +use pyo3::import_exception; use pyo3::prelude::*; -use pyo3::pybacked::PyBackedStr; -use pyo3::types::{PyCapsule, PyDict, PyList, PyTuple}; -use pyo3::{import_exception, intern}; +use pyo3::sync::PyOnceLock; +use pyo3::types::{PyCapsule, PyDict, PyList, PyTuple, PyType}; import_exception!(pyarrow, ArrowException); /// Represents an exception raised by PyArrow. @@ -118,17 +118,13 @@ impl IntoPyArrow for T { } } -fn validate_class(expected: &str, value: &Bound) -> PyResult<()> { - let pyarrow = PyModule::import(value.py(), "pyarrow")?; - let class = pyarrow.getattr(expected)?; - if !value.is_instance(&class)? 
{ - let expected_module = class.getattr("__module__")?.extract::()?; - let expected_name = class.getattr("__name__")?.extract::()?; +fn validate_class(expected: &Bound, value: &Bound) -> PyResult<()> { + if !value.is_instance(expected)? { + let expected_module = expected.getattr("__module__")?; + let expected_name = expected.getattr("__name__")?; let found_class = value.get_type(); - let found_module = found_class - .getattr("__module__")? - .extract::()?; - let found_name = found_class.getattr("__name__")?.extract::()?; + let found_module = found_class.getattr("__module__")?; + let found_name = found_class.getattr("__name__")?; return Err(PyTypeError::new_err(format!( "Expected instance of {expected_module}.{expected_name}, got {found_module}.{found_name}", ))); @@ -173,7 +169,7 @@ impl FromPyArrow for DataType { } } - validate_class("DataType", value)?; + validate_class(data_type_class(value.py())?, value)?; let c_schema = FFI_ArrowSchema::empty(); let c_schema_ptr = &c_schema as *const FFI_ArrowSchema; @@ -187,9 +183,8 @@ impl ToPyArrow for DataType { fn to_pyarrow<'py>(&self, py: Python<'py>) -> PyResult> { let c_schema = FFI_ArrowSchema::try_from(self).map_err(to_py_err)?; let c_schema_ptr = &c_schema as *const FFI_ArrowSchema; - let module = py.import("pyarrow")?; - let class = module.getattr("DataType")?; - let dtype = class.call_method1("_import_from_c", (c_schema_ptr as Py_uintptr_t,))?; + let dtype = + data_type_class(py)?.call_method1("_import_from_c", (c_schema_ptr as Py_uintptr_t,))?; Ok(dtype) } } @@ -213,7 +208,7 @@ impl FromPyArrow for Field { } } - validate_class("Field", value)?; + validate_class(field_class(value.py())?, value)?; let c_schema = FFI_ArrowSchema::empty(); let c_schema_ptr = &c_schema as *const FFI_ArrowSchema; @@ -227,9 +222,8 @@ impl ToPyArrow for Field { fn to_pyarrow<'py>(&self, py: Python<'py>) -> PyResult> { let c_schema = FFI_ArrowSchema::try_from(self).map_err(to_py_err)?; let c_schema_ptr = &c_schema as *const 
FFI_ArrowSchema; - let module = py.import("pyarrow")?; - let class = module.getattr("Field")?; - let dtype = class.call_method1("_import_from_c", (c_schema_ptr as Py_uintptr_t,))?; + let dtype = + field_class(py)?.call_method1("_import_from_c", (c_schema_ptr as Py_uintptr_t,))?; Ok(dtype) } } @@ -253,7 +247,7 @@ impl FromPyArrow for Schema { } } - validate_class("Schema", value)?; + validate_class(schema_class(value.py())?, value)?; let c_schema = FFI_ArrowSchema::empty(); let c_schema_ptr = &c_schema as *const FFI_ArrowSchema; @@ -267,9 +261,8 @@ impl ToPyArrow for Schema { fn to_pyarrow<'py>(&self, py: Python<'py>) -> PyResult> { let c_schema = FFI_ArrowSchema::try_from(self).map_err(to_py_err)?; let c_schema_ptr = &c_schema as *const FFI_ArrowSchema; - let module = py.import("pyarrow")?; - let class = module.getattr("Schema")?; - let schema = class.call_method1("_import_from_c", (c_schema_ptr as Py_uintptr_t,))?; + let schema = + schema_class(py)?.call_method1("_import_from_c", (c_schema_ptr as Py_uintptr_t,))?; Ok(schema) } } @@ -310,7 +303,7 @@ impl FromPyArrow for ArrayData { return unsafe { ffi::from_ffi(array, schema_ptr.as_ref()) }.map_err(to_py_err); } - validate_class("Array", value)?; + validate_class(array_class(value.py())?, value)?; // prepare a pointer to receive the Array struct let mut array = FFI_ArrowArray::empty(); @@ -336,9 +329,7 @@ impl ToPyArrow for ArrayData { let array = FFI_ArrowArray::new(self); let schema = FFI_ArrowSchema::try_from(self.data_type()).map_err(to_py_err)?; - let module = py.import("pyarrow")?; - let class = module.getattr("Array")?; - let array = class.call_method1( + let array = array_class(py)?.call_method1( "_import_from_c", ( addr_of!(array) as Py_uintptr_t, @@ -423,7 +414,7 @@ impl FromPyArrow for RecordBatch { return RecordBatch::try_new_with_options(schema, columns, &options).map_err(to_py_err); } - validate_class("RecordBatch", value)?; + validate_class(record_batch_class(value.py())?, value)?; // TODO(kszucs): 
implement the FFI conversions in arrow-rs for RecordBatches let schema = value.getattr("schema")?; let schema = Arc::new(Schema::from_pyarrow_bound(&schema)?); @@ -483,7 +474,7 @@ impl FromPyArrow for ArrowArrayStreamReader { return Ok(stream_reader); } - validate_class("RecordBatchReader", value)?; + validate_class(record_batch_reader_class(value.py())?, value)?; // prepare a pointer to receive the stream struct let mut stream = FFI_ArrowArrayStream::empty(); @@ -510,10 +501,8 @@ impl IntoPyArrow for Box { let mut stream = FFI_ArrowArrayStream::new(self); let stream_ptr = (&mut stream) as *mut FFI_ArrowArrayStream; - let module = py.import("pyarrow")?; - let class = module.getattr("RecordBatchReader")?; - let args = PyTuple::new(py, [stream_ptr as Py_uintptr_t])?; - let reader = class.call_method1("_import_from_c", args)?; + let reader = record_batch_reader_class(py)? + .call_method1("_import_from_c", (stream_ptr as Py_uintptr_t,))?; Ok(reader) } @@ -606,21 +595,52 @@ impl FromPyArrow for Table { /// Convert a [`Table`] into `pyarrow.Table`. 
impl IntoPyArrow for Table { fn into_pyarrow(self, py: Python) -> PyResult> { - let module = py.import(intern!(py, "pyarrow"))?; - let class = module.getattr(intern!(py, "Table"))?; - let py_batches = PyList::new(py, self.record_batches.into_iter().map(PyArrowType))?; let py_schema = PyArrowType(Arc::unwrap_or_clone(self.schema)); let kwargs = PyDict::new(py); kwargs.set_item("schema", py_schema)?; - let reader = class.call_method("from_batches", (py_batches,), Some(&kwargs))?; + let reader = table_class(py)?.call_method("from_batches", (py_batches,), Some(&kwargs))?; Ok(reader) } } +fn array_class(py: Python<'_>) -> PyResult<&Bound<'_, PyType>> { + static TYPE: PyOnceLock> = PyOnceLock::new(); + TYPE.import(py, "pyarrow", "Array") +} + +fn record_batch_class(py: Python<'_>) -> PyResult<&Bound<'_, PyType>> { + static TYPE: PyOnceLock> = PyOnceLock::new(); + TYPE.import(py, "pyarrow", "RecordBatch") +} + +fn record_batch_reader_class(py: Python<'_>) -> PyResult<&Bound<'_, PyType>> { + static TYPE: PyOnceLock> = PyOnceLock::new(); + TYPE.import(py, "pyarrow", "RecordBatchReader") +} +fn data_type_class(py: Python<'_>) -> PyResult<&Bound<'_, PyType>> { + static TYPE: PyOnceLock> = PyOnceLock::new(); + TYPE.import(py, "pyarrow", "DataType") +} + +fn field_class(py: Python<'_>) -> PyResult<&Bound<'_, PyType>> { + static TYPE: PyOnceLock> = PyOnceLock::new(); + TYPE.import(py, "pyarrow", "Field") +} + +fn schema_class(py: Python<'_>) -> PyResult<&Bound<'_, PyType>> { + static TYPE: PyOnceLock> = PyOnceLock::new(); + TYPE.import(py, "pyarrow", "Schema") +} + +fn table_class(py: Python<'_>) -> PyResult<&Bound<'_, PyType>> { + static TYPE: PyOnceLock> = PyOnceLock::new(); + TYPE.import(py, "pyarrow", "Table") +} + /// A newtype wrapper for types implementing [`FromPyArrow`] or [`IntoPyArrow`]. 
/// /// When wrapped around a type `T: FromPyArrow`, it From 44f5dfc607892bab849a4dba008b6ee8966c1461 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Heres?= Date: Thu, 19 Mar 2026 19:49:12 +0100 Subject: [PATCH 60/80] perf: Coalesce page fetches when RowSelection selects all rows (#9578) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary - When a `RowSelection` selects every row in a row group, `fetch_ranges` now treats it as no selection, producing a single whole-column-chunk I/O request instead of N individual page requests - This reduces the number of I/O requests for subsequent filter predicates when an earlier predicate passes all rows ## Details In `InMemoryRowGroup::fetch_ranges`, when both a `RowSelection` and an `OffsetIndex` are present, the code enters a page-level fetch path that uses `scan_ranges()` to produce individual page ranges. Even when the selection covers all rows, this produces N separate ranges (one per page). The fix: before entering the page-level path, check if the selection's `row_count()` equals the row group's total row count. If so, drop the selection and take the simpler whole-column-chunk path. This commonly happens when a multi-predicate `RowFilter` has an early predicate that passes all rows in a row group (e.g., `CounterID = 62` on a row group where all rows have `CounterID = 62`). 
## Test plan - [x] Existing tests pass (snapshot updated to reflect fewer I/O requests) - [x] `test_read_multiple_row_filter` verifies the coalesced fetch pattern 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-authored-by: Claude Opus 4.6 (1M context) --- parquet/src/arrow/arrow_reader/read_plan.rs | 7 +++++++ parquet/tests/arrow_reader/io/async_reader.rs | 4 +--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/parquet/src/arrow/arrow_reader/read_plan.rs b/parquet/src/arrow/arrow_reader/read_plan.rs index 7c9eb36befe3..99ffe0febc95 100644 --- a/parquet/src/arrow/arrow_reader/read_plan.rs +++ b/parquet/src/arrow/arrow_reader/read_plan.rs @@ -167,6 +167,13 @@ impl ReadPlanBuilder { }; } + // If the predicate selected all rows and there is no prior selection, + // skip creating a RowSelection entirely — this avoids the allocation + // and keeps selection as None which enables coalesced page fetches. + let all_selected = filters.iter().all(|f| f.true_count() == f.len()); + if all_selected && self.selection.is_none() { + return Ok(self); + } let raw = RowSelection::from_filters(&filters); self.selection = match self.selection.take() { Some(selection) => Some(selection.and_then(&raw)), diff --git a/parquet/tests/arrow_reader/io/async_reader.rs b/parquet/tests/arrow_reader/io/async_reader.rs index 8022335da0ef..db06dda8ee89 100644 --- a/parquet/tests/arrow_reader/io/async_reader.rs +++ b/parquet/tests/arrow_reader/io/async_reader.rs @@ -275,9 +275,7 @@ async fn test_read_multiple_row_filter() { "Read Multi:", " Row Group 1, column 'a': MultiPage(dictionary_page: true, data_pages: [0, 1]) (1856 bytes, 1 requests) [data]", "Read Multi:", - " Row Group 1, column 'b': DictionaryPage (1617 bytes, 1 requests) [data]", - " Row Group 1, column 'b': DataPage(0) (113 bytes , 1 requests) [data]", - " Row Group 1, column 'b': DataPage(1) (126 bytes , 1 requests) [data]", + " Row Group 1, column 'b': MultiPage(dictionary_page: true, data_pages: [0, 1]) 
(1856 bytes, 1 requests) [data]", "Read Multi:", " Row Group 1, column 'c': DictionaryPage (7217 bytes, 1 requests) [data]", " Row Group 1, column 'c': DataPage(0) (113 bytes , 1 requests) [data]", From d53df605656d8012eca42e8ddffe165362a1a4cb Mon Sep 17 00:00:00 2001 From: Kunal <155142500+kunalsinghdadhwal@users.noreply.github.com> Date: Fri, 20 Mar 2026 01:44:01 +0530 Subject: [PATCH 61/80] feat: Optimize from_bitwise_binary_op with 64-bit alignment (#9441) # Which issue does this PR close? - Closes #9378 # Rationale for this change the optimizations as listed in the issue description - Align to 8 bytes - Don't try to return a buffer with bit_offset 0 but round it to a multiple of 64 - Use chunk_exact for the fallback path # What changes are included in this PR? When both inputs share the same sub-64-bit alignment (left_offset % 64 == right_offset % 64), the optimized path is used. This covers the common cases (both offset 0, both sliced equally, etc.). The BitChunks fallback is retained only when the two offsets have different sub-64-bit alignment. # Are these changes tested? Yes the tests are changed and they are included # Are there any user-facing changes? 
Yes, this is a minor breaking change to from_bitwise_binary_op: - The returned BooleanBuffer may now have a non-zero offset (previously always 0) - The returned BooleanBuffer may have padding bits set outside the logical range in values() --------- Signed-off-by: Kunal Singh Dadhwal Co-authored-by: Andrew Lamb --- arrow-buffer/src/buffer/boolean.rs | 230 +++++++++++++++++++++++++---- arrow-buffer/src/buffer/ops.rs | 102 +++++++++++-- 2 files changed, 288 insertions(+), 44 deletions(-) diff --git a/arrow-buffer/src/buffer/boolean.rs b/arrow-buffer/src/buffer/boolean.rs index bae083b3b208..420bbf59f3be 100644 --- a/arrow-buffer/src/buffer/boolean.rs +++ b/arrow-buffer/src/buffer/boolean.rs @@ -290,7 +290,8 @@ impl BooleanBuffer { /// on the relevant bits; the input `u64` values may contain irrelevant bits /// and may be processed differently on different endian architectures. /// * `op` may be called with input bits outside the requested range. - /// * The returned `BooleanBuffer` always has zero offset. + /// * Returned `BooleanBuffer` may have non zero offset + /// * Returned `BooleanBuffer` may have bits set outside the requested range /// /// # See Also /// - [`BooleanBuffer::from_bitwise_unary_op`] for unary operations on a single input buffer. 
@@ -305,19 +306,28 @@ impl BooleanBuffer { /// let result = BooleanBuffer::from_bitwise_binary_op( /// &left, 0, &right, 0, 12, |a, b| a & b /// ); - /// assert_eq!(result.inner().as_slice(), &[0b10001000u8, 0b00001000u8]); + /// assert_eq!(result.len(), 12); + /// for i in 0..12 { + /// assert_eq!(result.value(i), left.as_slice()[i / 8] >> (i % 8) & 1 == 1 + /// && right.as_slice()[i / 8] >> (i % 8) & 1 == 1); + /// } /// ``` /// /// # Example: Create new [`BooleanBuffer`] from bitwise `OR` of two byte slices /// ``` - /// # use arrow_buffer::BooleanBuffer; + /// # use arrow_buffer::{BooleanBuffer, bit_util}; /// let left = [0b11001100u8, 0b10111010u8]; /// let right = [0b10101010u8, 0b11011100u8]; /// // OR of bits 4..16 from left and bits 0..12 from right /// let result = BooleanBuffer::from_bitwise_binary_op( /// &left, 4, &right, 0, 12, |a, b| a | b /// ); - /// assert_eq!(result.inner().as_slice(), &[0b10101110u8, 0b00001111u8]); + /// assert_eq!(result.len(), 12); + /// for i in 0..12 { + /// let l = bit_util::get_bit(&left, 4 + i); + /// let r = bit_util::get_bit(&right, i); + /// assert_eq!(result.value(i), l | r); + /// } /// ``` pub fn from_bitwise_binary_op( left: impl AsRef<[u8]>, @@ -332,39 +342,74 @@ impl BooleanBuffer { { let left = left.as_ref(); let right = right.as_ref(); - // try fast path for aligned input - // If the underlying buffers are aligned to u64 we can apply the operation directly on the u64 slices - // to improve performance. - if left_offset_in_bits & 0x7 == 0 && right_offset_in_bits & 0x7 == 0 { - // align to byte boundary - let left = &left[left_offset_in_bits / 8..]; - let right = &right[right_offset_in_bits / 8..]; - - unsafe { - let (left_prefix, left_u64s, left_suffix) = left.align_to::(); - let (right_prefix, right_u64s, right_suffix) = right.align_to::(); - // if there is no prefix or suffix, both buffers are aligned and - // we can do the operation directly on u64s. 
- // TODO: consider `slice::as_chunks` and `u64::from_le_bytes` when MSRV reaches 1.88. - // https://github.com/apache/arrow-rs/pull/9022#discussion_r2639949361 - if left_prefix.is_empty() - && right_prefix.is_empty() - && left_suffix.is_empty() - && right_suffix.is_empty() - { - let result_u64s = left_u64s + + // When both offsets share the same sub-64-bit alignment, we can + // align both to 64-bit boundaries and zip u64s directly, + // avoiding BitChunks bit-shifting entirely. + if left_offset_in_bits % 64 == right_offset_in_bits % 64 { + let bit_offset = left_offset_in_bits % 64; + let left_end = left_offset_in_bits + len_in_bits; + let right_end = right_offset_in_bits + len_in_bits; + + let left_aligned = left_offset_in_bits & !63; + let right_aligned = right_offset_in_bits & !63; + + let left_end_bytes = (bit_util::ceil(left_end, 64) * 8).min(left.len()); + let right_end_bytes = (bit_util::ceil(right_end, 64) * 8).min(right.len()); + + let left_slice = &left[left_aligned / 8..left_end_bytes]; + let right_slice = &right[right_aligned / 8..right_end_bytes]; + + let (lp, left_u64s, ls) = unsafe { left_slice.align_to::() }; + let (rp, right_u64s, rs) = unsafe { right_slice.align_to::() }; + + match (lp, ls, rp, rs) { + ([], [], [], []) => { + let result_u64s: Vec = left_u64s .iter() .zip(right_u64s.iter()) .map(|(l, r)| op(*l, *r)) - .collect::>(); - return BooleanBuffer { - buffer: Buffer::from(result_u64s), - bit_offset: 0, - bit_len: len_in_bits, - }; + .collect(); + return BooleanBuffer::new(result_u64s.into(), bit_offset, len_in_bits); } + ([], left_suf, [], right_suf) => { + let left_iter = left_u64s + .iter() + .cloned() + .chain((!left_suf.is_empty()).then(|| read_u64(left_suf))); + let right_iter = right_u64s + .iter() + .cloned() + .chain((!right_suf.is_empty()).then(|| read_u64(right_suf))); + let result_u64s: Vec = + left_iter.zip(right_iter).map(|(l, r)| op(l, r)).collect(); + return BooleanBuffer::new(result_u64s.into(), bit_offset, len_in_bits); + 
} + _ => {} } + + // Memory not u64-aligned, use chunks_exact fallback + let left_chunks = left_slice.chunks_exact(8); + let left_rem = left_chunks.remainder(); + let right_chunks = right_slice.chunks_exact(8); + let right_rem = right_chunks.remainder(); + + let left_iter = left_chunks.map(|c| u64::from_le_bytes(c.try_into().unwrap())); + let right_iter = right_chunks.map(|c| u64::from_le_bytes(c.try_into().unwrap())); + + let result_u64s: Vec = if left_rem.is_empty() && right_rem.is_empty() { + left_iter.zip(right_iter).map(|(l, r)| op(l, r)).collect() + } else { + left_iter + .chain(Some(read_u64(left_rem))) + .zip(right_iter.chain(Some(read_u64(right_rem)))) + .map(|(l, r)| op(l, r)) + .collect() + }; + return BooleanBuffer::new(result_u64s.into(), bit_offset, len_in_bits); } + + // Different sub-64-bit alignments: bit-shifting unavoidable let left_chunks = BitChunks::new(left, left_offset_in_bits, len_in_bits); let right_chunks = BitChunks::new(right, right_offset_in_bits, len_in_bits); @@ -479,7 +524,7 @@ impl BooleanBuffer { } } - /// Returns a [`Buffer`] containing the sliced contents of this [`BooleanBuffer`] + /// Returns a new [`Buffer`] containing the sliced contents of this [`BooleanBuffer`] /// /// Equivalent to `self.buffer.bit_slice(self.offset, self.len)` pub fn sliced(&self) -> Buffer { @@ -994,6 +1039,127 @@ mod tests { } } + #[test] + fn test_from_bitwise_binary_op_same_mod_64_unaligned_fallback() { + // Exercise the shared-alignment fast path when both inputs are misaligned in memory, + // forcing the chunks_exact fallback instead of align_to::(). 
+ let left_bytes = [ + 0, // dropped so `&left_bytes[1..]` is not u64-aligned in memory + 0b1101_0010, // logical left bits start at bit 3 of this byte + 0b0110_1101, + 0b1010_0111, + 0b0001_1110, + 0b1110_0001, + 0b0101_1010, + 0b1001_0110, + 0b0011_1100, + 0b1011_0001, + 0b0100_1110, + 0b1100_0011, + 0b0111_1000, + ]; + let right_bytes = [ + 0, // dropped so `&right_bytes[1..]` is not u64-aligned in memory + 0b1010_1100, // logical right bits start at bit 67 == bit 3 of the second 64-bit block + 0b0101_0011, + 0b1111_0000, + 0b0011_1010, + 0b1000_1111, + 0b0110_0101, + 0b1101_1000, + 0b0001_0111, + 0b1110_0100, + 0b0010_1101, + 0b1001_1010, + 0b0111_0001, + ]; + + let left = &left_bytes[1..]; + let right = &right_bytes[1..]; + + let left_offset = 3; + let right_offset = 67; // same mod 64 as left_offset, so this takes the shared-alignment path + let len = 24; // leaves a partial trailing chunk, so this covers the non-empty remainder branch + + let result = BooleanBuffer::from_bitwise_binary_op( + left, + left_offset, + right, + right_offset, + len, + |a, b| a & b, + ); + let expected = (0..len) + .map(|i| { + bit_util::get_bit(left, left_offset + i) + & bit_util::get_bit(right, right_offset + i) + }) + .collect::(); + + assert_eq!(result, expected); + assert_eq!(result.offset(), left_offset % 64); + } + + #[test] + fn test_from_bitwise_binary_op_same_mod_64_unaligned_fallback_no_remainder() { + // Force the chunks_exact fallback with an exact 8-byte chunk so both remainders are empty. 
+ let left_bytes = [ + 0, // dropped so `&left_bytes[1..]` is not u64-aligned in memory + 0b1010_1100, // logical left bits start at bit 3 of this byte + 0b0110_1001, + 0b1101_0011, + 0b0001_1110, + 0b1110_0101, + 0b0101_1000, + 0b1001_0111, + 0b0011_1101, + ]; + let right_bytes = [ + 0, // dropped so `&right_bytes[1..]` is not u64-aligned in memory + 0b0111_0010, // logical right bits start at bit 67 == bit 3 of the second 64-bit block + 0b1010_1001, + 0b0101_1110, + 0b1100_0011, + 0b0011_1011, + 0b1000_1110, + 0b1111_0001, + 0b0100_1101, + 0b1011_0110, + 0b0001_1011, + 0b1101_0100, + 0b0110_0011, + 0b1001_1110, + 0b0010_1001, + 0b1110_0110, + 0b0101_0001, + ]; + + let left = &left_bytes[1..]; + let right = &right_bytes[1..]; + + let left_offset = 3; + let right_offset = 67; // same mod 64 as left_offset, so this takes the shared-alignment path + let len = 61; // 3 + 61 = 64, so the aligned slices are exactly one 8-byte chunk with empty remainders + + let result = BooleanBuffer::from_bitwise_binary_op( + left, + left_offset, + right, + right_offset, + len, + |a, b| a | b, + ); + let expected = (0..len) + .map(|i| { + bit_util::get_bit(left, left_offset + i) + | bit_util::get_bit(right, right_offset + i) + }) + .collect::(); + + assert_eq!(result, expected); + assert_eq!(result.offset(), left_offset % 64); + } + #[test] fn test_extend_trusted_len_sets_byte_len() { // Ensures extend_trusted_len keeps the underlying byte length in sync with bit length. diff --git a/arrow-buffer/src/buffer/ops.rs b/arrow-buffer/src/buffer/ops.rs index 36efe876432d..793bbaf6c2e7 100644 --- a/arrow-buffer/src/buffer/ops.rs +++ b/arrow-buffer/src/buffer/ops.rs @@ -143,6 +143,9 @@ where /// Apply a bitwise and to two inputs and return the result as a Buffer. /// The inputs are treated as bitmaps, meaning that offsets and length are specified in number of bits. 
+/// +/// # See Also +/// * [`BooleanBuffer::from_bitwise_binary_op`] for creating `BooleanBuffer`s directly pub fn buffer_bin_and( left: &Buffer, left_offset_in_bits: usize, @@ -150,19 +153,27 @@ pub fn buffer_bin_and( right_offset_in_bits: usize, len_in_bits: usize, ) -> Buffer { - BooleanBuffer::from_bitwise_binary_op( + let result = BooleanBuffer::from_bitwise_binary_op( left, left_offset_in_bits, right, right_offset_in_bits, len_in_bits, |a, b| a & b, - ) - .into_inner() + ); + // Normalize non-zero BooleanBuffer offsets back to a zero-offset Buffer. + if result.offset() == 0 { + result.into_inner() + } else { + result.sliced() + } } /// Apply a bitwise or to two inputs and return the result as a Buffer. /// The inputs are treated as bitmaps, meaning that offsets and length are specified in number of bits. +/// +/// # See Also +/// * [`BooleanBuffer::from_bitwise_binary_op`] for creating `BooleanBuffer`s directly pub fn buffer_bin_or( left: &Buffer, left_offset_in_bits: usize, @@ -170,19 +181,27 @@ pub fn buffer_bin_or( right_offset_in_bits: usize, len_in_bits: usize, ) -> Buffer { - BooleanBuffer::from_bitwise_binary_op( + let result = BooleanBuffer::from_bitwise_binary_op( left, left_offset_in_bits, right, right_offset_in_bits, len_in_bits, |a, b| a | b, - ) - .into_inner() + ); + // Normalize non-zero BooleanBuffer offsets back to a zero-offset Buffer. + if result.offset() == 0 { + result.into_inner() + } else { + result.sliced() + } } /// Apply a bitwise xor to two inputs and return the result as a Buffer. /// The inputs are treated as bitmaps, meaning that offsets and length are specified in number of bits. 
+/// +/// # See Also +/// * [`BooleanBuffer::from_bitwise_binary_op`] for creating `BooleanBuffer`s directly pub fn buffer_bin_xor( left: &Buffer, left_offset_in_bits: usize, @@ -190,19 +209,27 @@ pub fn buffer_bin_xor( right_offset_in_bits: usize, len_in_bits: usize, ) -> Buffer { - BooleanBuffer::from_bitwise_binary_op( + let result = BooleanBuffer::from_bitwise_binary_op( left, left_offset_in_bits, right, right_offset_in_bits, len_in_bits, |a, b| a ^ b, - ) - .into_inner() + ); + // Normalize non-zero BooleanBuffer offsets back to a zero-offset Buffer. + if result.offset() == 0 { + result.into_inner() + } else { + result.sliced() + } } /// Apply a bitwise and_not to two inputs and return the result as a Buffer. /// The inputs are treated as bitmaps, meaning that offsets and length are specified in number of bits. +/// +/// # See Also +/// * [`BooleanBuffer::from_bitwise_binary_op`] for creating `BooleanBuffer`s directly pub fn buffer_bin_and_not( left: &Buffer, left_offset_in_bits: usize, @@ -210,19 +237,70 @@ pub fn buffer_bin_and_not( right_offset_in_bits: usize, len_in_bits: usize, ) -> Buffer { - BooleanBuffer::from_bitwise_binary_op( + let result = BooleanBuffer::from_bitwise_binary_op( left, left_offset_in_bits, right, right_offset_in_bits, len_in_bits, |a, b| a & !b, - ) - .into_inner() + ); + // Normalize non-zero BooleanBuffer offsets back to a zero-offset Buffer. + if result.offset() == 0 { + result.into_inner() + } else { + result.sliced() + } } /// Apply a bitwise not to one input and return the result as a Buffer. /// The input is treated as a bitmap, meaning that offset and length are specified in number of bits. 
+/// +/// # See Also +/// * [`BooleanBuffer::from_bitwise_unary_op`] for creating `BooleanBuffer`s directly pub fn buffer_unary_not(left: &Buffer, offset_in_bits: usize, len_in_bits: usize) -> Buffer { BooleanBuffer::from_bitwise_unary_op(left, offset_in_bits, len_in_bits, |a| !a).into_inner() } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_buffer_bin_ops_return_zero_offset_buffers() { + let left = Buffer::from(vec![0b1010_1100, 0b0110_1001]); + let right = Buffer::from(vec![0, 0, 0, 0, 0, 0, 0, 0, 0b1110_0101, 0b0101_1000]); + + let left_offset = 1; + let right_offset = 65; // same mod 64 as left_offset, so from_bitwise_binary_op returns non-zero offset + let len = 7; + + // Reuse the same offset scenario for all four binary wrappers: + // each wrapper should return the logically equivalent offset-0 Buffer, + // even though the underlying BooleanBuffer result has offset 1. + for (op, wrapper) in [ + ( + (|a, b| a & b) as fn(u64, u64) -> u64, + buffer_bin_and as fn(&Buffer, usize, &Buffer, usize, usize) -> Buffer, + ), + (((|a, b| a | b) as fn(u64, u64) -> u64), buffer_bin_or), + (((|a, b| a ^ b) as fn(u64, u64) -> u64), buffer_bin_xor), + (((|a, b| a & !b) as fn(u64, u64) -> u64), buffer_bin_and_not), + ] { + let unsliced = BooleanBuffer::from_bitwise_binary_op( + &left, + left_offset, + &right, + right_offset, + len, + op, + ); + assert_eq!(unsliced.offset(), 1); + + let result = wrapper(&left, left_offset, &right, right_offset, len); + + assert_eq!(result, unsliced.sliced()); + assert_eq!(result.len(), 1); + } + } +} From 39dda22517e6369d006aaac5eaac53d9cd72c29b Mon Sep 17 00:00:00 2001 From: Peter L Date: Sat, 21 Mar 2026 00:14:56 +1030 Subject: [PATCH 62/80] Make Sbbf Constructers Public (#9569) # Which issue does this PR close? None # Rationale for this change We want to use the SBBF Bloom Filter, but need to construct/serialize it manually. Currently there is no way to create a new `Sbbf` outside of this crate. 
Alongside this: we want to store the `Sbbf` in a `FixedSizedBinary` column for some fancy indexing. # What changes are included in this PR? Some methods become public # Are these changes tested? N/A # Are there any user-facing changes? Yes, we add a few more public methods to the `Sbbf` struct --- parquet/src/bloom_filter/mod.rs | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/parquet/src/bloom_filter/mod.rs b/parquet/src/bloom_filter/mod.rs index 1f77e492ccf1..933b5a269fff 100644 --- a/parquet/src/bloom_filter/mod.rs +++ b/parquet/src/bloom_filter/mod.rs @@ -232,8 +232,10 @@ fn read_bloom_filter_header_and_length_from_bytes( Ok((header, (total_length - prot.as_slice().len()) as u64)) } -pub(crate) const BITSET_MIN_LENGTH: usize = 32; -pub(crate) const BITSET_MAX_LENGTH: usize = 128 * 1024 * 1024; +/// The minimum number of bytes for a bloom filter bitset. +pub const BITSET_MIN_LENGTH: usize = 32; +/// The maximum number of bytes for a bloom filter bitset. +pub const BITSET_MAX_LENGTH: usize = 128 * 1024 * 1024; #[inline] fn optimal_num_of_bytes(num_bytes: usize) -> usize { @@ -255,7 +257,7 @@ fn num_of_bits_from_ndv_fpp(ndv: u64, fpp: f64) -> usize { impl Sbbf { /// Create a new [Sbbf] with given number of distinct values and false positive probability. /// Will return an error if `fpp` is greater than or equal to 1.0 or less than 0.0. - pub(crate) fn new_with_ndv_fpp(ndv: u64, fpp: f64) -> Result { + pub fn new_with_ndv_fpp(ndv: u64, fpp: f64) -> Result { if !(0.0..1.0).contains(&fpp) { return Err(ParquetError::General(format!( "False positive probability must be between 0.0 and 1.0, got {fpp}" @@ -267,7 +269,7 @@ impl Sbbf { /// Create a new [Sbbf] with given number of bytes, the exact number of bytes will be adjusted /// to the next power of two bounded by [BITSET_MIN_LENGTH] and [BITSET_MAX_LENGTH]. 
- pub(crate) fn new_with_num_of_bytes(num_bytes: usize) -> Self { + pub fn new_with_num_of_bytes(num_bytes: usize) -> Self { let num_bytes = optimal_num_of_bytes(num_bytes); assert_eq!(num_bytes % size_of::(), 0); let num_blocks = num_bytes / size_of::(); @@ -275,7 +277,8 @@ impl Sbbf { Self(bitset) } - pub(crate) fn new(bitset: &[u8]) -> Self { + /// Creates a new [Sbbf] from a raw byte slice. + pub fn new(bitset: &[u8]) -> Self { let data = bitset .chunks_exact(4 * 8) .map(|chunk| { @@ -304,7 +307,7 @@ impl Sbbf { /// Write the bitset in serialized form to the writer. #[cfg(not(target_endian = "little"))] - fn write_bitset(&self, mut writer: W) -> Result<(), ParquetError> { + pub fn write_bitset(&self, mut writer: W) -> Result<(), ParquetError> { for block in &self.0 { writer .write_all(block.to_le_bytes().as_slice()) @@ -317,7 +320,7 @@ impl Sbbf { /// Write the bitset in serialized form to the writer. #[cfg(target_endian = "little")] - fn write_bitset(&self, mut writer: W) -> Result<(), ParquetError> { + pub fn write_bitset(&self, mut writer: W) -> Result<(), ParquetError> { // Safety: Block is repr(transparent) and [u32; 8] can be reinterpreted as [u8; 32]. let slice = unsafe { std::slice::from_raw_parts( @@ -411,7 +414,7 @@ impl Sbbf { } /// Check if an [AsBytes] value is probably present or definitely absent in the filter - pub fn check(&self, value: &T) -> bool { + pub fn check(&self, value: &T) -> bool { self.check_hash(hash_as_bytes(value)) } From bc74c7192a48bd36a9e79b883a3482af396a2350 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Fri, 20 Mar 2026 14:54:53 +0100 Subject: [PATCH 63/80] feat(parquet): add content defined chunking for arrow writer (#9450) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Which issue does this PR close? - Closes #NNN. 
# Rationale for this change Rust implementation of https://github.com/apache/arrow/pull/45360 Traditional Parquet writing splits data pages at fixed sizes, so a single inserted or deleted row causes all subsequent pages to shift — resulting in nearly every byte being re-uploaded to content-addressable storage (CAS) systems. CDC determines page boundaries via a rolling gearhash over column values, so unchanged data produces identical pages across different writes, enabling storage cost reductions and faster upload times. See more details in https://huggingface.co/blog/parquet-cdc The original C++ implementation https://github.com/apache/arrow/pull/45360 Evaluation tool https://github.com/huggingface/dataset-dedupe-estimator where I already integrated this PR to verify that deduplication effectiveness is on par with parquet-cpp (lower is better): image # What changes are included in this PR? - **Content-defined chunker** at `parquet/src/column/chunker/` - **Arrow writer integration** in `ArrowColumnWriter` - **Writer properties** via `CdcOptions` struct (`min_chunk_size`, `max_chunk_size`, `norm_level`) - **ColumnDescriptor**: added `repeated_ancestor_def_level` field for iterating over nested field values # Are these changes tested? Yes — unit tests are located in `cdc.rs` and ported from the C++ implementation. # Are there any user-facing changes?
New **experimental** API, disabled by default — no behavior change for existing code: ```rust // Simple toggle (256 KiB min, 1 MiB max, norm_level 0) let props = WriterProperties::builder() .set_content_defined_chunking(true) .build(); // Explicit CDC parameters let props = WriterProperties::builder() .set_cdc_options(CdcOptions { min_chunk_size: 128 * 1024, max_chunk_size: 512 * 1024, norm_level: 1 }) .build(); ``` --------- Co-authored-by: Ed Seidl --- parquet/benches/arrow_writer.rs | 43 +- parquet/src/arrow/arrow_writer/levels.rs | 196 ++ parquet/src/arrow/arrow_writer/mod.rs | 82 +- parquet/src/column/chunker/cdc.rs | 2156 +++++++++++++++++++ parquet/src/column/chunker/cdc_codegen.py | 118 + parquet/src/column/chunker/cdc_generated.rs | 558 +++++ parquet/src/column/chunker/mod.rs | 40 + parquet/src/column/mod.rs | 2 + parquet/src/column/writer/mod.rs | 11 +- parquet/src/file/properties.rs | 101 + parquet/src/lib.rs | 22 + parquet/src/schema/types.rs | 157 +- 12 files changed, 3447 insertions(+), 39 deletions(-) create mode 100644 parquet/src/column/chunker/cdc.rs create mode 100644 parquet/src/column/chunker/cdc_codegen.py create mode 100644 parquet/src/column/chunker/cdc_generated.rs create mode 100644 parquet/src/column/chunker/mod.rs diff --git a/parquet/benches/arrow_writer.rs b/parquet/benches/arrow_writer.rs index b92f0788b2fc..2381941897c7 100644 --- a/parquet/benches/arrow_writer.rs +++ b/parquet/benches/arrow_writer.rs @@ -19,7 +19,7 @@ extern crate criterion; use criterion::{Bencher, Criterion, Throughput}; -use parquet::arrow::arrow_writer::{ArrowRowGroupWriterFactory, compute_leaves}; +use parquet::arrow::ArrowWriter; use parquet::basic::{Compression, ZstdLevel}; extern crate arrow; @@ -33,10 +33,8 @@ use arrow::datatypes::*; use arrow::util::bench_util::{create_f16_array, create_f32_array, create_f64_array}; use arrow::{record_batch::RecordBatch, util::data_gen::*}; use arrow_array::RecordBatchOptions; -use parquet::arrow::ArrowSchemaConverter;
use parquet::errors::Result; -use parquet::file::properties::{WriterProperties, WriterVersion}; -use parquet::file::writer::SerializedFileWriter; +use parquet::file::properties::{CdcOptions, WriterProperties, WriterVersion}; fn create_primitive_bench_batch( size: usize, @@ -342,39 +340,21 @@ fn write_batch_with_option( batch: &RecordBatch, props: Option, ) -> Result<()> { - let mut file = Empty::default(); - let props = Arc::new(props.unwrap_or_default()); - let parquet_schema = ArrowSchemaConverter::new() - .with_coerce_types(props.coerce_types()) - .convert(batch.schema_ref())?; - let writer = SerializedFileWriter::new(&mut file, parquet_schema.root_schema_ptr(), props)?; - let row_group_writer_factory = ArrowRowGroupWriterFactory::new(&writer, batch.schema()); + let props = props.unwrap_or_default(); bench.iter(|| { - let mut row_group = row_group_writer_factory.create_column_writers(0).unwrap(); - - let mut writers = row_group.iter_mut(); - for (field, column) in batch - .schema() - .fields() - .iter() - .zip(black_box(batch).columns()) - { - for leaf in compute_leaves(field.as_ref(), column).unwrap() { - writers.next().unwrap().write(&leaf).unwrap() - } - } - - for writer in row_group.into_iter() { - black_box(writer.close()).unwrap(); - } + let mut file = Empty::default(); + let mut writer = + ArrowWriter::try_new(&mut file, batch.schema(), Some(props.clone())).unwrap(); + writer.write(black_box(batch)).unwrap(); + black_box(writer.close()).unwrap(); }); Ok(()) } fn create_batches() -> Vec<(&'static str, RecordBatch)> { - const BATCH_SIZE: usize = 4096; + const BATCH_SIZE: usize = 1024 * 1024; let mut batches = vec![]; @@ -440,6 +420,11 @@ fn create_writer_props() -> Vec<(&'static str, WriterProperties)> { .build(); props.push(("zstd_parquet_2", prop)); + let prop = WriterProperties::builder() + .set_content_defined_chunking(Some(CdcOptions::default())) + .build(); + props.push(("cdc", prop)); + props } diff --git a/parquet/src/arrow/arrow_writer/levels.rs 
b/parquet/src/arrow/arrow_writer/levels.rs index 0ff2137d907e..d1da24872c49 100644 --- a/parquet/src/arrow/arrow_writer/levels.rs +++ b/parquet/src/arrow/arrow_writer/levels.rs @@ -40,6 +40,7 @@ //! //! \[1\] [parquet-format#nested-encoding](https://github.com/apache/parquet-format#nested-encoding) +use crate::column::chunker::CdcChunk; use crate::errors::{ParquetError, Result}; use arrow_array::cast::AsArray; use arrow_array::{Array, ArrayRef, OffsetSizeTrait}; @@ -801,11 +802,58 @@ impl ArrayLevels { pub fn non_null_indices(&self) -> &[usize] { &self.non_null_indices } + + /// Create a sliced view of this `ArrayLevels` for a CDC chunk. + /// + /// Note: `def_levels`, `rep_levels`, and `non_null_indices` are copied (not zero-copy), + /// while `array` is sliced without copying. + pub(crate) fn slice_for_chunk(&self, chunk: &CdcChunk) -> Self { + let level_offset = chunk.level_offset; + let num_levels = chunk.num_levels; + let value_offset = chunk.value_offset; + let num_values = chunk.num_values; + let def_levels = self + .def_levels + .as_ref() + .map(|levels| levels[level_offset..level_offset + num_levels].to_vec()); + let rep_levels = self + .rep_levels + .as_ref() + .map(|levels| levels[level_offset..level_offset + num_levels].to_vec()); + + // Filter non_null_indices to [value_offset, value_offset + num_values) + // and shift by -value_offset. Use binary search since the slice is sorted. 
+ let value_end = value_offset + num_values; + let start = self + .non_null_indices + .partition_point(|&idx| idx < value_offset); + let end = self + .non_null_indices + .partition_point(|&idx| idx < value_end); + let non_null_indices: Vec = self.non_null_indices[start..end] + .iter() + .map(|&idx| idx - value_offset) + .collect(); + + let array = self.array.slice(value_offset, num_values); + let logical_nulls = array.logical_nulls(); + + Self { + def_levels, + rep_levels, + non_null_indices, + max_def_level: self.max_def_level, + max_rep_level: self.max_rep_level, + array, + logical_nulls, + } + } } #[cfg(test)] mod tests { use super::*; + use crate::column::chunker::CdcChunk; use arrow_array::builder::*; use arrow_array::types::Int32Type; @@ -2096,4 +2144,152 @@ mod tests { let v = Arc::new(array) as ArrayRef; LevelInfoBuilder::try_new(field, Default::default(), &v).unwrap() } + + #[test] + fn test_slice_for_chunk_flat() { + // Case 1: required field (max_def_level=0, no def/rep levels stored). + // Array has 6 values; all are non-null so non_null_indices covers every position. + // The chunk selects value_offset=2, num_values=3 → the sub-array [3, 4, 5]. + // Since there are no levels, num_levels=0 and level_offset are irrelevant. + // non_null_indices [0,1,2,3,4,5] filtered to [2,5) and shifted by -2 → [0,1,2].
+ let array: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5, 6])); + let logical_nulls = array.logical_nulls(); + let levels = ArrayLevels { + def_levels: None, + rep_levels: None, + non_null_indices: vec![0, 1, 2, 3, 4, 5], + max_def_level: 0, + max_rep_level: 0, + array, + logical_nulls, + }; + let sliced = levels.slice_for_chunk(&CdcChunk { + level_offset: 0, + num_levels: 0, + value_offset: 2, + num_values: 3, + }); + assert!(sliced.def_levels.is_none()); + assert!(sliced.rep_levels.is_none()); + assert_eq!(sliced.non_null_indices, vec![0, 1, 2]); + assert_eq!(sliced.array.len(), 3); + + // Case 2: optional field (max_def_level=1, def levels present, no rep levels). + // Array: [Some(1), None, Some(3), None, Some(5), Some(6)] + // def_levels: [1, 0, 1, 0, 1, 1] (1=non-null, 0=null) + // non_null_indices: [0, 2, 4, 5] (array positions of the four non-null values) + // + // The chunk selects level_offset=1, num_levels=3, value_offset=1, num_values=3: + // - def_levels[1..4] = [0, 1, 0] → null, non-null, null + // - sub-array slice(1, 3) = [None, Some(3), None] + // - non_null_indices filtered to [value_offset=1, value_end=4): only index 2 qualifies, + // shifted by -1 → [1] (position of Some(3) within the sliced sub-array) + let array: ArrayRef = Arc::new(Int32Array::from(vec![ + Some(1), + None, + Some(3), + None, + Some(5), + Some(6), + ])); + let logical_nulls = array.logical_nulls(); + let levels = ArrayLevels { + def_levels: Some(vec![1, 0, 1, 0, 1, 1]), + rep_levels: None, + non_null_indices: vec![0, 2, 4, 5], + max_def_level: 1, + max_rep_level: 0, + array, + logical_nulls, + }; + let sliced = levels.slice_for_chunk(&CdcChunk { + level_offset: 1, + num_levels: 3, + value_offset: 1, + num_values: 3, + }); + assert_eq!(sliced.def_levels, Some(vec![0, 1, 0])); + assert!(sliced.rep_levels.is_none()); + assert_eq!(sliced.non_null_indices, vec![1]); + assert_eq!(sliced.array.len(), 3); + } + + #[test] + fn test_slice_for_chunk_nested() { + // 
[[1,2],[3],[4,5]]: def=[2,2,2,2,2], rep=[0,1,0,0,1] + // Slice levels 2..5 (def=[2,2,2], rep=[0,0,1]), values 2..5 + let array: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])); + let logical_nulls = array.logical_nulls(); + let levels = ArrayLevels { + def_levels: Some(vec![2, 2, 2, 2, 2]), + rep_levels: Some(vec![0, 1, 0, 0, 1]), + non_null_indices: vec![0, 1, 2, 3, 4], + max_def_level: 2, + max_rep_level: 1, + array, + logical_nulls, + }; + let sliced = levels.slice_for_chunk(&CdcChunk { + level_offset: 2, + num_levels: 3, + value_offset: 2, + num_values: 3, + }); + assert_eq!(sliced.def_levels, Some(vec![2, 2, 2])); + assert_eq!(sliced.rep_levels, Some(vec![0, 0, 1])); + // [0,1,2,3,4] filtered to [2,5) → [2,3,4] → shifted -2 → [0,1,2] + assert_eq!(sliced.non_null_indices, vec![0, 1, 2]); + assert_eq!(sliced.array.len(), 3); + } + + #[test] + fn test_slice_for_chunk_non_null_indices_boundary() { + // [1, null, 3]: non_null_indices=[0, 2]; test inclusive lower / exclusive upper bounds + let array: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), None, Some(3)])); + let logical_nulls = array.logical_nulls(); + let levels = ArrayLevels { + def_levels: Some(vec![1, 0, 1]), + rep_levels: None, + non_null_indices: vec![0, 2], + max_def_level: 1, + max_rep_level: 0, + array, + logical_nulls, + }; + assert_eq!( + levels + .slice_for_chunk(&CdcChunk { + level_offset: 0, + num_levels: 1, + value_offset: 0, + num_values: 1 + }) + .non_null_indices, + vec![0] + ); + // idx 2 in range [1,3), shifted -1 → 1 + assert_eq!( + levels + .slice_for_chunk(&CdcChunk { + level_offset: 1, + num_levels: 2, + value_offset: 1, + num_values: 2 + }) + .non_null_indices, + vec![1] + ); + // idx 2 excluded from [1,2) + assert_eq!( + levels + .slice_for_chunk(&CdcChunk { + level_offset: 1, + num_levels: 1, + value_offset: 1, + num_values: 1 + }) + .non_null_indices, + Vec::::new() + ); + } } diff --git a/parquet/src/arrow/arrow_writer/mod.rs b/parquet/src/arrow/arrow_writer/mod.rs 
index 979988eebc05..2ef71d5745a2 100644 --- a/parquet/src/arrow/arrow_writer/mod.rs +++ b/parquet/src/arrow/arrow_writer/mod.rs @@ -17,6 +17,8 @@ //! Contains writer which writes arrow data into parquet data. +use crate::column::chunker::ContentDefinedChunker; + use bytes::Bytes; use std::io::{Read, Write}; use std::iter::Peekable; @@ -192,6 +194,9 @@ pub struct ArrowWriter { /// The maximum size in bytes for a row group, or None for unlimited max_row_group_bytes: Option, + + /// CDC chunkers persisted across row groups (one per leaf column). + cdc_chunkers: Option>, } impl std::fmt::Debug for ArrowWriter { @@ -261,6 +266,18 @@ impl ArrowWriter { let row_group_writer_factory = ArrowRowGroupWriterFactory::new(&file_writer, arrow_schema.clone()); + let cdc_chunkers = props_ptr + .content_defined_chunking() + .map(|opts| { + file_writer + .schema_descr() + .columns() + .iter() + .map(|desc| ContentDefinedChunker::new(desc, opts)) + .collect::>>() + }) + .transpose()?; + Ok(Self { writer: file_writer, in_progress: None, @@ -268,6 +285,7 @@ impl ArrowWriter { row_group_writer_factory, max_row_group_row_count, max_row_group_bytes, + cdc_chunkers, }) } @@ -383,7 +401,10 @@ impl ArrowWriter { } } - in_progress.write(batch)?; + match self.cdc_chunkers.as_mut() { + Some(chunkers) => in_progress.write_with_chunkers(batch, chunkers)?, + None => in_progress.write(batch)?, + } let should_flush = self .max_row_group_row_count @@ -869,20 +890,50 @@ enum ArrowColumnWriterImpl { impl ArrowColumnWriter { /// Write an [`ArrowLeafColumn`] pub fn write(&mut self, col: &ArrowLeafColumn) -> Result<()> { + self.write_internal(&col.0) + } + + /// Write with content-defined chunking, inserting page flushes at chunk boundaries. 
+ fn write_with_chunker( + &mut self, + col: &ArrowLeafColumn, + chunker: &mut ContentDefinedChunker, + ) -> Result<()> { + let levels = &col.0; + let chunks = + chunker.get_arrow_chunks(levels.def_levels(), levels.rep_levels(), levels.array())?; + + let num_chunks = chunks.len(); + for (i, chunk) in chunks.iter().enumerate() { + let chunk_levels = levels.slice_for_chunk(chunk); + self.write_internal(&chunk_levels)?; + + // Add a page break after each chunk except the last + if i + 1 < num_chunks { + match &mut self.writer { + ArrowColumnWriterImpl::Column(c) => c.add_data_page()?, + ArrowColumnWriterImpl::ByteArray(c) => c.add_data_page()?, + } + } + } + Ok(()) + } + + fn write_internal(&mut self, levels: &ArrayLevels) -> Result<()> { match &mut self.writer { ArrowColumnWriterImpl::Column(c) => { - let leaf = col.0.array(); + let leaf = levels.array(); match leaf.as_any_dictionary_opt() { Some(dictionary) => { let materialized = arrow_select::take::take(dictionary.values(), dictionary.keys(), None)?; - write_leaf(c, &materialized, &col.0)? + write_leaf(c, &materialized, levels)? } - None => write_leaf(c, leaf, &col.0)?, + None => write_leaf(c, leaf, levels)?, }; } ArrowColumnWriterImpl::ByteArray(c) => { - write_primitive(c, col.0.array().as_ref(), &col.0)?; + write_primitive(c, levels.array().as_ref(), levels)?; } } Ok(()) @@ -958,7 +1009,26 @@ impl ArrowRowGroupWriter { let mut writers = self.writers.iter_mut(); for (field, column) in self.schema.fields().iter().zip(batch.columns()) { for leaf in compute_leaves(field.as_ref(), column)? { - writers.next().unwrap().write(&leaf)? 
+ writers.next().unwrap().write(&leaf)?; + } + } + Ok(()) + } + + fn write_with_chunkers( + &mut self, + batch: &RecordBatch, + chunkers: &mut [ContentDefinedChunker], + ) -> Result<()> { + self.buffered_rows += batch.num_rows(); + let mut writers = self.writers.iter_mut(); + let mut chunkers = chunkers.iter_mut(); + for (field, column) in self.schema.fields().iter().zip(batch.columns()) { + for leaf in compute_leaves(field.as_ref(), column)? { + writers + .next() + .unwrap() + .write_with_chunker(&leaf, chunkers.next().unwrap())?; } } Ok(()) diff --git a/parquet/src/column/chunker/cdc.rs b/parquet/src/column/chunker/cdc.rs new file mode 100644 index 000000000000..f21f58780a6a --- /dev/null +++ b/parquet/src/column/chunker/cdc.rs @@ -0,0 +1,2156 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::errors::{ParquetError, Result}; +use crate::file::properties::CdcOptions; +use crate::schema::types::ColumnDescriptor; + +use super::CdcChunk; +use super::cdc_generated::{GEARHASH_TABLE, NUM_GEARHASH_TABLES}; + +/// CDC (Content-Defined Chunking) divides data into variable-sized chunks based on +/// content rather than fixed-size boundaries. 
+/// +/// For example, given this sequence of values in a column: +/// +/// ```text +/// File1: [1,2,3, 4,5,6, 7,8,9] +/// chunk1 chunk2 chunk3 +/// ``` +/// +/// If a value is inserted between 3 and 4: +/// +/// ```text +/// File2: [1,2,3,0, 4,5,6, 7,8,9] +/// new-chunk chunk2 chunk3 +/// ``` +/// +/// The chunking process adjusts to maintain stable boundaries across data modifications. +/// Each chunk defines a new parquet data page which is contiguously written to the file. +/// Since each page is compressed independently, the files' contents look like: +/// +/// ```text +/// File1: [Page1][Page2][Page3]... +/// File2: [Page4][Page2][Page3]... +/// ``` +/// +/// When uploaded to a content-addressable storage (CAS) system, the CAS splits the byte +/// stream into content-defined blobs with unique identifiers. Identical blobs are stored +/// only once, so Page2 and Page3 are deduplicated across File1 and File2. +/// +/// ## Implementation +/// +/// Only the parquet writer needs to be aware of content-defined chunking; the reader is +/// unaffected. Each parquet column writer holds a `ContentDefinedChunker` instance +/// depending on the writer's properties. The chunker's state is maintained across the +/// entire column without being reset between pages and row groups. +/// +/// This implements a [FastCDC]-inspired algorithm using gear hashing. The input data is +/// fed byte-by-byte into a rolling hash; when the hash matches a predefined mask, a new +/// chunk boundary candidate is recorded. To reduce the exponential variance of chunk +/// sizes inherent in a single gear hash, the algorithm requires **8 consecutive mask +/// matches** — each against a different pre-computed gear hash table — before committing +/// to a boundary. This [central-limit-theorem normalization] makes the chunk size +/// distribution approximately normal between `min_chunk_size` and `max_chunk_size`. 
+/// +/// The chunker receives the record-shredded column data (def_levels, rep_levels, values) +/// and iterates over the (def_level, rep_level, value) triplets while adjusting the +/// column-global rolling hash. Whenever the rolling hash matches, the chunker creates a +/// new chunk. For nested data (lists, maps, structs) chunk boundaries are restricted to +/// top-level record boundaries (`rep_level == 0`) so that a nested row is never split +/// across chunks. +/// +/// Note that boundaries are deterministically calculated exclusively based on the data +/// itself, so the same data always produces the same chunks given the same configuration. +/// +/// Ported from the C++ implementation in apache/arrow#45360 +/// (`cpp/src/parquet/chunker_internal.cc`). +/// +/// [FastCDC]: https://www.usenix.org/conference/atc16/technical-sessions/presentation/xia +/// [central-limit-theorem normalization]: https://www.cidrdb.org/cidr2023/papers/p43-low.pdf +#[derive(Debug)] +pub(crate) struct ContentDefinedChunker { + /// Maximum definition level for this column. + max_def_level: i16, + /// Maximum repetition level for this column. + max_rep_level: i16, + /// Definition level at the nearest REPEATED ancestor. + repeated_ancestor_def_level: i16, + + /// Minimum chunk size in bytes. + /// The rolling hash will not be updated until this size is reached for each chunk. + /// All data sent through the hash function counts towards the chunk size, including + /// definition and repetition levels if present. + min_chunk_size: i64, + /// Maximum chunk size in bytes. + /// A new chunk is created whenever the chunk size exceeds this value. The chunk size + /// distribution approximates a normal distribution between `min_chunk_size` and + /// `max_chunk_size`. Note that the parquet writer has a related `data_pagesize` + /// property that controls the maximum size of a parquet data page after encoding. 
+ /// While setting `data_pagesize` smaller than `max_chunk_size` doesn't affect + /// chunking effectiveness, it results in more small parquet data pages. + max_chunk_size: i64, + /// Mask for matching against the rolling hash. + rolling_hash_mask: u64, + + /// Rolling hash state, never reset — initialized once for the entire column. + rolling_hash: u64, + /// Whether the rolling hash has matched the mask since the last chunk boundary. + has_matched: bool, + /// Current run count for the central-limit-theorem normalization. + nth_run: usize, + /// Current chunk size in bytes. + chunk_size: i64, +} + +impl ContentDefinedChunker { + pub fn new(desc: &ColumnDescriptor, options: &CdcOptions) -> Result { + let rolling_hash_mask = Self::calculate_mask( + options.min_chunk_size as i64, + options.max_chunk_size as i64, + options.norm_level, + )?; + Ok(Self { + max_def_level: desc.max_def_level(), + max_rep_level: desc.max_rep_level(), + repeated_ancestor_def_level: desc.repeated_ancestor_def_level(), + min_chunk_size: options.min_chunk_size as i64, + max_chunk_size: options.max_chunk_size as i64, + rolling_hash_mask, + rolling_hash: 0, + has_matched: false, + nth_run: 0, + chunk_size: 0, + }) + } + + /// Calculate the mask used to determine chunk boundaries from the rolling hash. + /// + /// The mask is calculated so that the expected chunk size distribution approximates + /// a normal distribution between min and max chunk sizes. 
+ fn calculate_mask(min_chunk_size: i64, max_chunk_size: i64, norm_level: i32) -> Result { + if min_chunk_size < 0 { + return Err(ParquetError::General( + "min_chunk_size must be non-negative".to_string(), + )); + } + if max_chunk_size <= min_chunk_size { + return Err(ParquetError::General( + "max_chunk_size must be greater than min_chunk_size".to_string(), + )); + } + + let avg_chunk_size = (min_chunk_size + max_chunk_size) / 2; + // Target size after subtracting the min-size skip window and dividing by the + // number of hash tables (for central-limit-theorem normalization). + let target_size = (avg_chunk_size - min_chunk_size) / NUM_GEARHASH_TABLES as i64; + + // floor(log2(target_size)) — equivalent to C++ NumRequiredBits(target_size) - 1 + let mask_bits = if target_size > 0 { + 63 - target_size.leading_zeros() as i32 + } else { + 0 + }; + + let effective_bits = mask_bits - norm_level; + + if !(1..=63).contains(&effective_bits) { + return Err(ParquetError::General(format!( + "The number of bits in the CDC mask must be between 1 and 63, got {effective_bits}" + ))); + } + + // Create the mask by setting the top `effective_bits` bits. + Ok(u64::MAX << (64 - effective_bits)) + } + + /// Feed raw bytes into the rolling hash. + /// + /// The byte count always accumulates toward `chunk_size`, but the actual hash + /// update is skipped until `min_chunk_size` has been reached. This "skip window" + /// is the FastCDC optimization that prevents boundaries from appearing too early + /// in a chunk. + #[inline] + fn roll(&mut self, bytes: &[u8]) { + self.chunk_size += bytes.len() as i64; + if self.chunk_size < self.min_chunk_size { + return; + } + for &b in bytes { + self.rolling_hash = self + .rolling_hash + .wrapping_shl(1) + .wrapping_add(GEARHASH_TABLE[self.nth_run][b as usize]); + self.has_matched = + self.has_matched || ((self.rolling_hash & self.rolling_hash_mask) == 0); + } + } + + /// Feed exactly `N` bytes into the rolling hash (compile-time width). 
+ /// + /// Like [`roll`](Self::roll), but the byte count is known at compile time, + /// allowing the compiler to unroll the inner loop. + #[inline(always)] + fn roll_fixed(&mut self, bytes: &[u8; N]) { + self.chunk_size += N as i64; + if self.chunk_size < self.min_chunk_size { + return; + } + for j in 0..N { + self.rolling_hash = self + .rolling_hash + .wrapping_shl(1) + .wrapping_add(GEARHASH_TABLE[self.nth_run][bytes[j] as usize]); + self.has_matched = + self.has_matched || ((self.rolling_hash & self.rolling_hash_mask) == 0); + } + } + + /// Feed a definition or repetition level (i16) into the rolling hash. + #[inline] + fn roll_level(&mut self, level: i16) { + self.roll_fixed(&level.to_le_bytes()); + } + + /// Check whether a new chunk boundary should be created. + /// + /// A boundary is created when **either** of two conditions holds: + /// + /// 1. **CLT normalization**: The rolling hash has matched the mask (`has_matched`) + /// *and* this is the 8th consecutive such match (`nth_run` reaches + /// `NUM_GEARHASH_TABLES`). Each match advances to the next gear hash table, so + /// 8 independent matches are required. A single hash table would yield + /// exponentially distributed chunk sizes; requiring 8 independent matches + /// approximates a normal (Gaussian) distribution by the central limit theorem. + /// + /// 2. **Hard size limit**: `chunk_size` has reached `max_chunk_size`. This caps + /// chunk size even if the CLT normalization sequence has not completed. + /// + /// Note: when `max_chunk_size` forces a boundary, `nth_run` is **not** reset, so + /// the CLT sequence continues from where it left off in the next chunk. This + /// matches the C++ behavior. 
+ #[inline] + fn need_new_chunk(&mut self) -> bool { + if self.has_matched { + self.has_matched = false; + self.nth_run += 1; + if self.nth_run >= NUM_GEARHASH_TABLES { + self.nth_run = 0; + self.chunk_size = 0; + return true; + } + } + if self.chunk_size >= self.max_chunk_size { + self.chunk_size = 0; + return true; + } + false + } + + /// Compute chunk boundaries for the given column data. + /// + /// The chunking state is maintained across the entire column without being + /// reset between pages and row groups. This enables the chunking process to + /// be continued between different write calls. + /// + /// We go over the (def_level, rep_level, value) triplets one by one while + /// adjusting the column-global rolling hash based on the triplet. Whenever + /// the rolling hash matches a predefined mask it sets `has_matched` to true. + /// + /// After each triplet [`need_new_chunk`](Self::need_new_chunk) is called to + /// evaluate if we need to create a new chunk. + fn calculate( + &mut self, + def_levels: Option<&[i16]>, + rep_levels: Option<&[i16]>, + num_levels: usize, + mut roll_value: F, + ) -> Vec + where + F: FnMut(&mut Self, usize), + { + let has_def_levels = self.max_def_level > 0; + let has_rep_levels = self.max_rep_level > 0; + + let mut chunks = Vec::new(); + let mut prev_offset: usize = 0; + let mut prev_value_offset: usize = 0; + // Total number of values seen; for non-nested data this equals num_levels. + let mut total_values: usize = num_levels; + + if !has_rep_levels && !has_def_levels { + // Fastest path: non-nested, non-null data. + for offset in 0..num_levels { + roll_value(self, offset); + if self.need_new_chunk() { + chunks.push(CdcChunk { + level_offset: prev_offset, + value_offset: prev_offset, + num_levels: offset - prev_offset, + num_values: offset - prev_offset, + }); + prev_offset = offset; + } + } + // Set the previous value offset to add the last chunk. 
+ prev_value_offset = prev_offset; + } else if !has_rep_levels { + // Non-nested data with nulls. + let def_levels = def_levels.expect("def_levels required when max_def_level > 0"); + #[allow(clippy::needless_range_loop)] + for offset in 0..num_levels { + let def_level = def_levels[offset]; + self.roll_level(def_level); + if def_level == self.max_def_level { + roll_value(self, offset); + } + if self.need_new_chunk() { + chunks.push(CdcChunk { + level_offset: prev_offset, + value_offset: prev_offset, + num_levels: offset - prev_offset, + num_values: offset - prev_offset, + }); + prev_offset = offset; + } + } + // Set the previous value offset to add the last chunk. + prev_value_offset = prev_offset; + } else { + // Nested data with nulls. + let def_levels = def_levels.expect("def_levels required for nested data"); + let rep_levels = rep_levels.expect("rep_levels required for nested data"); + let mut value_offset: usize = 0; + + for offset in 0..num_levels { + let def_level = def_levels[offset]; + let rep_level = rep_levels[offset]; + + self.roll_level(def_level); + self.roll_level(rep_level); + if def_level == self.max_def_level { + roll_value(self, value_offset); + } + + if rep_level == 0 && self.need_new_chunk() { + // If we are at a record boundary and need a new chunk, create one. + let levels_to_write = offset - prev_offset; + if levels_to_write > 0 { + chunks.push(CdcChunk { + level_offset: prev_offset, + value_offset: prev_value_offset, + num_levels: levels_to_write, + num_values: value_offset - prev_value_offset, + }); + prev_offset = offset; + prev_value_offset = value_offset; + } + } + if def_level >= self.repeated_ancestor_def_level { + // We only increment the value offset if we have a leaf value. + value_offset += 1; + } + } + total_values = value_offset; + } + + // Add the last chunk if we have any levels left. 
+ if prev_offset < num_levels { + chunks.push(CdcChunk { + level_offset: prev_offset, + value_offset: prev_value_offset, + num_levels: num_levels - prev_offset, + num_values: total_values - prev_value_offset, + }); + } + + #[cfg(debug_assertions)] + self.validate_chunks(&chunks, num_levels, total_values); + + chunks + } + + /// Compute CDC chunk boundaries by dispatching on the Arrow array's data type + /// to feed value bytes into the rolling hash. + #[cfg(feature = "arrow")] + pub(crate) fn get_arrow_chunks( + &mut self, + def_levels: Option<&[i16]>, + rep_levels: Option<&[i16]>, + array: &dyn arrow_array::Array, + ) -> Result> { + use arrow_array::cast::AsArray; + use arrow_schema::DataType; + + let num_levels = match def_levels { + Some(def_levels) => def_levels.len(), + None => array.len(), + }; + + macro_rules! fixed_width { + ($N:literal) => {{ + let data = array.to_data(); + let buffer = data.buffers()[0].as_slice(); + let values = &buffer[data.offset() * $N..]; + self.calculate(def_levels, rep_levels, num_levels, |c, i| { + let offset = i * $N; + let slice = &values[offset..offset + $N]; + c.roll_fixed::<$N>(slice.try_into().unwrap()); + }) + }}; + } + + macro_rules! 
binary_like { + ($a:expr) => {{ + let a = $a; + self.calculate(def_levels, rep_levels, num_levels, |c, i| { + c.roll(a.value(i).as_ref()); + }) + }}; + } + + let dtype = array.data_type(); + let chunks = match dtype { + DataType::Null => self.calculate(def_levels, rep_levels, num_levels, |_, _| {}), + DataType::Boolean => { + let a = array.as_boolean(); + self.calculate(def_levels, rep_levels, num_levels, |c, i| { + c.roll_fixed(&[a.value(i) as u8]); + }) + } + DataType::Int8 | DataType::UInt8 => fixed_width!(1), + DataType::Int16 | DataType::UInt16 | DataType::Float16 => fixed_width!(2), + DataType::Int32 + | DataType::UInt32 + | DataType::Float32 + | DataType::Date32 + | DataType::Time32(_) + | DataType::Interval(arrow_schema::IntervalUnit::YearMonth) + | DataType::Decimal32(_, _) => fixed_width!(4), + DataType::Int64 + | DataType::UInt64 + | DataType::Float64 + | DataType::Date64 + | DataType::Time64(_) + | DataType::Timestamp(_, _) + | DataType::Duration(_) + | DataType::Interval(arrow_schema::IntervalUnit::DayTime) + | DataType::Decimal64(_, _) => fixed_width!(8), + DataType::Interval(arrow_schema::IntervalUnit::MonthDayNano) + | DataType::Decimal128(_, _) => fixed_width!(16), + DataType::Decimal256(_, _) => fixed_width!(32), + DataType::FixedSizeBinary(_) => binary_like!(array.as_fixed_size_binary()), + DataType::Binary => binary_like!(array.as_binary::()), + DataType::LargeBinary => binary_like!(array.as_binary::()), + DataType::Utf8 => binary_like!(array.as_string::()), + DataType::LargeUtf8 => binary_like!(array.as_string::()), + DataType::BinaryView => binary_like!(array.as_binary_view()), + DataType::Utf8View => binary_like!(array.as_string_view()), + DataType::Dictionary(_, _) => { + let dict = array.as_any_dictionary(); + self.get_arrow_chunks(def_levels, rep_levels, dict.keys())? 
+ } + _ => { + return Err(ParquetError::General(format!( + "content-defined chunking is not supported for data type {dtype:?}", + ))); + } + }; + Ok(chunks) + } + + #[cfg(debug_assertions)] + fn validate_chunks(&self, chunks: &[CdcChunk], num_levels: usize, total_values: usize) { + assert!(!chunks.is_empty(), "chunks must be non-empty"); + + let first = &chunks[0]; + assert_eq!(first.level_offset, 0, "first chunk must start at level 0"); + assert_eq!(first.value_offset, 0, "first chunk must start at value 0"); + + let mut sum_levels = first.num_levels; + let mut sum_values = first.num_values; + for i in 1..chunks.len() { + let chunk = &chunks[i]; + let prev = &chunks[i - 1]; + assert!(chunk.num_levels > 0, "chunk must have levels"); + assert_eq!( + chunk.level_offset, + prev.level_offset + prev.num_levels, + "level offsets must be contiguous" + ); + assert_eq!( + chunk.value_offset, + prev.value_offset + prev.num_values, + "value offsets must be contiguous" + ); + sum_levels += chunk.num_levels; + sum_values += chunk.num_values; + } + assert_eq!(sum_levels, num_levels, "chunks must cover all levels"); + assert_eq!(sum_values, total_values, "chunks must cover all values"); + + let last = chunks.last().unwrap(); + assert_eq!( + last.level_offset + last.num_levels, + num_levels, + "last chunk must end at num_levels" + ); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::basic::Type as PhysicalType; + use crate::schema::types::{ColumnPath, Type}; + use std::sync::Arc; + + fn make_desc(max_def_level: i16, max_rep_level: i16) -> ColumnDescriptor { + let tp = Type::primitive_type_builder("col", PhysicalType::INT32) + .build() + .unwrap(); + ColumnDescriptor::new( + Arc::new(tp), + max_def_level, + max_rep_level, + ColumnPath::new(vec![]), + ) + } + + #[test] + fn test_calculate_mask_defaults() { + let mask = ContentDefinedChunker::calculate_mask(256 * 1024, 1024 * 1024, 0).unwrap(); + // avg = 640 KiB, target = (640-256)*1024/8 = 49152, log2(49152) = 15 + 
// mask = u64::MAX << (64 - 15) = top 15 bits set + let expected = u64::MAX << (64 - 15); + assert_eq!(mask, expected); + } + + #[test] + fn test_calculate_mask_with_norm_level() { + let mask = ContentDefinedChunker::calculate_mask(256 * 1024, 1024 * 1024, 1).unwrap(); + let expected = u64::MAX << (64 - 14); + assert_eq!(mask, expected); + } + + #[test] + fn test_calculate_mask_invalid() { + assert!(ContentDefinedChunker::calculate_mask(-1, 100, 0).is_err()); + assert!(ContentDefinedChunker::calculate_mask(100, 50, 0).is_err()); + assert!(ContentDefinedChunker::calculate_mask(100, 100, 0).is_err()); + } + + #[test] + fn test_non_nested_non_null_single_chunk() { + let options = CdcOptions { + min_chunk_size: 8, + max_chunk_size: 1024, + norm_level: 0, + }; + let mut chunker = ContentDefinedChunker::new(&make_desc(0, 0), &options).unwrap(); + + // Write a small amount of data — should produce exactly 1 chunk. + let num_values = 4; + let chunks = chunker.calculate(None, None, num_values, |c, i| { + c.roll_fixed::<4>(&(i as i32).to_le_bytes()); + }); + assert_eq!(chunks.len(), 1); + assert_eq!(chunks[0].level_offset, 0); + assert_eq!(chunks[0].value_offset, 0); + assert_eq!(chunks[0].num_levels, 4); + } + + #[test] + fn test_max_chunk_size_forces_boundary() { + let options = CdcOptions { + min_chunk_size: 256, + max_chunk_size: 1024, + norm_level: 0, + }; + let mut chunker = ContentDefinedChunker::new(&make_desc(0, 0), &options).unwrap(); + + // Write enough data to exceed max_chunk_size multiple times. + // Each i32 = 4 bytes, max_chunk_size=1024, so ~256 values per chunk max. 
+ let num_values = 2000; + let chunks = chunker.calculate(None, None, num_values, |c, i| { + c.roll_fixed::<4>(&(i as i32).to_le_bytes()); + }); + + // Should have multiple chunks + assert!(chunks.len() > 1); + + // Verify contiguity + let mut total_levels = 0; + for (i, chunk) in chunks.iter().enumerate() { + assert_eq!(chunk.level_offset, total_levels); + if i < chunks.len() - 1 { + assert!(chunk.num_levels > 0); + } + total_levels += chunk.num_levels; + } + assert_eq!(total_levels, num_values); + } + + #[test] + fn test_deterministic_chunks() { + let options = CdcOptions { + min_chunk_size: 4, + max_chunk_size: 64, + norm_level: 0, + }; + + let roll = |c: &mut ContentDefinedChunker, i: usize| { + c.roll_fixed::<8>(&(i as i64).to_le_bytes()); + }; + + let mut chunker1 = ContentDefinedChunker::new(&make_desc(0, 0), &options).unwrap(); + let chunks1 = chunker1.calculate(None, None, 200, roll); + + let mut chunker2 = ContentDefinedChunker::new(&make_desc(0, 0), &options).unwrap(); + let chunks2 = chunker2.calculate(None, None, 200, roll); + + assert_eq!(chunks1.len(), chunks2.len()); + for (a, b) in chunks1.iter().zip(chunks2.iter()) { + assert_eq!(a.level_offset, b.level_offset); + assert_eq!(a.value_offset, b.value_offset); + assert_eq!(a.num_levels, b.num_levels); + } + } + + #[test] + fn test_nullable_non_nested() { + let options = CdcOptions { + min_chunk_size: 4, + max_chunk_size: 64, + norm_level: 0, + }; + let mut chunker = ContentDefinedChunker::new(&make_desc(1, 0), &options).unwrap(); + + let num_levels = 20; + // def_level=1 means non-null, def_level=0 means null + let def_levels: Vec = (0..num_levels) + .map(|i| if i % 3 == 0 { 0 } else { 1 }) + .collect(); + + let chunks = chunker.calculate(Some(&def_levels), None, num_levels, |c, i| { + c.roll_fixed::<4>(&(i as i32).to_le_bytes()); + }); + + assert!(!chunks.is_empty()); + let total: usize = chunks.iter().map(|c| c.num_levels).sum(); + assert_eq!(total, num_levels); + } +} + +/// Integration tests that 
exercise CDC through the Arrow writer/reader roundtrip. +/// Ported from the C++ test suite in `chunker_internal_test.cc`. +#[cfg(all(test, feature = "arrow"))] +mod arrow_tests { + use std::borrow::Borrow; + use std::sync::Arc; + + use arrow_array::cast::AsArray; + use arrow_array::{Array, ArrayRef, BooleanArray, Int32Array, RecordBatch}; + use arrow_schema::{DataType, Field, Schema}; + + use crate::arrow::arrow_reader::ParquetRecordBatchReaderBuilder; + use crate::arrow::arrow_writer::ArrowWriter; + use crate::file::properties::{CdcOptions, WriterProperties}; + use crate::file::reader::{FileReader, SerializedFileReader}; + + // --- Constants matching C++ TestCDCSingleRowGroup --- + + const CDC_MIN_CHUNK_SIZE: usize = 4 * 1024; + const CDC_MAX_CHUNK_SIZE: usize = 16 * 1024; + const CDC_PART_SIZE: usize = 128 * 1024; + const CDC_EDIT_SIZE: usize = 128; + const CDC_ROW_GROUP_LENGTH: usize = 1024 * 1024; + + // --- Helpers --- + + /// Deterministic hash function matching the C++ test generator. + fn test_hash(seed: u64, index: u64) -> u64 { + let mut h = (index.wrapping_add(seed)).wrapping_mul(0xc4ceb9fe1a85ec53u64); + h ^= h >> 33; + h = h.wrapping_mul(0xff51afd7ed558ccdu64); + h ^= h >> 33; + h = h.wrapping_mul(0xc4ceb9fe1a85ec53u64); + h ^= h >> 33; + h + } + + /// Generate a deterministic array for any supported data type, matching C++ `GenerateArray`. + fn generate_array(dtype: &DataType, nullable: bool, length: usize, seed: u64) -> ArrayRef { + macro_rules! 
gen_primitive { + ($array_type:ty, $cast:expr) => {{ + if nullable { + let arr: $array_type = (0..length) + .map(|i| { + let val = test_hash(seed, i as u64); + if val % 10 == 0 { + None + } else { + Some($cast(val)) + } + }) + .collect(); + Arc::new(arr) as ArrayRef + } else { + let arr: $array_type = (0..length) + .map(|i| Some($cast(test_hash(seed, i as u64)))) + .collect(); + Arc::new(arr) as ArrayRef + } + }}; + } + + match dtype { + DataType::Boolean => { + if nullable { + let arr: BooleanArray = (0..length) + .map(|i| { + let val = test_hash(seed, i as u64); + if val % 10 == 0 { + None + } else { + Some(val % 2 == 0) + } + }) + .collect(); + Arc::new(arr) + } else { + let arr: BooleanArray = (0..length) + .map(|i| Some(test_hash(seed, i as u64) % 2 == 0)) + .collect(); + Arc::new(arr) + } + } + DataType::Int32 => gen_primitive!(Int32Array, |v: u64| v as i32), + DataType::Int64 => { + gen_primitive!(arrow_array::Int64Array, |v: u64| v as i64) + } + DataType::Float64 => { + gen_primitive!(arrow_array::Float64Array, |v: u64| (v % 100000) as f64 + / 1000.0) + } + DataType::Utf8 => { + let arr: arrow_array::StringArray = if nullable { + (0..length) + .map(|i| { + let val = test_hash(seed, i as u64); + if val % 10 == 0 { + None + } else { + Some(format!("str_{val}")) + } + }) + .collect() + } else { + (0..length) + .map(|i| Some(format!("str_{}", test_hash(seed, i as u64)))) + .collect() + }; + Arc::new(arr) + } + DataType::Binary => { + let arr: arrow_array::BinaryArray = if nullable { + (0..length) + .map(|i| { + let val = test_hash(seed, i as u64); + if val % 10 == 0 { + None + } else { + Some(format!("bin_{val}").into_bytes()) + } + }) + .collect() + } else { + (0..length) + .map(|i| Some(format!("bin_{}", test_hash(seed, i as u64)).into_bytes())) + .collect() + }; + Arc::new(arr) + } + DataType::FixedSizeBinary(size) => { + let size = *size; + let mut builder = arrow_array::builder::FixedSizeBinaryBuilder::new(size); + for i in 0..length { + let val = 
test_hash(seed, i as u64); + if nullable && val % 10 == 0 { + builder.append_null(); + } else { + let s = format!("bin_{val}"); + let bytes = s.as_bytes(); + let mut buf = vec![0u8; size as usize]; + let copy_len = bytes.len().min(size as usize); + buf[..copy_len].copy_from_slice(&bytes[..copy_len]); + builder.append_value(&buf).unwrap(); + } + } + Arc::new(builder.finish()) + } + DataType::Date32 => { + gen_primitive!(arrow_array::Date32Array, |v: u64| v as i32) + } + DataType::Timestamp(arrow_schema::TimeUnit::Nanosecond, _) => { + gen_primitive!(arrow_array::TimestampNanosecondArray, |v: u64| v as i64) + } + _ => panic!("Unsupported test data type: {dtype:?}"), + } + } + + /// Generate a RecordBatch with the given schema, matching C++ `GenerateTable`. + fn generate_table(schema: &Arc, length: usize, seed: u64) -> RecordBatch { + let arrays: Vec = schema + .fields() + .iter() + .enumerate() + .map(|(i, field)| { + generate_array( + field.data_type(), + field.is_nullable(), + length, + seed + i as u64 * 10, + ) + }) + .collect(); + RecordBatch::try_new(schema.clone(), arrays).unwrap() + } + + /// Compute the CDC byte width for a data type, matching C++ `bytes_per_record`. + /// Returns 0 for variable-length types. + fn cdc_byte_width(dtype: &DataType) -> usize { + match dtype { + DataType::Boolean => 1, + DataType::Int8 | DataType::UInt8 => 1, + DataType::Int16 | DataType::UInt16 | DataType::Float16 => 2, + DataType::Int32 + | DataType::UInt32 + | DataType::Float32 + | DataType::Date32 + | DataType::Time32(_) => 4, + DataType::Int64 + | DataType::UInt64 + | DataType::Float64 + | DataType::Date64 + | DataType::Time64(_) + | DataType::Timestamp(_, _) + | DataType::Duration(_) => 8, + DataType::Decimal128(_, _) => 16, + DataType::Decimal256(_, _) => 32, + DataType::FixedSizeBinary(n) => *n as usize, + _ => 0, // variable-length + } + } + + /// Compute bytes_per_record for determining part/edit lengths, matching C++. 
+ fn bytes_per_record(dtype: &DataType, nullable: bool) -> usize { + let bw = cdc_byte_width(dtype); + if bw > 0 { + if nullable { bw + 2 } else { bw } + } else { + 16 // variable-length fallback, matching C++ + } + } + + /// Compute the CDC chunk size for an array slice, matching C++ `CalculateCdcSize`. + fn calculate_cdc_size(array: &dyn Array, nullable: bool) -> i64 { + let dtype = array.data_type(); + let bw = cdc_byte_width(dtype); + let result = if bw > 0 { + // Fixed-width: count only non-null values + let valid_count = array.len() - array.null_count(); + (valid_count * bw) as i64 + } else { + // Variable-length: sum of actual byte lengths + match dtype { + DataType::Utf8 => { + let a = array.as_string::(); + (0..a.len()) + .filter(|&i| a.is_valid(i)) + .map(|i| a.value(i).len() as i64) + .sum() + } + DataType::Binary => { + let a = array.as_binary::(); + (0..a.len()) + .filter(|&i| a.is_valid(i)) + .map(|i| a.value(i).len() as i64) + .sum() + } + DataType::LargeBinary => { + let a = array.as_binary::(); + (0..a.len()) + .filter(|&i| a.is_valid(i)) + .map(|i| a.value(i).len() as i64) + .sum() + } + _ => panic!("CDC size calculation not implemented for {dtype:?}"), + } + }; + + if nullable { + // Add 2 bytes per element for definition levels + result + array.len() as i64 * 2 + } else { + result + } + } + + /// Page-level metadata for a single column within a row group. + struct ColumnInfo { + page_lengths: Vec, + has_dictionary_page: bool, + } + + /// Extract per-row-group column info from Parquet data. 
+ fn get_column_info(data: &[u8], column_index: usize) -> Vec { + let reader = SerializedFileReader::new(bytes::Bytes::from(data.to_vec())).unwrap(); + let metadata = reader.metadata(); + let mut result = Vec::new(); + for rg in 0..metadata.num_row_groups() { + let rg_reader = reader.get_row_group(rg).unwrap(); + let col_reader = rg_reader.get_column_page_reader(column_index).unwrap(); + let mut info = ColumnInfo { + page_lengths: Vec::new(), + has_dictionary_page: false, + }; + for page in col_reader { + let page = page.unwrap(); + match page.page_type() { + crate::basic::PageType::DATA_PAGE | crate::basic::PageType::DATA_PAGE_V2 => { + info.page_lengths.push(page.num_values() as i64); + } + crate::basic::PageType::DICTIONARY_PAGE => { + info.has_dictionary_page = true; + } + _ => {} + } + } + result.push(info); + } + result + } + + /// Assert that CDC chunk sizes are within the expected range. + /// Equivalent to C++ `AssertContentDefinedChunkSizes`. + fn assert_cdc_chunk_sizes( + array: &ArrayRef, + info: &ColumnInfo, + nullable: bool, + min_chunk_size: usize, + max_chunk_size: usize, + expect_dictionary_page: bool, + ) { + // Boolean and FixedSizeBinary never produce dictionary pages (matching C++) + let expect_dict = match array.data_type() { + DataType::Boolean | DataType::FixedSizeBinary(_) => false, + _ => expect_dictionary_page, + }; + assert_eq!( + info.has_dictionary_page, + expect_dict, + "dictionary page mismatch for {:?}", + array.data_type() + ); + + let page_lengths = &info.page_lengths; + assert!( + page_lengths.len() > 1, + "CDC should produce multiple pages, got {page_lengths:?}" + ); + + let bw = cdc_byte_width(array.data_type()); + // Only do exact CDC size validation for fixed-width and base binary-like types + if bw > 0 + || matches!( + array.data_type(), + DataType::Utf8 | DataType::Binary | DataType::LargeBinary + ) + { + let mut offset = 0i64; + for (i, &page_len) in page_lengths.iter().enumerate() { + let slice = array.slice(offset as 
usize, page_len as usize); + let cdc_size = calculate_cdc_size(slice.as_ref(), nullable); + if i < page_lengths.len() - 1 { + assert!( + cdc_size >= min_chunk_size as i64, + "Page {i}: CDC size {cdc_size} < min {min_chunk_size}, pages={page_lengths:?}" + ); + } + assert!( + cdc_size <= max_chunk_size as i64, + "Page {i}: CDC size {cdc_size} > max {max_chunk_size}, pages={page_lengths:?}" + ); + offset += page_len; + } + assert_eq!( + offset, + array.len() as i64, + "page lengths must sum to array length" + ); + } + } + + /// Write batches with CDC options and validate roundtrip. + /// Matches C++ `WriteTableToBuffer`. + fn write_with_cdc_options( + batches: &[&RecordBatch], + min_chunk_size: usize, + max_chunk_size: usize, + max_row_group_rows: Option, + enable_dictionary: bool, + ) -> Vec { + assert!(!batches.is_empty()); + let schema = batches[0].schema(); + let mut builder = WriterProperties::builder() + .set_dictionary_enabled(enable_dictionary) + .set_content_defined_chunking(Some(CdcOptions { + min_chunk_size, + max_chunk_size, + norm_level: 0, + })); + if let Some(max_rows) = max_row_group_rows { + builder = builder.set_max_row_group_row_count(Some(max_rows)); + } + let props = builder.build(); + let mut buf = Vec::new(); + let mut writer = ArrowWriter::try_new(&mut buf, schema.clone(), Some(props)).unwrap(); + for batch in batches { + writer.write(batch).unwrap(); + } + writer.close().unwrap(); + + // Roundtrip validation (matching C++ WriteTableToBuffer) + let readback = read_batches(&buf); + let original_rows: usize = batches.iter().map(|b| b.num_rows()).sum(); + let readback_rows: usize = readback.iter().map(|b| b.num_rows()).sum(); + assert_eq!(original_rows, readback_rows, "Roundtrip row count mismatch"); + if original_rows > 0 { + let original = concat_batches(batches.iter().copied()); + let roundtrip = concat_batches(&readback); + assert_eq!(original, roundtrip, "Roundtrip validation failed"); + } + + buf + } + + fn read_batches(data: &[u8]) -> Vec { 
+ let reader = ParquetRecordBatchReaderBuilder::try_new(bytes::Bytes::from(data.to_vec())) + .unwrap() + .build() + .unwrap(); + reader.collect::, _>>().unwrap() + } + + fn concat_batches(batches: impl IntoIterator>) -> RecordBatch { + let batches: Vec<_> = batches.into_iter().collect(); + let schema = batches[0].borrow().schema(); + let batches = batches.iter().map(|b| b.borrow()); + arrow_select::concat::concat_batches(&schema, batches).unwrap() + } + + /// LCS-based diff between two sequences of page lengths (ported from C++). + /// Includes the merge-adjacent-diffs post-processing from C++. + fn find_differences(first: &[i64], second: &[i64]) -> Vec<(Vec, Vec)> { + let n = first.len(); + let m = second.len(); + let mut dp = vec![vec![0usize; m + 1]; n + 1]; + for i in 0..n { + for j in 0..m { + if first[i] == second[j] { + dp[i + 1][j + 1] = dp[i][j] + 1; + } else { + dp[i + 1][j + 1] = dp[i + 1][j].max(dp[i][j + 1]); + } + } + } + let mut common = Vec::new(); + let (mut i, mut j) = (n, m); + while i > 0 && j > 0 { + if first[i - 1] == second[j - 1] { + common.push((i - 1, j - 1)); + i -= 1; + j -= 1; + } else if dp[i - 1][j] >= dp[i][j - 1] { + i -= 1; + } else { + j -= 1; + } + } + common.reverse(); + + let mut result = Vec::new(); + let (mut last_i, mut last_j) = (0usize, 0usize); + for (ci, cj) in &common { + if *ci > last_i || *cj > last_j { + result.push((first[last_i..*ci].to_vec(), second[last_j..*cj].to_vec())); + } + last_i = ci + 1; + last_j = cj + 1; + } + if last_i < n || last_j < m { + result.push((first[last_i..].to_vec(), second[last_j..].to_vec())); + } + + // Merge adjacent diffs (matching C++ post-processing) + let mut merged: Vec<(Vec, Vec)> = Vec::new(); + for diff in result { + if let Some(prev) = merged.last_mut() { + if prev.0.is_empty() && diff.1.is_empty() { + prev.0 = diff.0; + continue; + } else if prev.1.is_empty() && diff.0.is_empty() { + prev.1 = diff.1; + continue; + } + } + merged.push(diff); + } + merged + } + + /// Assert 
exact page length differences between original and modified files. + /// Matches C++ `AssertPageLengthDifferences` (full version). + fn assert_page_length_differences( + original: &ColumnInfo, + modified: &ColumnInfo, + exact_equal_diffs: usize, + exact_larger_diffs: usize, + exact_smaller_diffs: usize, + edit_length: i64, + ) { + let diffs = find_differences(&original.page_lengths, &modified.page_lengths); + let expected = exact_equal_diffs + exact_larger_diffs + exact_smaller_diffs; + + if diffs.len() != expected { + eprintln!("Original: {:?}", original.page_lengths); + eprintln!("Modified: {:?}", modified.page_lengths); + for d in &diffs { + eprintln!(" Diff: {:?} vs {:?}", d.0, d.1); + } + } + assert_eq!( + diffs.len(), + expected, + "Expected {expected} diffs, got {}", + diffs.len() + ); + + let (mut eq, mut larger, mut smaller) = (0usize, 0usize, 0usize); + for (left, right) in &diffs { + let left_sum: i64 = left.iter().sum(); + let right_sum: i64 = right.iter().sum(); + if left_sum == right_sum { + eq += 1; + } else if left_sum < right_sum { + larger += 1; + assert_eq!( + left_sum + edit_length, + right_sum, + "Larger diff mismatch: {left_sum} + {edit_length} != {right_sum}" + ); + } else { + smaller += 1; + assert_eq!( + left_sum, + right_sum + edit_length, + "Smaller diff mismatch: {left_sum} != {right_sum} + {edit_length}" + ); + } + } + + assert_eq!(eq, exact_equal_diffs, "equal diffs count"); + assert_eq!(larger, exact_larger_diffs, "larger diffs count"); + assert_eq!(smaller, exact_smaller_diffs, "smaller diffs count"); + } + + /// Assert page length differences for update cases (simplified version). + /// Matches C++ `AssertPageLengthDifferences` (max_equal_diffs overload). 
+ fn assert_page_length_differences_update( + original: &ColumnInfo, + modified: &ColumnInfo, + max_equal_diffs: usize, + ) { + let diffs = find_differences(&original.page_lengths, &modified.page_lengths); + assert!( + diffs.len() <= max_equal_diffs, + "Expected at most {max_equal_diffs} diffs, got {}", + diffs.len() + ); + for (left, right) in &diffs { + let left_sum: i64 = left.iter().sum(); + let right_sum: i64 = right.iter().sum(); + assert_eq!( + left_sum, right_sum, + "Update diff should not change total row count" + ); + } + } + + // --- FindDifferences tests (ported from C++) --- + + #[test] + fn test_find_differences_basic() { + let diffs = find_differences(&[1, 2, 3, 4, 5], &[1, 7, 8, 4, 5]); + assert_eq!(diffs.len(), 1); + assert_eq!(diffs[0].0, vec![2, 3]); + assert_eq!(diffs[0].1, vec![7, 8]); + } + + #[test] + fn test_find_differences_multiple() { + let diffs = find_differences(&[1, 2, 3, 4, 5, 6, 7], &[1, 8, 9, 4, 10, 6, 11]); + assert_eq!(diffs.len(), 3); + assert_eq!(diffs[0].0, vec![2, 3]); + assert_eq!(diffs[0].1, vec![8, 9]); + assert_eq!(diffs[1].0, vec![5]); + assert_eq!(diffs[1].1, vec![10]); + assert_eq!(diffs[2].0, vec![7]); + assert_eq!(diffs[2].1, vec![11]); + } + + #[test] + fn test_find_differences_different_lengths() { + let diffs = find_differences(&[1, 2, 3], &[1, 2, 3, 4, 5]); + assert_eq!(diffs.len(), 1); + assert!(diffs[0].0.is_empty()); + assert_eq!(diffs[0].1, vec![4, 5]); + } + + #[test] + fn test_find_differences_empty() { + let diffs = find_differences(&[], &[]); + assert!(diffs.is_empty()); + } + + #[test] + fn test_find_differences_changes_at_both_ends() { + let diffs = find_differences(&[1, 2, 3, 4, 5, 6, 7, 8, 9], &[0, 0, 2, 3, 4, 5, 7, 7, 8]); + assert_eq!(diffs.len(), 3); + assert_eq!(diffs[0].0, vec![1]); + assert_eq!(diffs[0].1, vec![0, 0]); + assert_eq!(diffs[1].0, vec![6]); + assert_eq!(diffs[1].1, vec![7]); + assert_eq!(diffs[2].0, vec![9]); + assert!(diffs[2].1.is_empty()); + } + + #[test] + fn 
test_find_differences_additional() { + let diffs = find_differences( + &[445, 312, 393, 401, 410, 138, 558, 457], + &[445, 312, 393, 393, 410, 138, 558, 457], + ); + assert_eq!(diffs.len(), 1); + assert_eq!(diffs[0].0, vec![401]); + assert_eq!(diffs[0].1, vec![393]); + } + + // --- Parameterized single-row-group tests via macro --- + + macro_rules! cdc_single_rg_tests { + ($mod_name:ident, $dtype:expr, $nullable:expr) => { + mod $mod_name { + use super::*; + + fn config() -> (DataType, bool, usize, usize) { + let dtype: DataType = $dtype; + let nullable: bool = $nullable; + let bpr = bytes_per_record(&dtype, nullable); + let part_length = CDC_PART_SIZE / bpr; + let edit_length = CDC_EDIT_SIZE / bpr; + (dtype, nullable, part_length, edit_length) + } + + fn make_schema(dtype: &DataType, nullable: bool) -> Arc { + Arc::new(Schema::new(vec![Field::new("f0", dtype.clone(), nullable)])) + } + + #[test] + fn delete_once() { + let (dtype, nullable, part_length, edit_length) = config(); + let schema = make_schema(&dtype, nullable); + + let part1 = generate_table(&schema, part_length, 0); + let part2 = generate_table(&schema, edit_length, 1); + let part3 = generate_table(&schema, part_length, part_length as u64); + + let base = concat_batches([&part1, &part2, &part3]); + let modified = concat_batches([&part1, &part3]); + + for enable_dictionary in [false, true] { + let base_data = write_with_cdc_options( + &[&base], + CDC_MIN_CHUNK_SIZE, + CDC_MAX_CHUNK_SIZE, + Some(CDC_ROW_GROUP_LENGTH), + enable_dictionary, + ); + let mod_data = write_with_cdc_options( + &[&modified], + CDC_MIN_CHUNK_SIZE, + CDC_MAX_CHUNK_SIZE, + Some(CDC_ROW_GROUP_LENGTH), + enable_dictionary, + ); + + let base_info = get_column_info(&base_data, 0); + let mod_info = get_column_info(&mod_data, 0); + assert_eq!(base_info.len(), 1); + assert_eq!(mod_info.len(), 1); + + assert_cdc_chunk_sizes( + &base.column(0).clone(), + &base_info[0], + nullable, + CDC_MIN_CHUNK_SIZE, + CDC_MAX_CHUNK_SIZE, + 
enable_dictionary, + ); + assert_cdc_chunk_sizes( + &modified.column(0).clone(), + &mod_info[0], + nullable, + CDC_MIN_CHUNK_SIZE, + CDC_MAX_CHUNK_SIZE, + enable_dictionary, + ); + + assert_page_length_differences( + &base_info[0], + &mod_info[0], + 0, + 0, + 1, + edit_length as i64, + ); + } + } + + #[test] + fn delete_twice() { + let (dtype, nullable, part_length, edit_length) = config(); + let schema = make_schema(&dtype, nullable); + + let part1 = generate_table(&schema, part_length, 0); + let part2 = generate_table(&schema, edit_length, 1); + let part3 = generate_table(&schema, part_length, part_length as u64); + let part4 = generate_table(&schema, edit_length, 2); + let part5 = generate_table(&schema, part_length, 2 * part_length as u64); + + let base = concat_batches([&part1, &part2, &part3, &part4, &part5]); + let modified = concat_batches([&part1, &part3, &part5]); + + for enable_dictionary in [false, true] { + let base_data = write_with_cdc_options( + &[&base], + CDC_MIN_CHUNK_SIZE, + CDC_MAX_CHUNK_SIZE, + Some(CDC_ROW_GROUP_LENGTH), + enable_dictionary, + ); + let mod_data = write_with_cdc_options( + &[&modified], + CDC_MIN_CHUNK_SIZE, + CDC_MAX_CHUNK_SIZE, + Some(CDC_ROW_GROUP_LENGTH), + enable_dictionary, + ); + + let base_info = get_column_info(&base_data, 0); + let mod_info = get_column_info(&mod_data, 0); + assert_eq!(base_info.len(), 1); + assert_eq!(mod_info.len(), 1); + + assert_cdc_chunk_sizes( + &base.column(0).clone(), + &base_info[0], + nullable, + CDC_MIN_CHUNK_SIZE, + CDC_MAX_CHUNK_SIZE, + enable_dictionary, + ); + assert_cdc_chunk_sizes( + &modified.column(0).clone(), + &mod_info[0], + nullable, + CDC_MIN_CHUNK_SIZE, + CDC_MAX_CHUNK_SIZE, + enable_dictionary, + ); + + assert_page_length_differences( + &base_info[0], + &mod_info[0], + 0, + 0, + 2, + edit_length as i64, + ); + } + } + + #[test] + fn insert_once() { + let (dtype, nullable, part_length, edit_length) = config(); + let schema = make_schema(&dtype, nullable); + + let part1 = 
generate_table(&schema, part_length, 0); + let part2 = generate_table(&schema, edit_length, 1); + let part3 = generate_table(&schema, part_length, part_length as u64); + + let base = concat_batches([&part1, &part3]); + let modified = concat_batches([&part1, &part2, &part3]); + + for enable_dictionary in [false, true] { + let base_data = write_with_cdc_options( + &[&base], + CDC_MIN_CHUNK_SIZE, + CDC_MAX_CHUNK_SIZE, + Some(CDC_ROW_GROUP_LENGTH), + enable_dictionary, + ); + let mod_data = write_with_cdc_options( + &[&modified], + CDC_MIN_CHUNK_SIZE, + CDC_MAX_CHUNK_SIZE, + Some(CDC_ROW_GROUP_LENGTH), + enable_dictionary, + ); + + let base_info = get_column_info(&base_data, 0); + let mod_info = get_column_info(&mod_data, 0); + assert_eq!(base_info.len(), 1); + assert_eq!(mod_info.len(), 1); + + assert_cdc_chunk_sizes( + &base.column(0).clone(), + &base_info[0], + nullable, + CDC_MIN_CHUNK_SIZE, + CDC_MAX_CHUNK_SIZE, + enable_dictionary, + ); + assert_cdc_chunk_sizes( + &modified.column(0).clone(), + &mod_info[0], + nullable, + CDC_MIN_CHUNK_SIZE, + CDC_MAX_CHUNK_SIZE, + enable_dictionary, + ); + + assert_page_length_differences( + &base_info[0], + &mod_info[0], + 0, + 1, + 0, + edit_length as i64, + ); + } + } + + #[test] + fn insert_twice() { + let (dtype, nullable, part_length, edit_length) = config(); + let schema = make_schema(&dtype, nullable); + + let part1 = generate_table(&schema, part_length, 0); + let part2 = generate_table(&schema, edit_length, 1); + let part3 = generate_table(&schema, part_length, part_length as u64); + let part4 = generate_table(&schema, edit_length, 2); + let part5 = generate_table(&schema, part_length, 2 * part_length as u64); + + let base = concat_batches([&part1, &part3, &part5]); + let modified = concat_batches([&part1, &part2, &part3, &part4, &part5]); + + for enable_dictionary in [false, true] { + let base_data = write_with_cdc_options( + &[&base], + CDC_MIN_CHUNK_SIZE, + CDC_MAX_CHUNK_SIZE, + Some(CDC_ROW_GROUP_LENGTH), + 
enable_dictionary, + ); + let mod_data = write_with_cdc_options( + &[&modified], + CDC_MIN_CHUNK_SIZE, + CDC_MAX_CHUNK_SIZE, + Some(CDC_ROW_GROUP_LENGTH), + enable_dictionary, + ); + + let base_info = get_column_info(&base_data, 0); + let mod_info = get_column_info(&mod_data, 0); + assert_eq!(base_info.len(), 1); + assert_eq!(mod_info.len(), 1); + + assert_cdc_chunk_sizes( + &base.column(0).clone(), + &base_info[0], + nullable, + CDC_MIN_CHUNK_SIZE, + CDC_MAX_CHUNK_SIZE, + enable_dictionary, + ); + assert_cdc_chunk_sizes( + &modified.column(0).clone(), + &mod_info[0], + nullable, + CDC_MIN_CHUNK_SIZE, + CDC_MAX_CHUNK_SIZE, + enable_dictionary, + ); + + assert_page_length_differences( + &base_info[0], + &mod_info[0], + 0, + 2, + 0, + edit_length as i64, + ); + } + } + + #[test] + fn update_once() { + let (dtype, nullable, part_length, edit_length) = config(); + let schema = make_schema(&dtype, nullable); + + let part1 = generate_table(&schema, part_length, 0); + let part2 = generate_table(&schema, edit_length, 1); + let part3 = generate_table(&schema, part_length, part_length as u64); + let part4 = generate_table(&schema, edit_length, 2); + + let base = concat_batches([&part1, &part2, &part3]); + let modified = concat_batches([&part1, &part4, &part3]); + + for enable_dictionary in [false, true] { + let base_data = write_with_cdc_options( + &[&base], + CDC_MIN_CHUNK_SIZE, + CDC_MAX_CHUNK_SIZE, + Some(CDC_ROW_GROUP_LENGTH), + enable_dictionary, + ); + let mod_data = write_with_cdc_options( + &[&modified], + CDC_MIN_CHUNK_SIZE, + CDC_MAX_CHUNK_SIZE, + Some(CDC_ROW_GROUP_LENGTH), + enable_dictionary, + ); + + let base_info = get_column_info(&base_data, 0); + let mod_info = get_column_info(&mod_data, 0); + assert_eq!(base_info.len(), 1); + assert_eq!(mod_info.len(), 1); + + assert_cdc_chunk_sizes( + &base.column(0).clone(), + &base_info[0], + nullable, + CDC_MIN_CHUNK_SIZE, + CDC_MAX_CHUNK_SIZE, + enable_dictionary, + ); + assert_cdc_chunk_sizes( + 
&modified.column(0).clone(), + &mod_info[0], + nullable, + CDC_MIN_CHUNK_SIZE, + CDC_MAX_CHUNK_SIZE, + enable_dictionary, + ); + + assert_page_length_differences_update(&base_info[0], &mod_info[0], 1); + } + } + + #[test] + fn update_twice() { + let (dtype, nullable, part_length, edit_length) = config(); + let schema = make_schema(&dtype, nullable); + + let part1 = generate_table(&schema, part_length, 0); + let part2 = generate_table(&schema, edit_length, 1); + let part3 = generate_table(&schema, part_length, part_length as u64); + let part4 = generate_table(&schema, edit_length, 2); + let part5 = generate_table(&schema, part_length, 2 * part_length as u64); + let part6 = generate_table(&schema, edit_length, 3); + let part7 = generate_table(&schema, edit_length, 4); + + let base = concat_batches([&part1, &part2, &part3, &part4, &part5]); + let modified = concat_batches([&part1, &part6, &part3, &part7, &part5]); + + for enable_dictionary in [false, true] { + let base_data = write_with_cdc_options( + &[&base], + CDC_MIN_CHUNK_SIZE, + CDC_MAX_CHUNK_SIZE, + Some(CDC_ROW_GROUP_LENGTH), + enable_dictionary, + ); + let mod_data = write_with_cdc_options( + &[&modified], + CDC_MIN_CHUNK_SIZE, + CDC_MAX_CHUNK_SIZE, + Some(CDC_ROW_GROUP_LENGTH), + enable_dictionary, + ); + + let base_info = get_column_info(&base_data, 0); + let mod_info = get_column_info(&mod_data, 0); + assert_eq!(base_info.len(), 1); + assert_eq!(mod_info.len(), 1); + + assert_cdc_chunk_sizes( + &base.column(0).clone(), + &base_info[0], + nullable, + CDC_MIN_CHUNK_SIZE, + CDC_MAX_CHUNK_SIZE, + enable_dictionary, + ); + assert_cdc_chunk_sizes( + &modified.column(0).clone(), + &mod_info[0], + nullable, + CDC_MIN_CHUNK_SIZE, + CDC_MAX_CHUNK_SIZE, + enable_dictionary, + ); + + assert_page_length_differences_update(&base_info[0], &mod_info[0], 2); + } + } + + #[test] + fn prepend() { + let (dtype, nullable, part_length, edit_length) = config(); + let schema = make_schema(&dtype, nullable); + + let part1 = 
generate_table(&schema, part_length, 0); + let part2 = generate_table(&schema, edit_length, 1); + let part3 = generate_table(&schema, part_length, part_length as u64); + let part4 = generate_table(&schema, edit_length, 2); + + let base = concat_batches([&part1, &part2, &part3]); + let modified = concat_batches([&part4, &part1, &part2, &part3]); + + for enable_dictionary in [false, true] { + let base_data = write_with_cdc_options( + &[&base], + CDC_MIN_CHUNK_SIZE, + CDC_MAX_CHUNK_SIZE, + Some(CDC_ROW_GROUP_LENGTH), + enable_dictionary, + ); + let mod_data = write_with_cdc_options( + &[&modified], + CDC_MIN_CHUNK_SIZE, + CDC_MAX_CHUNK_SIZE, + Some(CDC_ROW_GROUP_LENGTH), + enable_dictionary, + ); + + let base_info = get_column_info(&base_data, 0); + let mod_info = get_column_info(&mod_data, 0); + assert_eq!(base_info.len(), 1); + assert_eq!(mod_info.len(), 1); + + assert_cdc_chunk_sizes( + &base.column(0).clone(), + &base_info[0], + nullable, + CDC_MIN_CHUNK_SIZE, + CDC_MAX_CHUNK_SIZE, + enable_dictionary, + ); + assert_cdc_chunk_sizes( + &modified.column(0).clone(), + &mod_info[0], + nullable, + CDC_MIN_CHUNK_SIZE, + CDC_MAX_CHUNK_SIZE, + enable_dictionary, + ); + + assert!( + mod_info[0].page_lengths.len() >= base_info[0].page_lengths.len(), + "Modified should have same or more pages" + ); + + assert_page_length_differences( + &base_info[0], + &mod_info[0], + 0, + 1, + 0, + edit_length as i64, + ); + } + } + + #[test] + fn append() { + let (dtype, nullable, part_length, edit_length) = config(); + let schema = make_schema(&dtype, nullable); + + let part1 = generate_table(&schema, part_length, 0); + let part2 = generate_table(&schema, edit_length, 1); + let part3 = generate_table(&schema, part_length, part_length as u64); + let part4 = generate_table(&schema, edit_length, 2); + + let base = concat_batches([&part1, &part2, &part3]); + let modified = concat_batches([&part1, &part2, &part3, &part4]); + + for enable_dictionary in [false, true] { + let base_data = 
write_with_cdc_options( + &[&base], + CDC_MIN_CHUNK_SIZE, + CDC_MAX_CHUNK_SIZE, + Some(CDC_ROW_GROUP_LENGTH), + enable_dictionary, + ); + let mod_data = write_with_cdc_options( + &[&modified], + CDC_MIN_CHUNK_SIZE, + CDC_MAX_CHUNK_SIZE, + Some(CDC_ROW_GROUP_LENGTH), + enable_dictionary, + ); + + let base_info = get_column_info(&base_data, 0); + let mod_info = get_column_info(&mod_data, 0); + assert_eq!(base_info.len(), 1); + assert_eq!(mod_info.len(), 1); + + assert_cdc_chunk_sizes( + &base.column(0).clone(), + &base_info[0], + nullable, + CDC_MIN_CHUNK_SIZE, + CDC_MAX_CHUNK_SIZE, + enable_dictionary, + ); + assert_cdc_chunk_sizes( + &modified.column(0).clone(), + &mod_info[0], + nullable, + CDC_MIN_CHUNK_SIZE, + CDC_MAX_CHUNK_SIZE, + enable_dictionary, + ); + + let bp = &base_info[0].page_lengths; + let mp = &mod_info[0].page_lengths; + assert!(mp.len() >= bp.len()); + for i in 0..bp.len() - 1 { + assert_eq!(bp[i], mp[i], "Page {i} should be identical"); + } + assert!(mp[bp.len() - 1] >= bp[bp.len() - 1]); + } + } + + #[test] + fn empty_table() { + let (dtype, nullable, _, _) = config(); + let schema = make_schema(&dtype, nullable); + + let empty = RecordBatch::new_empty(schema); + for enable_dictionary in [false, true] { + let data = write_with_cdc_options( + &[&empty], + CDC_MIN_CHUNK_SIZE, + CDC_MAX_CHUNK_SIZE, + Some(CDC_ROW_GROUP_LENGTH), + enable_dictionary, + ); + let info = get_column_info(&data, 0); + // Empty table: either no row groups or one with no data pages + if !info.is_empty() { + assert!(info[0].page_lengths.is_empty()); + } + } + } + + #[test] + fn array_offsets() { + let (dtype, nullable, part_length, edit_length) = config(); + let schema = make_schema(&dtype, nullable); + + let table = concat_batches([ + &generate_table(&schema, part_length, 0), + &generate_table(&schema, edit_length, 1), + &generate_table(&schema, part_length, part_length as u64), + ]); + + for offset in [0usize, 512, 1024] { + if offset >= table.num_rows() { + continue; + } 
+ let sliced = table.slice(offset, table.num_rows() - offset); + let data = write_with_cdc_options( + &[&sliced], + CDC_MIN_CHUNK_SIZE, + CDC_MAX_CHUNK_SIZE, + Some(CDC_ROW_GROUP_LENGTH), + true, + ); + let info = get_column_info(&data, 0); + assert_eq!(info.len(), 1); + + // Verify CDC actually produced content-defined chunks + assert_cdc_chunk_sizes( + &sliced.column(0).clone(), + &info[0], + nullable, + CDC_MIN_CHUNK_SIZE, + CDC_MAX_CHUNK_SIZE, + true, + ); + } + } + } + }; + } + + // Instantiate for representative types matching C++ categories + cdc_single_rg_tests!(cdc_bool_non_null, DataType::Boolean, false); + cdc_single_rg_tests!(cdc_i32_non_null, DataType::Int32, false); + cdc_single_rg_tests!(cdc_i64_nullable, DataType::Int64, true); + cdc_single_rg_tests!(cdc_f64_nullable, DataType::Float64, true); + cdc_single_rg_tests!(cdc_utf8_non_null, DataType::Utf8, false); + cdc_single_rg_tests!(cdc_binary_nullable, DataType::Binary, true); + cdc_single_rg_tests!(cdc_fsb16_nullable, DataType::FixedSizeBinary(16), true); + cdc_single_rg_tests!(cdc_date32_non_null, DataType::Date32, false); + cdc_single_rg_tests!( + cdc_timestamp_nullable, + DataType::Timestamp(arrow_schema::TimeUnit::Nanosecond, None), + true + ); + + // --- Multiple row group tests matching C++ TestCDCMultipleRowGroups --- + + mod cdc_multiple_row_groups { + use super::*; + + const PART_LENGTH: usize = 128 * 1024; + const EDIT_LENGTH: usize = 128; + const ROW_GROUP_LENGTH: usize = 64 * 1024; + + fn schema() -> Arc<Schema> { + Arc::new(Schema::new(vec![ + Field::new("int32", DataType::Int32, true), + Field::new("float64", DataType::Float64, true), + Field::new("bool", DataType::Boolean, false), + ])) + } + + #[test] + fn insert_once() { + let s = schema(); + let part1 = generate_table(&s, PART_LENGTH, 0); + let part2 = generate_table(&s, PART_LENGTH, 2); + let part3 = generate_table(&s, PART_LENGTH, 4); + let edit1 = generate_table(&s, EDIT_LENGTH, 1); + let edit2 = generate_table(&s, EDIT_LENGTH, 3); + +
let base = concat_batches([&part1, &edit1, &part2, &part3]); + let modified = concat_batches([&part1, &edit1, &edit2, &part2, &part3]); + assert_eq!(modified.num_rows(), base.num_rows() + EDIT_LENGTH); + + let base_data = write_with_cdc_options( + &[&base], + CDC_MIN_CHUNK_SIZE, + CDC_MAX_CHUNK_SIZE, + Some(ROW_GROUP_LENGTH), + false, + ); + let mod_data = write_with_cdc_options( + &[&modified], + CDC_MIN_CHUNK_SIZE, + CDC_MAX_CHUNK_SIZE, + Some(ROW_GROUP_LENGTH), + false, + ); + + for col in 0..s.fields().len() { + let base_info = get_column_info(&base_data, col); + let mod_info = get_column_info(&mod_data, col); + + assert_eq!(base_info.len(), 7, "expected 7 row groups for col {col}"); + assert_eq!(mod_info.len(), 7); + + // First two row groups should be identical + assert_eq!(base_info[0].page_lengths, mod_info[0].page_lengths); + assert_eq!(base_info[1].page_lengths, mod_info[1].page_lengths); + + // Middle row groups: 1 larger + 1 smaller diff + for i in 2..mod_info.len() - 1 { + assert_page_length_differences( + &base_info[i], + &mod_info[i], + 0, + 1, + 1, + EDIT_LENGTH as i64, + ); + } + // Last row group: just larger + assert_page_length_differences( + base_info.last().unwrap(), + mod_info.last().unwrap(), + 0, + 1, + 0, + EDIT_LENGTH as i64, + ); + } + } + + #[test] + fn delete_once() { + let s = schema(); + let part1 = generate_table(&s, PART_LENGTH, 0); + let part2 = generate_table(&s, PART_LENGTH, 2); + let part3 = generate_table(&s, PART_LENGTH, 4); + let edit1 = generate_table(&s, EDIT_LENGTH, 1); + let edit2 = generate_table(&s, EDIT_LENGTH, 3); + + let base = concat_batches([&part1, &edit1, &part2, &part3, &edit2]); + let modified = concat_batches([&part1, &part2, &part3, &edit2]); + + let base_data = write_with_cdc_options( + &[&base], + CDC_MIN_CHUNK_SIZE, + CDC_MAX_CHUNK_SIZE, + Some(ROW_GROUP_LENGTH), + false, + ); + let mod_data = write_with_cdc_options( + &[&modified], + CDC_MIN_CHUNK_SIZE, + CDC_MAX_CHUNK_SIZE, + Some(ROW_GROUP_LENGTH), + 
false, + ); + + for col in 0..s.fields().len() { + let base_info = get_column_info(&base_data, col); + let mod_info = get_column_info(&mod_data, col); + + assert_eq!(base_info.len(), 7); + assert_eq!(mod_info.len(), 7); + + assert_eq!(base_info[0].page_lengths, mod_info[0].page_lengths); + assert_eq!(base_info[1].page_lengths, mod_info[1].page_lengths); + + for i in 2..mod_info.len() - 1 { + assert_page_length_differences( + &base_info[i], + &mod_info[i], + 0, + 1, + 1, + EDIT_LENGTH as i64, + ); + } + assert_page_length_differences( + base_info.last().unwrap(), + mod_info.last().unwrap(), + 0, + 0, + 1, + EDIT_LENGTH as i64, + ); + } + } + + #[test] + fn update_once() { + let s = schema(); + let part1 = generate_table(&s, PART_LENGTH, 0); + let part2 = generate_table(&s, PART_LENGTH, 2); + let part3 = generate_table(&s, PART_LENGTH, 4); + let edit1 = generate_table(&s, EDIT_LENGTH, 1); + let edit2 = generate_table(&s, EDIT_LENGTH, 3); + let edit3 = generate_table(&s, EDIT_LENGTH, 5); + + let base = concat_batches([&part1, &edit1, &part2, &part3, &edit2]); + let modified = concat_batches([&part1, &edit3, &part2, &part3, &edit2]); + + let base_data = write_with_cdc_options( + &[&base], + CDC_MIN_CHUNK_SIZE, + CDC_MAX_CHUNK_SIZE, + Some(ROW_GROUP_LENGTH), + false, + ); + let mod_data = write_with_cdc_options( + &[&modified], + CDC_MIN_CHUNK_SIZE, + CDC_MAX_CHUNK_SIZE, + Some(ROW_GROUP_LENGTH), + false, + ); + + for col in 0..s.fields().len() { + let nullable = s.field(col).is_nullable(); + let base_info = get_column_info(&base_data, col); + let mod_info = get_column_info(&mod_data, col); + + assert_eq!(base_info.len(), 7); + assert_eq!(mod_info.len(), 7); + + // Validate CDC chunk sizes on at least the first row group + assert_cdc_chunk_sizes( + &base.column(col).slice(0, ROW_GROUP_LENGTH), + &base_info[0], + nullable, + CDC_MIN_CHUNK_SIZE, + CDC_MAX_CHUNK_SIZE, + false, + ); + + assert_eq!(base_info[0].page_lengths, mod_info[0].page_lengths); + 
assert_eq!(base_info[1].page_lengths, mod_info[1].page_lengths); + + // Row group containing the edit + assert_page_length_differences_update(&base_info[2], &mod_info[2], 1); + + // Remaining row groups should be identical + for i in 3..mod_info.len() { + assert_eq!(base_info[i].page_lengths, mod_info[i].page_lengths); + } + } + } + + #[test] + fn append() { + let s = schema(); + let part1 = generate_table(&s, PART_LENGTH, 0); + let part2 = generate_table(&s, PART_LENGTH, 2); + let part3 = generate_table(&s, PART_LENGTH, 4); + let edit1 = generate_table(&s, EDIT_LENGTH, 1); + let edit2 = generate_table(&s, EDIT_LENGTH, 3); + + let base = concat_batches([&part1, &edit1, &part2, &part3]); + let modified = concat_batches([&part1, &edit1, &part2, &part3, &edit2]); + + let base_data = write_with_cdc_options( + &[&base], + CDC_MIN_CHUNK_SIZE, + CDC_MAX_CHUNK_SIZE, + Some(ROW_GROUP_LENGTH), + false, + ); + let mod_data = write_with_cdc_options( + &[&modified], + CDC_MIN_CHUNK_SIZE, + CDC_MAX_CHUNK_SIZE, + Some(ROW_GROUP_LENGTH), + false, + ); + + for col in 0..s.fields().len() { + let nullable = s.field(col).is_nullable(); + let base_info = get_column_info(&base_data, col); + let mod_info = get_column_info(&mod_data, col); + + assert_eq!(base_info.len(), 7); + assert_eq!(mod_info.len(), 7); + + // Validate CDC chunk sizes on the first row group + assert_cdc_chunk_sizes( + &base.column(col).slice(0, ROW_GROUP_LENGTH), + &base_info[0], + nullable, + CDC_MIN_CHUNK_SIZE, + CDC_MAX_CHUNK_SIZE, + false, + ); + + // All row groups except last should be identical + for i in 0..base_info.len() - 1 { + assert_eq!(base_info[i].page_lengths, mod_info[i].page_lengths); + } + + // Last row group: pages should be identical except last + let bp = &base_info.last().unwrap().page_lengths; + let mp = &mod_info.last().unwrap().page_lengths; + assert!(mp.len() >= bp.len()); + for i in 0..bp.len() - 1 { + assert_eq!(bp[i], mp[i]); + } + } + } + } + + // --- Direct chunker test (kept from 
original) --- + + #[test] + fn test_cdc_array_offsets_direct() { + use crate::basic::Type as PhysicalType; + use crate::schema::types::{ColumnDescriptor, ColumnPath, Type}; + + let options = CdcOptions { + min_chunk_size: CDC_MIN_CHUNK_SIZE, + max_chunk_size: CDC_MAX_CHUNK_SIZE, + norm_level: 0, + }; + let desc = { + let tp = Type::primitive_type_builder("col", PhysicalType::INT32) + .build() + .unwrap(); + ColumnDescriptor::new(Arc::new(tp), 0, 0, ColumnPath::new(vec![])) + }; + + let bpr = bytes_per_record(&DataType::Int32, false); + let n = CDC_PART_SIZE / bpr; + let offset = 10usize; + + let array: Int32Array = (0..n).map(|i| test_hash(0, i as u64) as i32).collect(); + let mut chunker = super::ContentDefinedChunker::new(&desc, &options).unwrap(); + let chunks = chunker.get_arrow_chunks(None, None, &array).unwrap(); + + let sliced = array.slice(offset, n - offset); + let mut chunker2 = super::ContentDefinedChunker::new(&desc, &options).unwrap(); + let chunks2 = chunker2.get_arrow_chunks(None, None, &sliced).unwrap(); + + let values: Vec<usize> = chunks.iter().map(|c| c.num_values).collect(); + let values2: Vec<usize> = chunks2.iter().map(|c| c.num_values).collect(); + + assert!(values.len() > 1, "expected multiple chunks, got {values:?}"); + assert_eq!(values.len(), values2.len(), "chunk count must match"); + + assert_eq!( + values[0] - values2[0], + offset, + "offsetted first chunk should be {offset} values shorter" + ); + assert_eq!( + &values[1..], + &values2[1..], + "all chunks after the first must be identical" + ); + } +} diff --git a/parquet/src/column/chunker/cdc_codegen.py b/parquet/src/column/chunker/cdc_codegen.py new file mode 100644 index 000000000000..3675c92d0281 --- /dev/null +++ b/parquet/src/column/chunker/cdc_codegen.py @@ -0,0 +1,118 @@ +#!/usr/bin/env python + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements.
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Produce the given number of gearhash tables for rolling hash calculations. + +Each table consists of 256 64-bit integer values and by default 8 tables are +produced. The tables are written to a Rust source file. + +The generated numbers are deterministic "random" numbers created by MD5 hashing +a fixed seed and the table index. This ensures that the tables are the same +across different runs and platforms. The exact function used to generate the numbers is +less important as long as they have a sufficiently uniform distribution. + +Reference implementations: +- https://github.com/Borelset/destor/blob/master/src/chunking/fascdc_chunking.c +- https://github.com/nlfiedler/fastcdc-rs/blob/master/examples/table64.rs + +Usage: + python cdc_codegen.py [ntables] + + ntables: Number of gearhash tables to generate (default 8). + + The generated source file is written to cdc_generated.rs next to this script. +""" + +import hashlib +import pathlib +import sys +from io import StringIO + + +template = """\ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership.
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// This table should be identical with +// https://github.com/apache/arrow/blob/main/cpp/src/parquet/chunker_internal_generated.h +// Ensure that both tables remain in sync after any changes. + +#[rustfmt::skip] +pub(crate) const NUM_GEARHASH_TABLES: usize = {ntables}; + +#[rustfmt::skip] +pub(crate) const GEARHASH_TABLE: [[u64; 256]; NUM_GEARHASH_TABLES] = [ +{content}]; +""" + + +def generate_hash(n: int, seed: int): + """Produce predictable hash values for a given seed and n using MD5. + + The value can be arbitrary as long as it is deterministic and has a uniform + distribution. The MD5 hash is used to produce a 16 character hexadecimal + string which is then converted to a 64-bit integer. 
+ """ + value = bytes([seed] * 64 + [n] * 64) + hasher = hashlib.md5(value) + return hasher.hexdigest()[:16] + + +def generate_hashtable(seed: int, length=256): + """Generate and render a single gearhash table.""" + table = [generate_hash(n, seed=seed) for n in range(length)] + + out = StringIO() + out.write(f" // seed = {seed}\n") + out.write(" [\n") + for i in range(0, length, 4): + values = [f"0x{value}" for value in table[i : i + 4]] + out.write(f" {', '.join(values)},\n") + out.write(" ]") + + return out.getvalue() + + +def generate_source(ntables=8, relative_path="cdc_generated.rs"): + """Generate a Rust source file with multiple gearhash tables.""" + path = pathlib.Path(__file__).parent / relative_path + tables = [generate_hashtable(seed) for seed in range(ntables)] + content = ",\n".join(tables) + text = template.format(ntables=ntables, content=content) + path.write_text(text) + + +if __name__ == "__main__": + ntables = int(sys.argv[1]) if len(sys.argv) > 1 else 8 + generate_source(ntables) diff --git a/parquet/src/column/chunker/cdc_generated.rs b/parquet/src/column/chunker/cdc_generated.rs new file mode 100644 index 000000000000..4222e3669245 --- /dev/null +++ b/parquet/src/column/chunker/cdc_generated.rs @@ -0,0 +1,558 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +#[rustfmt::skip] +pub(crate) const NUM_GEARHASH_TABLES: usize = 8; + +#[rustfmt::skip] +pub(crate) const GEARHASH_TABLE: [[u64; 256]; NUM_GEARHASH_TABLES] = [ + // seed = 0 + [ + 0xf09f35a563783945, 0x0dcc5b3bc5ae410a, 0x63f1ea8d22554270, 0xfbe5ee7bd05a7b61, + 0x3f692ed5e9934aba, 0xaab3755952250eb8, 0xdefb168dc2888fa5, 0x501b36f7c77a7d47, + 0xd2fff45d1989642d, 0x80217c1c600e30a6, 0xb9469ee2e43df7ac, 0x3654b76a61999706, + 0x6ea73dfe5de0c6b6, 0xdfd662e1937a589d, 0x0dbe0cc74b188a68, 0xde45f4e6d73ffc6f, + 0xcdf7a7759e70d87e, 0x5d6a951b8d38c310, 0xdc9423c3813fcf2c, 0x25dc2976e167ffce, + 0xc2555baa1d031c84, 0x115bc3f2230a3ab6, 0xd4b10260f350bede, 0xdfd3501ab447d723, + 0x022e79217edaf167, 0x1635e2255c5a7526, 0xa0a750350cc77102, 0xc027133e05d39f56, + 0xd949459779cf0387, 0xb92f1464f5c688c2, 0xd9ac5f3e8b42f2f3, 0xdf02bb6f5ecaac21, + 0x8156f988fac7bfa4, 0xe4580f97bede2ec8, 0x44fe7d17a76fca32, 0x885f59bd54c2014c, + 0x435e63ec655ffae9, 0x5ebc51930967b1f1, 0x5428c2084ac29e47, 0x9465938fec30e36b, + 0xc7cb3de4977772cd, 0x15692d7c201e8c3a, 0x505ee65cdc4b17f4, 0x7d9839a0a7aead6b, + 0xeef5f5b6a0105291, 0x76c2fb232ce7f5bf, 0x5c13893c1c3ff3a9, 0x65b6b547d4442f98, + 0xb8ad7487c8c96fce, 0x906bcf51c99974f8, 0x2f56e48bb943a48c, 0xbc9ab109f82d3a44, + 0xcd5160cdc8c7e735, 0xbe9acb9df3427732, 0x386b91d477d7fade, 0x36be463621dd5af2, + 0xcbe6a2faffd627a8, 0x9c8fd528463a2f5a, 0xb9b88c6bb802b184, 0xb414b4e665c597c7, + 0xbedb142568209556, 0x5360d81c25429dce, 0x63a69a960a952f37, 0xc900d63899e1b503, + 0x1abc63a8b37c7728, 0xa8b3a8b6409080eb, 0x495e391f662959f6, 0xdf1e136f3e12229b, + 0x33d5fc526b0dd38d, 0x321221ae2abfac63, 0x7fde18351fda7395, 0xed79fe5c3a6aa4c3, + 0x2dd6965a4867d8d4, 0x54813ca20fe8799b, 0x5d59ea6456465c39, 0x0de0c294d1936b81, + 0x4aaf0755002c588c, 0x3530a1857ad04c6d, 0xb8a64f4ce184442b, 0xe0def10bceedfa17, + 0x46e38d0a443757ec, 0x9795a1c645ee16d7, 0x7e531def245eac8a, 
0x683b25c43a0716cf, + 0x884583d372da219d, 0x5b06b62c910416e5, 0x54b6902fbebd3dbe, 0x931198d40a761a75, + 0xead7d8e830013590, 0x80b4d5dc99bfaced, 0xf98272c8108a1ad2, 0x1adce054289a0ec6, + 0x7d53a1143c56b465, 0x497fbe4f00c92b52, 0x525e4cc2e81ebd69, 0xc94478e0d5508ff6, + 0xb8a5da83c196d07c, 0x7667a921b65b0603, 0xf236fabbdefe6cd1, 0x53da978d19a92b98, + 0xc604f6e97087124d, 0x2cbd27221924b094, 0x65cd1102c985b1d2, 0x08c0755dc1a97eb4, + 0x5e0419e921c0fef1, 0x282d2c1196f84a29, 0xe21117fcfc5793f7, 0xcf4e985dc38e6c2e, + 0xd521f4f264d55616, 0xde69b04c485f2a10, 0x59410e245305178a, 0xceab1d477c943601, + 0xa9805732d71ee5e9, 0x054cd443896974f6, 0xf2b517717a423a3e, 0x09517937fa9fac95, + 0x4938233e9ca871e3, 0x9132cbaf56f83ec0, 0x4703421ed1dd027d, 0xfd9933f4e6f1ec4e, + 0xf237c7fded2274a8, 0xdf4616efe68cd7b4, 0x5e46de0f39f0a380, 0x3d41e0c6d8e095b0, + 0xc5272f8a5bb2df09, 0x68aa78e8301fb964, 0xbf5b5b52c8e32ae0, 0xbf28ed3df74bdcf7, + 0xd6198f64c833815a, 0x8cd99d2974267544, 0xd90560ea4465ff2c, 0x571d65ad7ad59261, + 0x309453518baa367a, 0xa60538377bc79fb2, 0xace515da1ab4183c, 0xf56d3c8d891d1c5b, + 0x5b0d8370b59def49, 0x775866ce7c83c762, 0x3d76085695c8e18a, 0xba064d1a9af1b114, + 0xc84ef7cd7b98b521, 0x90b9231681c2bc37, 0x37e2b13e6f585b6b, 0x1d0a34e55e0f369f, + 0x86bb8019cf41447c, 0x4b95c6ef55b3f71f, 0x3b6ed1660732b310, 0x617eee603d137f21, + 0xf4f6278b464f3bbc, 0xdfb763b720da205a, 0x353478899b871cb7, 0xe45fbbff574cc41e, + 0x1a94b60847907d72, 0xb10eef051eff67a5, 0xf0e012ec6a284d40, 0xcc1cd1a11b926d7c, + 0xcf9d9c5453e19cad, 0x270febcc0fc0e86b, 0xd6567568778b781e, 0x7323b98965eeb46b, + 0xccecd374567086ff, 0xef7b44bfc497a704, 0xebc479c051a9f0a5, 0xc9b7410e3e00a235, + 0x1d084f7ecdf83dab, 0xc8a9a97e33ba8ba3, 0x8c75318f5b2350d6, 0xaa3cd5d0c684bdda, + 0xa81125fe0901bedf, 0xf7bcd76020edfc93, 0x834ee4c12e75874f, 0xb2bb8a7beb44fa14, + 0x32cd26f50a4f4e4d, 0x0fc5817ca55d959a, 0xd6e4ae2e3ae10718, 0x074abdcceb8d6e38, + 0xc0cc5f4f9b3a9c43, 0x1115d364363595b2, 0x69861db2eb19f2e8, 0x59b8d804cf92bc67, + 
0x9bac9785e5e4b863, 0x7fa0e17a41869561, 0x10d3c9633f0c709c, 0x534a03deee6bc44a, + 0x73b1f7201257f581, 0x46fd6a11e2e0706b, 0x494abb554946e67a, 0xb5d6da317864dc8e, + 0x402ded9238f39687, 0xd8fa37d2cbd6d290, 0xcc818293fcb06791, 0x6482ab344806cd4d, + 0x0956e6ee9d8eb60b, 0x01fee622d8465ac8, 0xae7ece370cbd9c35, 0x7ff09e937a177279, + 0xa2c29ee7a33ca5f1, 0x990e8dbee083923b, 0x4a819b72f610863a, 0xddecfad79d3f08be, + 0x627372480fac20a7, 0x802154d6eca2db4c, 0x8fcf02e42f805e55, 0x040a911ff8cea977, + 0xbb544485bc64d0d4, 0xaddde1aeb406d0fb, 0xf6b35fae23dce66f, 0xc07a9fb3645d2f9b, + 0xccd113907e9c0fed, 0xd17af369984fd213, 0x9223823c59a083e7, 0xe19d475606b81013, + 0xe181ac116a90e57a, 0x71f7b6258c6def4c, 0x2246f34b45964f7c, 0xd74aedaea2d31751, + 0xb1add86e5dd305d1, 0xeb9ba881f16d6471, 0xef7600e036f5c6ff, 0x1d50bc9735b8fb85, + 0xe63942bd1f3e2969, 0x9241ba9f8b3f4e72, 0xee8bb2bca07d35b6, 0x55cd55dab522654e, + 0x94d0cfa7c1a6845d, 0x02f9845d559884c3, 0x8ce70ea21063b560, 0xd70998028ef08b74, + 0xdfdb5bbee310876b, 0x4e21b2e348256d16, 0xde007a981c13debc, 0xe51950cbbddabfdd, + 0xd223301dbe9957c1, 0x084b8634cc2cce4b, 0x90e551378aa9d70c, 0x833b533ac633e448, + 0x7891e232882da57f, 0xa1bf26f0163ce2b3, 0xf33a0171eb9c68d5, 0x2e7de18ca69b3fa2, + 0x666fd6f175619199, 0x1239d37edb5feb9f, 0xfa9fc9382e61ff5c, 0x3ca4ad427e3c126f, + 0x37c6dd4c2c31ae6e, 0x1f1bacb619d427b2, 0x7dd09f5d10759afe, 0xc8d941432327d733, + 0x2b389ba25e1d43a7, 0xa4e3030c3740ff21, 0xcc56dae13fd37463, 0x2481457c175b560f, + 0x9deb35bde77c5c41, 0x847aa6ea5549a0c3, 0xcde01bb48b6e7f02, 0x15a28844e64cb211, + ], + // seed = 1 + [ + 0xecfcba92fe5691a3, 0x71377799fea34699, 0xb284c9096fa614e5, 0x54534170f40de6c8, + 0xbbd804d45884fba3, 0x44929a896388c8a1, 0x79b712508e0fa3b1, 0xeb53ab280af31054, + 0x351ea23a6319da7a, 0x2fbe55d9819d85a2, 0x34f4b6568dcd28b1, 0x8c94ea5e5d82967a, + 0x09068d333a46d3c5, 0x762ad4f64cb73381, 0xd5c6db5ef0e22640, 0x36d8ab5a36175680, + 0xd41fe333cdc3525a, 0xa1f51dbdf20ce781, 0x1410a95e786c8be6, 0x96b7499a670c2b41, + 
0x3912e1037835d893, 0x272c5bd83e1e9115, 0x2ea7f91cad82a0d6, 0xcd10e85662ce9931, + 0xedad49be8d5e8b74, 0x7ccd8fe0f37d12bc, 0xfac0482005eed593, 0x4513991681f6c8b0, + 0x2804d612eb0ad37d, 0x7cca9e8412b81d34, 0x85ffd6707192b7b8, 0xea0560aeea954411, + 0x0122d28226102bba, 0xf51c47cdbd22fdd1, 0x3707d851183ff17c, 0xaef5a1465f3e902d, + 0xbcb38c2d8736a04f, 0x4025317e864bef15, 0x8d3f66d86e1ea58f, 0xc16759a3d97ed79a, + 0x1c62abdc0659f2f5, 0x23b3eb4e699bd28f, 0x5083c4fceed3ccaf, 0xa65bf34562cc989c, + 0xaa5865932fd79064, 0xf24d08d268c24593, 0x7fbd00a215196999, 0x7812cd366d752964, + 0x62e8dcb27ef3d945, 0xf08b7984e1b946dc, 0x547d23ad9a5c1dcf, 0x496b1fb249b27fb7, + 0xcd692e1db5f3b3ba, 0x41931e39f1e1bc61, 0x286c6a7d7edae82b, 0x17ef6638b6c4ca6e, + 0x609beb5a2576a934, 0xcc5e16fe4a69b83c, 0xbbd14d08b078fc24, 0x2a617680f481cb94, + 0x81dbbd5f86e6d039, 0xeb8205e1fc8ecc3c, 0xe5e3bb576faa8042, 0x5d6f1eb9d9df01b5, + 0x9a47b8739c10fb44, 0x398a7caad7ea7696, 0x9c0fc1d7c46adde6, 0x67cd6de0a51978a6, + 0x68ccc4b77a21cca4, 0x1e067066b82f415c, 0xf7ddade6535e1819, 0xf2185c884291751b, + 0xc322b7381fcbe34f, 0x242f593e88290b9b, 0x8e11ccc0ea5e84a3, 0x40e3a2e3346db8a2, + 0xf18bfc3ad2931a2c, 0x2468397394b00144, 0xeae199cce14e6817, 0x05b462686c75a1ae, + 0xda096cb859c51673, 0xd87aeb967a906bef, 0xaabc74493cb02fe6, 0x74d48fc2e7da143e, + 0x6ec1c8fed3f2c1fd, 0xe01e0704b463f18e, 0xc3d88a4d3a8056e4, 0xd01ae0ffab6c8f3f, + 0x881ba052620ae7c7, 0xcea033aef0a823a5, 0x8d2cad91d83df1e3, 0x18746d205e66dbe9, + 0x3061f8e58d046650, 0xd819c59f0ce2cf8b, 0x144e89e93635e870, 0x3415e88279b21651, + 0xd6f7ab944b86c3fa, 0x45f1dd15d0f67bdc, 0xbf0d97c7f4fa24f4, 0x34a7de520a57fcd2, + 0x4ba86fda03e9e2bc, 0xa7995265a025b552, 0x698f6819d5f51cf7, 0xd07dbe9d8a156981, + 0x2683945373857fc1, 0x116f8a84f96167de, 0x8bc832bd85595ebf, 0xb206519d74fdfafa, + 0xde9519b2e9b5cc5f, 0x16fdd6f2da1d8163, 0x7ba32bd48ef56f11, 0x6f4e4d7ee8b29717, + 0xd31576dde7468aad, 0x023bb08848676045, 0xf6dcc083178160b7, 0x42035f426250e683, + 0x343732993cfed89f, 
0x0640a870a22d3d58, 0x65cff80b53b4ae6a, 0x27996fa17ab05215, + 0xfd5db01401b21a04, 0x894508784bc1673c, 0x5bfcf43a2380e27d, 0x4cd6dcc2715583b7, + 0xa43b3763e7d4c902, 0x6da83e12ef0c1257, 0xfe80a602b0335aff, 0x293a7d8f4ff344de, + 0xb4ae7c2b8956bf5a, 0x6b45432d38254b4d, 0xd086acbdf15d9455, 0xa4d19e43f41ea87b, + 0xf01f13ba4bb87fbf, 0xca582cf301a299ff, 0x0ddad3d45298fa7d, 0x0646a130459c3999, + 0xc08e3af3747e2cee, 0xfc7db8aa9ed67295, 0x783b329e7bd79d5f, 0x732dbc607957af7b, + 0x8e446ac19fb26555, 0xff1dfa4d61dc89a5, 0xb6fbc46bd8d011d8, 0x185147ec5779f0d7, + 0x6eb2cf6149a5380f, 0xb0e773df803a1eae, 0xc07706c5519bfce5, 0xc35abcf54fa95f14, + 0x40a01d99a38608ea, 0x776dcd6f603c277f, 0x6ae12389b1d6d0bb, 0x8bd981448df92bb9, + 0x426a6a7ca21a2c16, 0x87efd5b71c1bad26, 0x71fb7fc4cd41de48, 0xdd9033c45619d463, + 0x40eaab322654cef7, 0xe077fffed6f3e3a2, 0x375a4dbef9384447, 0x2066b009d2c4a100, + 0xeca4a5794a068447, 0x2128f64bddf341a1, 0x738b4bb1be90bd61, 0x433772cf3813d52e, + 0x9540c88add8e4474, 0x0b6d5decd21d3519, 0x654ead966745642d, 0xe1bfb03c3b4bdb4c, + 0x0b977a9937515b1f, 0x0a4587509ef63870, 0xe89f0de1d9cfd44a, 0x23a91390272e7f68, + 0xd92defbc9096b8d8, 0x004db87174612539, 0xc88ecaabdd1a71f1, 0x050de38393073346, + 0x8af1426d7964e038, 0xf352c4fef8ad5c87, 0x6f26bc7408e26548, 0x0d41543fd9bf3084, + 0xfc4e07553a840fc6, 0x5ef117de86a555a9, 0x1f11c42dffb5ae1b, 0x4147648f07490fa5, + 0x09b35fd7671b21aa, 0x1453b14f7ccca481, 0x944f6fcce4c9b2ba, 0x5b08dd2e3583dc06, + 0xe0220df78dc9c22d, 0x1c200b9506cbf666, 0x8a0b7465eadb523b, 0xfbcb43a91a1e2d80, + 0xe697f44be3c36a58, 0x2f8a8e48fb7e350d, 0x7baba71b8920d55f, 0x10edc0216105bc96, + 0x52db07c79d7a7a63, 0x1916e8cef9452ac3, 0x5cbbbf21f867b6cc, 0xadd583365a690a4b, + 0x4e4ca2c8bffc2fdb, 0xf5fe3416d2eebcfe, 0x839af8b85e452476, 0x8496c0c54ad44e16, + 0x6c46f1ecad4482bf, 0xb794cad76ae18715, 0x67b762eec7c62985, 0x52dc9e68df5b3a53, + 0x0cc7e444b422a5f9, 0xadbfe90841c112b0, 0xfe37b136f0ca5c34, 0xcfe9e47948a8d73e, + 0xee90572b86a30d91, 0x549e72d8262830aa, 
0x3361564b469f32c6, 0x1e6eba9e0d2648e2, + 0x5f8e2b2ac5fcb4eb, 0xe4224fa5f71f7cc6, 0x7357a9230c76757b, 0xcad70f74aaf6b702, + 0xeef28ced23894cc2, 0x753fdd3352aefd68, 0x1fed6ba90bbeb9d2, 0x05316f4ab4034b4b, + 0x3396df022b9f63d6, 0x82d7125a7cfd0935, 0x3519a71caf1f87f0, 0xd1dfb7a5cc3974be, + 0xbfae40ecbdbbcc2a, 0x152c11778e08dd54, 0x4a96566a6c848554, 0x3a84d621c340cdd7, + 0xfd47aa1887e2fb03, 0xa63cae94b2f1d099, 0xed61783f3e5b75e0, 0xefd44864106019be, + 0x145ff78b80b081aa, 0x34670e5fcea9230e, 0x876ef976328db371, 0x4221f3a5269942a6, + 0x95315cbd85c648f4, 0x3ca344dc7c3b1600, 0x38421ea39ff28780, 0x31dbeee967c0435c, + 0x27437c3e268402e7, 0xdd0cf8343312a654, 0x965ab9dad1d8aa29, 0xf871706dd3e23509, + 0xce23d06c7a25e699, 0x1b37d59382b27589, 0x3407f004723d6324, 0x56efb69cdb5deaa1, + 0xf46cdd2b9fd604e0, 0xcad3ca79fdac69bd, 0x7252802a574e63cb, 0xc281fb8acc6ec1d3, + ], + // seed = 2 + [ + 0xdd16cb672ba6979c, 0x3954eaa9ec41ae41, 0x52cb802771d2966d, 0xf57ed8eb0d0294f2, + 0x768be23c71da2219, 0x6131e22d95a84ad3, 0xd849e4e49bb15842, 0x18e8e5c4978cf00d, + 0x3af5e5867ce1f9bd, 0x06c75a9fffe83d63, 0xe8de75a00b58a065, 0x0a773251bc0d755a, + 0x629dc21e54548329, 0x2a168f5e5a883e70, 0x33547375f0996c86, 0xdfcb4c7680451322, + 0x55c1ecaaaa57e397, 0x4546c346c24f5a31, 0x6f8f0401dfabc86c, 0x7760d2d36ee340b4, + 0xf6448e48bdeb229d, 0xba70e1633b4dba65, 0x069cda561e273054, 0xa010b6a84aebf340, + 0x5c23b8229eee34b6, 0xea63c926d90153af, 0x7d7de27b3e43ec1b, 0xea119541eddc3491, + 0xf1259daeddfc724c, 0x2873ca9a67730647, 0xa1e7710dade32607, 0x758de030b61d43fd, + 0xd2c9bcbfa475edb4, 0x18ade47bb8a0aa29, 0xf7a74af0ff1aea88, 0x6f8873274a987162, + 0x6963e8d876f4d282, 0xd435d4fe448c6c5b, 0x93ec80ba404cafff, 0xcf90d24c509e41e7, + 0x5f0fc8a62923e36e, 0x9224878fe458f3a4, 0xd9a039edf1945bcd, 0x0877d1892c288441, + 0x75205491f4b4740b, 0x30f9d2d523a9085b, 0x4b7f4029fa097c99, 0x170bb013745709d4, + 0x7087af537f11ef2e, 0x28c62b88e08fc464, 0x84bbcb3e0bb56271, 0x485a4b099165c681, + 0x357c63357caa9292, 0x819eb7d1aee2d27e, 
0xdaa759eb9c0f8c9d, 0x42cdc36729cc3db5, + 0x9489aa852eddbb06, 0x8161e4f85a84e6d4, 0xa964863fdad3eb29, 0xcc095ddbce1a6702, + 0x3ecfadbb8dc2ce58, 0x971316509b95a231, 0xc8f484d1dbc38427, 0xae9c510c463574c0, + 0xdf2b31179600c21a, 0x440de87bada4dfa3, 0xbd8d30f3f6fb7522, 0x84e6d7f678a0e2d0, + 0x0ec4d74323e15975, 0xf6947610dad6d9ab, 0x73a55a95d73fe3a5, 0x3e5f623024d37eda, + 0x8d99a728d95d9344, 0x8b82a7956c4acdc4, 0x7faeaea4385b27f6, 0x540625ff4aa2ff21, + 0x4aa43b3ebd92ce2b, 0x899646a6df2da807, 0x49225115780942d7, 0xe16606636af89525, + 0xb980bcf893888e33, 0xf9ed57695291b0d8, 0x5c6dd14464619afa, 0x50606d69b733d4f3, + 0x7fb1af465b990f97, 0x3fab2634c8bbd936, 0x556da6168838b902, 0x0f15975902a30e1f, + 0xb29d782ae9e1991f, 0xae00e26ff8f7e739, 0xd3da86458bb292d5, 0x4528ee0afb27e4ce, + 0x49882d5ba49fabad, 0x7e873b6a7cf875ee, 0x777edd535113c912, 0x94ed05e7ff149594, + 0x0b8f95fc4211df43, 0x9135c2b42426fef2, 0x411e6c2b47307073, 0x503207d1af0c8cf8, + 0xd76f8619059f9a79, 0x64d24617855dee45, 0xf7bc7a877923196a, 0xd6cc42ed6a65be79, + 0xe3912ff09d4fc574, 0x4192d03b2bc2460a, 0xa0dcc37dad98af85, 0xfc59049b2a5818a4, + 0x2128bae90a5b975f, 0xbe7067ca05ea3294, 0x5bab7e7753064c4f, 0x42cbf0949ef88443, + 0x564df4bbd017492c, 0xf2c2eb500cf80564, 0x5b92e67eb00e92af, 0x8c4103eef59c0341, + 0x83412122b8284998, 0x888daf2da0636b6d, 0x4d54b10303dd07d6, 0x201190e7c1e7b5ed, + 0x3797510bb53a5771, 0x03f7bc598b570b79, 0xdc1e15d67d94f73e, 0x721e8b499ebe02c1, + 0x71f954f606d13fa0, 0x0c7a2e408c168bf0, 0x07df2ef14f69c89d, 0xe295096f46b4baaf, + 0x7a2037916438737e, 0xd1e861aeaf8676ea, 0xb36ebdce368b8108, 0xb7e53b090ddb5d25, + 0x5a606607b390b1aa, 0x475e52994f4a2471, 0xbcc2038ba55b2078, 0x28b8a6b6c80df694, + 0xb5f0130ec972c9a2, 0x7a87cd2a93276b54, 0x4d0eec7ecf92d625, 0xac1a8ce16269a42e, + 0xa4ca0237ca9637b8, 0xd8dc8ff91202b6ff, 0x75b29846799d7678, 0x761b11a5edd9c757, + 0xf2581db294ef3307, 0xe3173c2b6a48e20f, 0xe46fd7d486d65b3c, 0x1352024303580d1f, + 0x2d665dae485c1d6d, 0x4e0905c825d74d3b, 0x14ff470c331c229e, 
0xbdc656b8613d8805, + 0x36de38e396345721, 0xaae682c1aa8ff13b, 0x57eb28d7b85a1052, 0xf3145290231d443a, + 0xd0f68095e23cbe39, 0x67f99b3c2570b33d, 0x54575285f3017a83, 0x9b2f7bb03d836a79, + 0xa57b209d303367a9, 0x7ccb545dd0939c79, 0x1392b79a37f4716d, 0x6e81bb91a3c79bcd, + 0x2c2cd80307dddf81, 0xb949e119e2a16cbb, 0x69625382c4c7596f, 0xf19c6d97204fb95c, + 0x1b2ea42a24b6b05e, 0x8976f83cd43d20ac, 0x7149dd3de44c9872, 0xc79f1ae2d2623059, + 0xca17a4f143a414e1, 0x66d7a1a21b6f0185, 0xed2c6198fe73f113, 0x16a5f0295cbe06af, + 0x5f27162e38d98013, 0xf54d9f295bdc0f76, 0x9ba7d562073ef77b, 0xa4a24daaa2cfc571, + 0x49884cf486da43cd, 0x74c641c0e2148a24, 0xbff9dcbff504c482, 0xf8fc2d9403c837ab, + 0x6ccc44828af0bb1e, 0xbcf0d69b4c19dfdb, 0x8fe0d962d47abf8f, 0xa65f1d9d5514271d, + 0x26ff393e62ef6a03, 0xc7153500f283e8fc, 0xea5ed99cdd9d15cd, 0xfc16ac2ba8b48bb7, + 0xf49694b70041c67a, 0xbd35dd30f5d15f72, 0xcf10ad7385f83f98, 0x709e52e27339cdc2, + 0xe9505cb3ec893b71, 0x2ffa610e4a229af7, 0x12e1bc774d1f0e52, 0xe301a3bb7eacccc8, + 0x1fdd3b6dcd877ebf, 0x56a7e8bda59c05aa, 0x99acd421035d6ab4, 0xfd21e401cecd2808, + 0x9a89d23df8b8d46f, 0x4e26b1f1eb297b9c, 0x9df24d973e1eae07, 0xe6cdc74da62a6318, + 0xfc360d74df992db0, 0xf4eca0a739514c98, 0x481c515ba9bf5215, 0xce89cce80f5f3022, + 0xf487a10fc80e4777, 0x235b379a87e41832, 0x76f72e028371f194, 0xd044d4a201325a7d, + 0x47d8e855e0ffbdde, 0x268ae196fe7334b0, 0x123f2b26db46faa8, 0x11741175b86eb083, + 0x72ee185a423e6e31, 0x8da113dfe6f6df89, 0x286b72e338bbd548, 0xa922246204973592, + 0x7237b4f939a6b629, 0x31babda9bedf039a, 0xb2e8f18c6aeec258, 0x0f5f6ce6dd65a45e, + 0x8f9071a0f23e57d3, 0x71307115ba598423, 0xcbe70264c0e1768c, 0x1c23729f955681a8, + 0xfbc829099bc2fc24, 0x9619355cbc37d5d6, 0xea694d4e59b59a74, 0xb41cf8d3a7c4f638, + 0xae1e792df721cd0b, 0x7cd855d28aac11f6, 0xca11ba0efec11238, 0x7c433e554ce261d8, + 0xe3140366f042b6ba, 0x8a59d68642b3b18c, 0x094fcdd5d7bccac2, 0x9517d80356362c37, + 0x4a20a9949c6c74e8, 0xc25bcf1699d3b326, 0xa8893f1d1ed2f340, 0x9b58986e0e8a886e, + 
0x29d78c647587ce41, 0x3b210181df471767, 0xd45e8e807627849d, 0x1ec56bc3f2b653e3, + 0x974ff23068558b00, 0xdb72bdac5d34262c, 0x23225143bb206b57, 0xd0a34cfe027cbb7e, + ], + // seed = 3 + [ + 0x39209fb3eb541043, 0xee0cd3754563088f, 0x36c05fc545bf8abe, 0x842cb6381a9d396b, + 0xd5059dcb443ce3bf, 0xe92545a8dfa7097e, 0xb9d47558d8049174, 0xc6389e426f4c2fc0, + 0xd8e0a6e4c0b850d3, 0x7730e54360bd0d0d, 0x6ecb4d4c50d050d5, 0x07a16584d4eb229f, + 0x13305d05f4a92267, 0xb278ddd75db4baec, 0x32381b774138608f, 0x61fe7a7163948057, + 0x460c58a9092efee6, 0x553bf895d9b5ff62, 0x899daf2dabfd0189, 0xf388ab9c1c4b6f70, + 0xd600fe47027ea4cd, 0x16d527ec2b5ef355, 0x5ac1f58ff6908c81, 0xa08d79ff8ee9ffe8, + 0xc1060a80b7a5e117, 0x14b2c23118c60bda, 0x8cc0defbb890df8f, 0xe29540fd94c6d28b, + 0xa604f003f82d5b71, 0xa67583d4eb066d18, 0xd62cbd796322b3fc, 0x070cfe244cdcccf3, + 0x73557c30b3af47e5, 0x2e544e31153a2163, 0x996eef7464d5bead, 0xbc71cb5ab0586cdc, + 0x0bfcb6c1b517ed69, 0x62b4f1fcc82e8ca0, 0x0edbc68f544965c5, 0x40fa39baa24af412, + 0xf39aeb2413dab165, 0x17e6013e7afee738, 0x8109bff1c8d42a9d, 0x3cd99863390989b5, + 0x02021a4cc9c336c8, 0xa06060778cb60aa4, 0xd96591db60bc1e06, 0xd2727175183f4022, + 0xcdc1f1c5bce3e7ce, 0xb393ccc447872a37, 0xdf6efe63257ead3a, 0x20729d0340dbceb6, + 0x9f3d2d26fc0ea0d7, 0xf392e0885189bd79, 0xdf2ee01eb212b8b6, 0x6e103a0c0f97e2c3, + 0x96c604a763bd841b, 0x9fc590c43bba0169, 0xf92dcd5ddc248c40, 0x113a8b54446941dc, + 0x5943eda146b46bb8, 0xbf657901a36a39a7, 0x5a4e0e7ea6568971, 0xb94c635bae9f9117, + 0x2626fb65b3a4ef81, 0xa59bfd5478ce97de, 0x79112ba9cc1a1c63, 0xf41f102f002cf39c, + 0x0a589bcbfb7ff1c8, 0xa1478c53540c4fa1, 0x60d55e72c86dfaca, 0x312e7b6840ea7a39, + 0x8aae72dcccfe1f75, 0xff2f51f55bf0247a, 0x3c2e4b109edb4a90, 0x5c6d73f6525c7637, + 0xe49acb04a199f61c, 0x27860642d966df7f, 0x541ce75fb1e21c30, 0xd9fcd6f90806c7cc, + 0xb87c27bc93a7969b, 0x92f77a1179b8f8dc, 0xb1f29379deb89ed4, 0x7e63ead35808efe7, + 0x13545183d7fa5420, 0x575f593e34cf029d, 0x27f1199fb07344ae, 0xe67f95f7dc741455, + 
0x49b478b761ab850b, 0xd7bedf794adfc21e, 0xdc788dcd2dda40ae, 0x14673eb9f4d8ad35, + 0x0cced3c71ecf5eb1, 0xe62d4e6c84471180, 0xdfe1b9e2cb4ada7d, 0x70185a8fce980426, + 0x0ce2db5e8f9553d6, 0x1fedc57bb37b7264, 0xb9310a2e970b3760, 0x989ff8ab9805e87d, + 0x0b912d7eb712d9ee, 0x1fe272830379e67c, 0x16e6a73aff4738fb, 0xeed196d98ba43866, + 0x7088ca12d356cbe2, 0x23539aa43a71eee0, 0xed52f0311fa0f7ad, 0xa12b16233f302eea, + 0xc477786f0870ecb4, 0xd603674717a93920, 0x4abe0ae17fa62a4c, 0xa18f1ad79e4edc8d, + 0xc49fe6db967c6981, 0xcc154d7e3c1271e9, 0xdd075d640013c0c0, 0xc026cd797d10922a, + 0xead7339703f95572, 0x4342f6f11739eb4b, 0x9862f4657d15c197, 0x4f3cb1d4d392f9ff, + 0xe35bffa018b97d03, 0x600c755031939ad3, 0xb8c6557ffea83abf, 0x14c9e7f2f8a122ea, + 0x0a2eb9285ee95a7c, 0x8823fec19840c46f, 0x2c4c445c736ed1d0, 0x83181dff233449f1, + 0x15ed3fca3107bef5, 0x305e9adb688a4c71, 0x7dbef196f68a3e2e, 0x93e47ece3e249187, + 0x8353c5e890ead93c, 0xea8a7ae66abafdf7, 0xf956dbb6becf7f74, 0x9f37c494fbfdb6e4, + 0x11c6cbaa2485dd32, 0x206f336fcca11320, 0x9befe9a59135d8fe, 0x5f3ef8b8db92c7db, + 0xbb305e556ce0ce9a, 0xf26bdafb1305887f, 0xcbf28abe23f08c61, 0x0bc64173b914e00b, + 0x9168da52e983f54a, 0x6ea41d09c3574a3e, 0x78aa44d4a74459ae, 0x2931422878387bf5, + 0x018f64a3a92c2d9c, 0x9be43f6752e66b34, 0xae378890decd1152, 0x07325329a1cb7623, + 0x3b96f4ee3dd9c525, 0x2d6ebcdbe77d61a3, 0x10e32b0e975f510c, 0xffc007b9da959bf9, + 0x38bf66c6559e5d90, 0xbe22bdf0bf8899fe, 0x87807d7a991632a8, 0x149a0d702816766a, + 0x026f723db057e9ab, 0xeeecb83625ec6798, 0xcec2ed5984208148, 0xd985a78e97f03c84, + 0xf96c279e7927b116, 0x99d5027b3204f6e2, 0x13a84878c3d34c55, 0x5cf5ec96229e9676, + 0x0bc36b07e4f8e289, 0xbed33b80a069914d, 0x2fbfbdd1ff4b9396, 0xab352bb6982da90f, + 0x154d219e4fa3f62b, 0x4d087512bb6b9be7, 0xc582e31775ee400e, 0x7dadb002ae8c4a4e, + 0xaae2957375c1aee2, 0x5f36ca643356625b, 0xf87cf8eb76e07fb7, 0x46f432a755e02cc3, + 0x36087e07aba09642, 0xe5642c1e4ebb9939, 0xb9152d22338eefad, 0xf7ba44278a22cf7f, + 0xd3b8013502acd838, 
0x7761511da6482659, 0xb0857621638e8e50, 0x552eddb4a8b1d5f5, + 0xc43d9861e812c3ea, 0xd765c2aada47910c, 0x21c935b68f552b19, 0x6256d5641a2b47dc, + 0xab711d8e6c94bc79, 0xa8d0b91a2a01ab81, 0x5e6d66141e8d632a, 0x7638285124d5d602, + 0x794876dbca3e471f, 0x951937d8682670ce, 0x0f99cb1f52ed466a, 0x8c7cd205543b804c, + 0x2fd24d74a9c33783, 0xe5dcb7b7762e5af1, 0x45e6749cca4af77c, 0x540ac7ee61f2259f, + 0x89c505c72802ce86, 0xeab83b9d2d8000d1, 0x9f01d5e76748d005, 0xc740aaef3035b6d0, + 0x49afcd31d582d054, 0xcba5dc4c1efb5ddc, 0xc0a4c07434350ca1, 0xfc8dfaddcc65ee80, + 0x157c9780f6e4b2d9, 0x9762a872e1797617, 0xc4afae2cf3c7e1bd, 0x71cde14591b595d4, + 0x8843c3e0e641f3b9, 0xd92ecd91dce28750, 0x1474e7a1742cb19f, 0xec198e22764fa06b, + 0x39394edb47330c7d, 0x00ba1d925242533d, 0xaed8702536c6fb30, 0x6d3618e531c2967a, + 0x77f7cedcd7cc0411, 0xbc1e2ab82be5b752, 0x07b0cf9223676977, 0x596c693b099edd53, + 0xbb7f570f5b9b2811, 0x96bfdad3c4a6840c, 0x668015e79b60c534, 0x3ad38d72123f1366, + 0x6b994d81d2fcbb09, 0x70885f022c5052d8, 0xc891ee79d9306a7b, 0x2c4df05c0ed02497, + 0x19ebc13816898be2, 0xea7c64df11c392a2, 0xb7663e88dd12e1bd, 0x79f768cb8e154c21, + 0x1fb21b12e945933b, 0xe6a9045643f6906e, 0x544c47acd7e15371, 0xb7709b14f727e3d1, + 0x326ee36a46942971, 0x477f1cf7b0e2d847, 0x88b8f6b82b3b0c24, 0x18bc357b80e3cd5c, + 0x3333de70e4d66e0b, 0x4fd4c5e148583cf6, 0xae1b62f3008c0af3, 0xc49f419b6ab29cf5, + 0x2c29fa65afc3fa28, 0x4b19d93734d03009, 0x7dd6c09e589276ad, 0x1cece97f30de48ad, + ], + // seed = 4 + [ + 0x58bdf4338602e4fb, 0x71a5620b02c926d5, 0x3811c960129c2d9f, 0x29c2fb11fccac567, + 0x0d6b1ea7780f1352, 0xcc4d3ddfae3f87b3, 0xfdd30257362a586b, 0xabc948fde69f25f1, + 0x51b3523469d30f7b, 0xe0f0322724405ace, 0xd3729266d896da1e, 0xb10c37e5147915bf, + 0x8b577039f9fa32a3, 0xe677c6a9cbfb44b3, 0x7317a756ebb51a03, 0xf8e988ef37359485, + 0x600fc1ef3f469ff3, 0xbf0b8f8520444e01, 0x3711168b08b63d73, 0x34146f2944a6cb36, + 0x717feb263862cdde, 0x7185f8347db00412, 0x900798d82127e693, 0x84089e976a473268, + 0x10f8308c0d293719, 
0xf62a618d4e5719b8, 0x8bdbd257a1a9516f, 0xf49f666fd7a75110, + 0xbaf45e2db7864339, 0xe4efa1ea0c627697, 0x3e71d4c82a09fe10, 0x54a2a51cf12127bb, + 0xa0592c9f54ba14cd, 0x27dd627a101c7a42, 0x3d2ceb44b3d20d72, 0x7ee1f94a68ca8f5d, + 0x7e8cb8651b006c36, 0xbd9fa7ca3a475259, 0x856de173586a7b34, 0xcedb291b594cb1b5, + 0xa3d6e462fd21cddc, 0x74561d10af9118e4, 0x13a3d389fc2d4b36, 0xeea8594a4a054856, + 0xf56d7474d9ba4b13, 0x25ddce2f6490b2fd, 0x920653ff3a8d830b, 0xcd8c0c9cdac740d1, + 0x2c348a738db9c4a0, 0x2967ccbe8ea44c22, 0x47963f69adb049f8, 0xf9d01eb5b4cf7eb6, + 0x7a5c26eb63a86bd2, 0x62ad8b7a71fa0566, 0xb373213179f250ae, 0x589d4e9a88245a4d, + 0x433dafebe2d558a8, 0x521fbef2c8fe4399, 0x62a31f9ff9ccd46b, 0x51602203eba7c1a6, + 0x9afc8c451b06c99f, 0xb529085bdbaffcea, 0xac251825cc75892b, 0x94976a5bce23d58e, + 0xdd17925b6c71b515, 0x568fd07a57bce92e, 0xefac31200d8bd340, 0x716c3e466b540ef9, + 0x3d2c9e380063c69b, 0x14168f9a3662dd83, 0xd298c7504dbc412f, 0x74490a94f016719f, + 0x0e0da431e1ab80c8, 0xe321f63dc6b169ae, 0xf08671544febc95a, 0x39324450cc394b3b, + 0xea6e3d35f1aa3a70, 0x8ef8a886508ce486, 0xdc1a631ef0a17f06, 0xfda2b3fbcd79e87b, + 0xd75bcae936403b10, 0xf88b5bd9f035f875, 0xc43efec2e3792dd4, 0xe9fac21a9d47cd94, + 0xc2876f0c4b7d47c3, 0xaba156cf49f368b4, 0x5ccda2170fa58bf9, 0xadc92c879ed18df7, + 0x110c1b227354e6c8, 0x298ee7a603249200, 0xde92142ede0e8ee7, 0x88e4a4610644ba9e, + 0xbb62d277e7641d3a, 0xb9be1985b7bf8073, 0x29024e5426cdb0d1, 0xf6aefd01f3092ab8, + 0x2a07087b313133aa, 0x6d71f445d6dfc839, 0x1e2412ff12e5526b, 0xed5cdeba6617b9e1, + 0x20b1d0d5e5f8760e, 0x12ff15705c368260, 0x7bf4338b7c387203, 0x34ff25f00cd06185, + 0x1148c706c518cf28, 0x5c04f0623388f025, 0xcb9d649275d87d79, 0x9b5f0c24fabc42ec, + 0x1a7b5e7964e33858, 0x2a81bbd8efdc6793, 0x8d05431ffe42752e, 0x83915cd511002677, + 0x580ed4d791837b31, 0x5982e041d19ff306, 0xcad0d08fa5d864ca, 0x867bee6efe1afa63, + 0x26467b0320f23009, 0xd842414dfda4ec36, 0x047fcdcbc0a76725, 0xbddb340a3768aeca, + 0xef4ce6fa6e99ab45, 0x88c5b66c7762bf9b, 
0x5679f1c51ffb225d, 0xdab79048317d77ee, + 0xf14e9b8a8ba03803, 0xe77f07f7731184c1, 0x4c2aab9a108c1ef5, 0xa137795718e6ad97, + 0x8d6c7cc73350b88b, 0x5c34e2ae74131a49, 0xd4828f579570a056, 0xb7862594da5336fc, + 0x6fd590a4a2bed7a5, 0x138d327de35e0ec1, 0xe8290eb33d585b0b, 0xcee01d52cdf88833, + 0x165c7c76484f160e, 0x7232653da72fc7f6, 0x66600f13445ca481, 0x6bbdf0a01f7b127d, + 0xd7b71d6a1992c73b, 0xcf259d37ae3fda4a, 0xf570c70d05895acf, 0x1e01e6a3e8f60155, + 0x2dacbb83c2bd3671, 0x9c291f5a5bca81af, 0xd976826c68b4ee90, 0x95112eec1f6310a2, + 0x11ebc7f623bc4c9a, 0x18471781b1122b30, 0x48f7c65414b00187, 0x6834b03efa2f5c30, + 0x0875ef5c2c56b164, 0x45248d4f2a60ba71, 0x5a7d466e7f7ba830, 0x2bebe6a5e42c4a1d, + 0xd871d8483db51d10, 0x6ee37decd2fd392f, 0x7d724392010cede3, 0x8e96ef11e1c9bcc8, + 0x804a61d86b89d178, 0xbb1b83ce956055ec, 0xcb44e107410ff64f, 0xc426bb09ee0ba955, + 0x057c08f42c3dd7f1, 0x40ea1ec148602bdf, 0xc24688deeb65d7f1, 0xd8bcc53c768ba4e4, + 0x16e0e3af65c1106c, 0xfc12f7e7d647218b, 0x70d6e1d3ee93cef4, 0x01d2a505c4541ef9, + 0x1ef79e16e764d5c3, 0x0363d14d13870b98, 0xb56ef64345d06b11, 0xe653d557ebb7c346, + 0x8304a8597c2b2706, 0x1536e1322ce7e7bb, 0x525aec08a65af822, 0x91f66d6e98d28e43, + 0xe65af12c0b5c0274, 0xdf6ae56b7d5ea4c2, 0x5cef621cedf3c81c, 0x41e8b1ffd4889944, + 0xb5c0f452c213c3e5, 0x77af86f3e67e499b, 0xe20e76ea5b010704, 0xbdc205ab0c889ec0, + 0xc76d93eb0469cd83, 0x17ac27f65cab0034, 0xd49ec4531fd62133, 0x07a873ea2f1b9984, + 0xbff270dfef0032ee, 0x1764dbe91592f255, 0xe40363126f79e859, 0xa06cad3ab46971f6, + 0x0be596e90dedd875, 0x3387cce5c1658461, 0x44246acf88a9585e, 0xe0ad82b92d5ecb2c, + 0x2177491c9a1600a6, 0x16e7c4aac0f02422, 0x75792eeeec15c4e1, 0x2309cd359d08ee30, + 0x7cd9831dd1b83b0a, 0x374914a7c4ee8cf0, 0x0dd17765c9ac2e54, 0xb7847470ba9a7688, + 0xfba4f4bbe2991173, 0x422b203fc3de040e, 0x63bfcaf2ecf2ab0e, 0x0c5559f3a192946e, + 0xfdf80675c1847695, 0xf5f570accab842c9, 0x65cc5a448767afea, 0x1efeb0a7ee234f2f, + 0x9b05f03d81e7b5d2, 0xe7c31317a8626cf4, 0x620f2a53081d0398, 
0x1b6de96cdd9943ae, + 0x8c226a436777d303, 0xa08fbbd50fafb10d, 0x6a64c5ec20104883, 0x9c9c653502c0f671, + 0x678a02b2174f52a0, 0x68e008ba16bbad4b, 0xa317c16d2efb860f, 0xeab2075d17ed714c, + 0x565eeeddf0c4ea15, 0x8ec8e94d242a6c19, 0x139e8e27d9000fae, 0xc977a7ff1b33d2f5, + 0x1d0accca84420346, 0xc9e82602cd436e03, 0x6a2231da53d2ccd3, 0xb44b12d917826e2a, + 0x4f4567c6a74cf0b9, 0xd8e115a42fc6da8f, 0xb6bbe79d95742a74, 0x5686c647f1707dab, + 0xa70d58eb6c008fc5, 0xaaedc2dbe4418026, 0x6661e2267bdcfd3d, 0x4882a6eda7706f9e, + 0xf6c2d2c912dafdd0, 0x2f2298c142fd61f9, 0x31d75afeb17143a8, 0x1f9b96580a2a982f, + 0xa6cd3e5604a8ad49, 0x0dae2a80aad17419, 0xdb9a9d12868124ac, 0x66b6109f80877fac, + 0x9a81d9c703a94029, 0xbd3b381b1e03c647, 0xe88bc07b70f31083, 0x4e17878356a55822, + ], + // seed = 5 + [ + 0xb3c58c2483ad5ead, 0x6570847428cdcf6c, 0x2b38adbf813ac866, 0x8cb9945d37eb9ad3, + 0xf5b409ec3d1aed1c, 0xa35f4bffc9bb5a93, 0x5db89cde3c9e9340, 0xff1225231b2afb2b, + 0x157b0b212b9cc47d, 0xf03faf97a2b2e04d, 0x86fdab8544a20f87, 0xfcb8732744ae5c1c, + 0xd91744c0787986d5, 0x5f8db2a76d65ad05, 0xcff605cbed17a90d, 0xf80284980a3164e7, + 0x59cc24e713fccc7d, 0x268982cada117ce4, 0xcd020e63896e730e, 0xe760dc46e9fe9885, + 0x6aaece8ab49c6b5d, 0x7451194d597aae3e, 0x35d4385900332457, 0xa40fb563a096583d, + 0xa797b612f7f11b76, 0x2fed6eb68e6a2b9b, 0x2f06ee64aeffd943, 0x9dd0e49d9ca45330, + 0x97d48f08bd7f1d8f, 0x1cfa7fe3ebe4d8ee, 0x2a2ba076bd397d42, 0x68c4344f7472f333, + 0xce21ec31987d74b5, 0xb73dabdc91d84088, 0x801aadee592222fe, 0xaf41345398ebc3f5, + 0x8a8f653d7f15ee46, 0xce2d065ff2ba2965, 0x4e05da515da2adb7, 0xa6dbdb8aa25f0fd4, + 0xca9f9666bbd2d5a9, 0x6b917ce50bd46408, 0x1550cc564ba6c84d, 0xb3063ae043506504, + 0x84e5f96bb796653d, 0xe2364798096cf6e3, 0x3b0dfedf6d3a53d0, 0xb7e4c7c77bde8d93, + 0xe99545bac9ab418a, 0xa0e31f96889507bb, 0x883c74f80c346885, 0xf674ae0b039fd341, + 0x8bb6ce2d5e8d1c75, 0x0c48737966a7ed7c, 0x04fcdf897b34c61c, 0xe96ac181bacbd4d6, + 0x5a9c55a6106a9c01, 0x2520f020de4f45d3, 0x935730955e94d208, 
0xce5ad4d7f3f67d3b, + 0xa4b6d107fe2d81ca, 0x4f0033f50ae7944e, 0x32c5d28dd8a645a7, 0x57ce018223ef1039, + 0x2cbab15a661ab68e, 0x6de08798c0b5bec2, 0xee197fb2c5c007c6, 0x31b630ac63e7bda2, + 0xab98785aefe9efe3, 0xa36006158a606bf7, 0x7b20376b9f4af635, 0xa40762fdc3c08680, + 0x943b5faffd0ebee2, 0x7f39f41d0b81f06e, 0x7c4b399b116a90f8, 0x24e1662ac92bc9f3, + 0xcf586fc4e8e6c7db, 0xe46e0d047eeb12d7, 0xe8021076e4ea9958, 0x11fc13492e3ca22a, + 0xd61eae01410397e3, 0x7e8c4a58036a8e9f, 0x068a6de267970745, 0x64faab129bef1a41, + 0xb4a6f720943dad01, 0x631491058d73a9d5, 0xdad4fe95eab3ec02, 0x0a8b141c5c3a44f6, + 0x9fc69d4c2b335b98, 0x94d5f84a07d6e4cd, 0x1b73965de143c608, 0x443932c2dda54bcc, + 0x7397818fb0b04cd2, 0xef4ab03a1202b277, 0xf3d2ee459c0c2b92, 0x182d4daf8b058a87, + 0x90e63035d7b51368, 0xba4cd8b9a95d45fd, 0x12a7392c76731090, 0x890d264ec5d082d2, + 0xeeaf5c363da4994e, 0xd6aad756902123fb, 0xb531ebebdb28f191, 0xe71ce659fc59babd, + 0x37c1b94f63f2dcb5, 0xe4e3abeb311f9b96, 0x4a31b72ccb8695d3, 0x52cae1f0629fdce4, + 0xe5b0475e2ed71369, 0x2724e8c3506414fb, 0xbab0367920672deb, 0x0161a781c305449f, + 0x37b70f40f5bb60be, 0xddd1094c50251a01, 0x3b28283afd17224e, 0x06dec0cfe889fc6b, + 0x47608ea95bb4902d, 0xad883ebc12c00e82, 0x9e8d7ae0f7a8df29, 0xa79443e9f7c013a1, + 0xcfa26f68b7c68b71, 0x33ae6cc19bda1f23, 0xd9741e22b407887f, 0xf2bff78066d46b1c, + 0x794123191c9d32d4, 0x56cb6b903764ec76, 0x98775d0ef91e1a5a, 0xae7b713bc15c1db9, + 0x3b4c1a7870ed7a0d, 0x46666965f305cc34, 0x0ea0c3b2e9c6b3cd, 0x4dc387039a143bff, + 0x5f38bb9229ef9477, 0xea5d39ba72af7850, 0x69a5ed0174ce2b6d, 0x06969a36bfe7594d, + 0x0adee8e4065ccaa3, 0x908a581d57113718, 0x64822d6c5a8190ed, 0x8c5068b56ace4e4c, + 0x88ba3b4fb4e30bef, 0xa6ec0b8bb5896cfe, 0x4e23fcc6b47996fd, 0xe18e75b0dd549c7a, + 0xcd90f17e106cf939, 0x1666fdfb2ef7c52f, 0x4fae325f206dd88c, 0xe7bc1160e25b062d, + 0x3cc999cb246db950, 0xc5930a7326cd5c37, 0xb008a48a211367bd, 0xc5559da145a88fd4, + 0x1e3ad46655fac69c, 0x7834266b4841bfd7, 0xa764450fbffc58cc, 0x54d8cf93a939c667, + 
0x93c51f11b21b2d9d, 0x0964112082ed65cc, 0x4c2df21213e7fb03, 0xf0405bc877468615, + 0x17b4fc835d116ab4, 0xa6b112ae5f3cb4ef, 0x23cfc8a7fd38a46e, 0x8e0a360dc2774808, + 0x24ca9c8092105ad5, 0xafd3f75524f2e0d5, 0x4f39ed7dbaddc24c, 0xe5e362c7679a7875, + 0x00914a916b07b389, 0xdfe1119b7d5ab5da, 0xabd6ed9940e46161, 0x630ed2044171e22c, + 0xdecc244157dd1601, 0x777e6d5b4b4868d5, 0x9b3530bee67017d8, 0xd2faf08b291fdcb9, + 0x006e99455d6523de, 0xd559b5817f6955b5, 0xefcc1063b0088c61, 0xed73145ae0f00ae7, + 0xab2af402cf5b7421, 0x897767f537644926, 0x26c9c0473ca83695, 0x192e34e1881b2962, + 0xf7cf666ec3b3d020, 0x27f9b79c7404afb7, 0xe533e8bed3010767, 0xe5817838e11d05d3, + 0x65659c531bd36517, 0xd427c5e0a23836fd, 0xf3eab7ea58fa3528, 0x07683adae1289f35, + 0x201d6af7e896dd32, 0xd5da938b9a21ad88, 0x843fb73ad67bc316, 0x1782ec7d5feef21b, + 0x943f66f6ec772877, 0x7e9112e7b26da097, 0xeac8161f8663c2c7, 0xe8600db480a9ebf4, + 0x07807fc90f6eaf5f, 0xe0e4c9deb41abf83, 0xbdf533db271f9c15, 0xb398411b0497afe2, + 0xdebb45ef25448940, 0xe7a5decefcd376c4, 0xaf1ef3c728c83735, 0xb8b83a99355cb15a, + 0x6444a0344f1611e4, 0xe8bb7f5cf3c60179, 0x77ab5c5177e75ff7, 0xc38fd6fa849d585d, + 0x390d57d53029060a, 0xa66327eb7b8b593c, 0x6350a14f6fcd5ac9, 0x2c08125bcd7008b4, + 0x2d00c299a6a6bf8e, 0x6b0039c1f68d1445, 0x0035150c5d06f143, 0xa34d01628cc927e1, + 0xdf5b3164d7b2ede1, 0x8167db1d0583d72e, 0x4e13b341cd2ae8bc, 0xa693d9b1f416e306, + 0xc15ed7ca0bc67609, 0xdc344313c1c4f0af, 0x88b6887ccf772bb4, 0x6326d8f93ca0b20e, + 0x6964fad667dc2f11, 0xe9783dd38fc6d515, 0x359ed258fa022718, 0x27ac934d1f7fd60a, + 0xd68130437294dbcc, 0xaf5f869921f8f416, 0x2b8f149b4ab4bf9f, 0xc41caca607e421cb, + 0x7746976904238ef9, 0x604cb5529b1532f0, 0x1c94cd17c4c4e4ab, 0xe833274b734d6bbe, + 0xe9f1d3ef674539ce, 0x64f56ed68d193c6a, 0xe34192343d8ecfc1, 0xcb162f6c3aa71fe8, + 0x99eaf25f4c0f8fa4, 0x92f11e7361cb8d02, 0xb89170cddff37197, 0x4f86e68a51e071e3, + 0x31abf6afd911a75b, 0x6d20cf259c269333, 0x4150b9f88fcb6513, 0x705063989ebf7451, + 0x559231d927c84410, 
0x1ca8ec4b098bc687, 0xebed22405c9180e0, 0xaa815b37d052af59, + ], + // seed = 6 + [ + 0x946ac62246e04460, 0x9cebee264fcbc1ae, 0x8af54943a415652b, 0x2b327ed3b17b8682, + 0x983fde47b3c3847e, 0x10a3013f99a2ad33, 0x6e230bb92d2721ef, 0x1cf8b8369e5c5c50, + 0x7f64017f2b7b3738, 0xd393248a62417fa1, 0x9ff01c0b20a372c5, 0xb0e44abce7e7c220, + 0xcebb9f88d48a815f, 0xdb7df6bd09033886, 0x7844fc82b6fa9091, 0x72d095449863b8ec, + 0xc13e678c89da2c7e, 0x6caf4d5ad231d12f, 0x2e0ab7b5fcf35c49, 0xf410720cb932a70f, + 0xd66ea581f16fce06, 0x175c9f002f57dc98, 0xccbcfd0d32988775, 0xfde4c407d3b0a232, + 0x5db2931ae7e97223, 0x6e07e2173085809f, 0x6e1d1ec0f9cad73c, 0xb2fc251a7f802619, + 0xbc1fc17f04f342de, 0x8de8f21ec658e078, 0x72c0f40cbee53fd6, 0x0678244411fc17a1, + 0x1d5837ca166b9bbd, 0xc8cada003c554345, 0x6a2fe2bfb2e58652, 0xfca9d797a6f7988b, + 0x6699e24ac737948b, 0x69623ffcb05789ba, 0x946429c529d95b75, 0x0d14df0b2a13970f, + 0x593d8592c440dfec, 0x2ee176f3d7e74b94, 0xae003f1da3be9e26, 0x0c7b02c4c0f6764a, + 0x3117e2fa1f632462, 0xf0f23265b6f1eaeb, 0x3111255d9b10c137, 0xc82745e509a00397, + 0xbd1d04037005fea7, 0xe104ab0dd22a9036, 0x51b27ce50851ac7a, 0xb2cb9fb21b471b15, + 0x29d298074c5a3e26, 0x6ebdf2058b737418, 0xc4a974041431b96f, 0x1ec5a30ccb6bdaac, + 0xe818beede9bf4425, 0x4b69b1bce67a5555, 0xf5c35f1eb0d62698, 0xf4509bbd8e99867c, + 0xb17206debd52e1bc, 0x35785668c770b3be, 0xe9343987ff5863bc, 0x2ee768499ac73114, + 0x5132bb3426eeaaf4, 0x471bce2c6833c5ff, 0xbb9a2d5428e6f6f9, 0xd5678943c595792d, + 0xab2a65e7f81e479c, 0xa82407bb23990b31, 0xdae321383984923c, 0x01823bb22648e6f1, + 0xda6e8df4214a8b04, 0x0e172bb88e03d94f, 0x552da6c22e362777, 0x7ce67329fb0e90cb, + 0x7b2d7f287ede7ebf, 0xd44f8222500651bd, 0x4acca1ef58fbb8ab, 0x428ecf058df9656b, + 0xd7e1ec6a8987c185, 0x365be6a54b253246, 0x168849be1e271ee8, 0x6a00f3c4151a8db2, + 0x37602727ca94b33d, 0xf6b50f18504fa9ce, 0x1c10817f6bc872de, 0x4bfe1fe42b0f3638, + 0x135fad4b8ef6143b, 0x1b25ad2bafc25f58, 0x41e37f85cf321f92, 0xfc73f75d9d5b9bea, + 0x9eb3694d1e9cb7e1, 
0x601d51f08fa83b90, 0x234a2a9b88366f41, 0x63fe903e16f2c3bf, + 0x1cdbd34fa751c0b0, 0x0ce4fc6747c0558c, 0x51ed72afb8bb49aa, 0x20313ba13ca12c96, + 0x271fa38f9ebd54c1, 0x3696a5ac03a8edde, 0x05602be7df625702, 0x11f1ac73790f7a9f, + 0xa2836c099f0810bd, 0xe5ac2e47caa532fa, 0xd9c000a66d39f681, 0xd93d900e6f3d9d5f, + 0x792c81c65b7900f2, 0x5c5dce790ee20da1, 0x74ff1950edec1aee, 0x71fc85fa1e277d8f, + 0x0e77df17d6546cbc, 0x07debad44816c3b4, 0xbafa721581e92a70, 0x8ab6fbe2ed27bba8, + 0xe83243a20dea304a, 0xaa85a63a84c00a07, 0xde0e79917fc4153a, 0x21bb445e83537896, + 0xeedcac49fc0b433a, 0xffb2926a810ae57a, 0xf724be1f41d28702, 0x79cb95746039bb3b, + 0x5a54fe3742a00900, 0xda4768d64922c04f, 0x420396a84a339dae, 0xa171e26ee5e8724e, + 0x4c8da7c5d289c20a, 0x9ebd79a1a8e94742, 0x39235232b97e9782, 0xb75df0be9bba7d80, + 0x0c1d204dd87d48fc, 0x8f81f3e7177266e8, 0xe4a460b39e78d72b, 0x50b98fa151e65351, + 0xb7cb585c3ee1eddc, 0x11cdad9a76ee1dc4, 0xa38054a78595dc1c, 0x92f09e2ec4978edc, + 0xa8f0061b5efdabaa, 0x04bcc4abc224d230, 0xc58606738e692d46, 0xdd2b27b565952433, + 0x19e6ed1b740beec0, 0xceadd49b2ef9891f, 0x328178c28fe95cad, 0xe5ad4c43afe02848, + 0x03c0cb538cd967c0, 0xec4352526d19a630, 0x4c7e99389d39b031, 0xf65dd05362c2deb6, + 0xd1e70daf6879d28d, 0xbe9f57db6309b265, 0xa4b66f370b872bb7, 0xe26896fbc6ee1fd5, + 0xac705e661bfcf7c5, 0xab4d0d07d7f09940, 0x976417c06aeb6267, 0x8161c684a6bd468c, + 0xf77b6b9976dc4601, 0xc6489b779a39c12c, 0xb2aa58d5681cea1a, 0x043b1b40f8c3e04c, + 0x681fcbfadc845430, 0xab8896c921ba8def, 0x57aaf172606f37b2, 0xc3735048cd5eb8d7, + 0xa7078b96955631bd, 0xdd6b3543aa187f33, 0xc7103ea4a2a697fd, 0x8d7b95f6ff1f7407, + 0xe44f419e84709530, 0xf340caa9132cbb0a, 0x2ba407283143c66c, 0xe1be240ca636c844, + 0x90d32f2877ac08bc, 0x5d26e6294b2c8673, 0x4a6b2f5b27c87a44, 0x961fb9043f76d34f, + 0x0afee02d8d3c55d2, 0x6228e3f48c42e5dc, 0xc338e69ee6593675, 0x853f74b16efb7bdd, + 0xd062f40bdd22e687, 0x647164b9ab4c4190, 0xf94689f67d598369, 0x8e4b29d87a5012d7, + 0xaf02b8b925656fbd, 0x7a722a767179a630, 
0xb5c8afe937a75ace, 0xfdb8e8d02d279372, + 0x887ef700cb25fae1, 0xcfe9bd912f72cabe, 0xb1d4dedc24f978de, 0x517522d38319cc2a, + 0x7dd87b2b36aab798, 0x579c4ff3046b5a04, 0xf5c5975c5028b7a7, 0x7094579d1000ec84, + 0xbc8d5b1ea70a5291, 0x161b2d783be8855c, 0xd26d0b0d6d18279f, 0x0be1945f02a78bd5, + 0xb822a5a9e045415b, 0x2fe9d68b1ccc3562, 0xb2e375960033d14f, 0x26aca04e49b4ff22, + 0x732a81c862112aea, 0x8bd901ed6e4260b8, 0xe839532c561ad5b0, 0x8fb6e4d517a79b12, + 0x0dd37f8c0be9b429, 0xc8ad87ad12f1b1b0, 0xc51f3aa62b90318b, 0x031a7e8b86c1cefc, + 0xa95547af2b70fc76, 0x9cb3615c5a98801e, 0xa387e3c3341d7032, 0xa087ea52a1debaef, + 0x16325ec9a2e6e835, 0x587944a484c585eb, 0xc8879033bde22ecc, 0xa39dbfce709c464a, + 0x7acc010f99208774, 0x98dd2973a096c5ad, 0x26458b51139f198c, 0x2f5d19575e8c4f02, + 0x726643f0d38af352, 0x44d879b6d73e6e94, 0xa68a03885c980abe, 0x06048acd161c40c0, + 0xa4dab8f89d405d28, 0x7120c880cb04be18, 0xa062ace22a1cf0cf, 0x3901a9daf29704f4, + 0xff08f3ed989db30a, 0x6d22b13e874c67e9, 0x80c6f35518d73f4d, 0xc23c2a521aac6f29, + 0x2e708fd83aaa42e0, 0x7fc3780f55f1b0fd, 0xabb3075c98cf87f2, 0xb4df3f40f7c61143, + 0x2a04418098a76d75, 0x0d9eeee9509b2d37, 0x6be8ae51f4b59cdc, 0xe746cc7c00e4a2ab, + 0x785bc6df9cac597c, 0x33cb6620ce8adc48, 0xc1ba30739bffcef7, 0x6d95771f18e503f7, + 0xf7be3ae2e62652ff, 0xc8d82ffd2a73c62b, 0x8725a3ba5b110973, 0x67ed6b9c724757ec, + ], + // seed = 7 + [ + 0xc0272d42c19ff3ae, 0x4694228b43ea043b, 0x5709a6ef8a462841, 0xc9210a1e538805c9, + 0x279b171196113ec2, 0x859b769fc2d9e815, 0x0d5d3125a2bf14d3, 0x22bca1cfefa878ba, + 0x481b6bf58037bd83, 0x4933ba8647728d22, 0xf08c7b6b56f6e1b6, 0x374e8af5a15407c7, + 0xa95c4dc3d2487a5c, 0x9b832808ff11e751, 0xf2048507e9da01d5, 0xa9c576189f544a4a, + 0xf6c2a45b2e9d2b41, 0x9b9874c9f10ecc2f, 0x37d9b5f51f8c149e, 0x93aead54c9de9467, + 0x59cf0b4af262da23, 0xe7e9929af18194b2, 0x9df2644e33eb0178, 0xde4122d6f0671938, + 0xf005786c07f4800b, 0xb1fc9d254b5d1039, 0x0bf1088631f6dd7b, 0x665623f0a4b8f0c7, + 0x60f0113a9187db7c, 0xfd7cceda4f0d23a6, 
0x26c01e9d89955940, 0x33afa1dfc0f5a6a0, + 0xeb77daf215e9283c, 0xc7575214bf85edb4, 0xeb0d804bf297e616, 0x84bff4ffd564f747, + 0xc4ac33189246f620, 0x43ef61213ecc1005, 0xcbbb0dea6cd96acd, 0x8ed27abfa8cfcb05, + 0x543b61529cb996b6, 0xa5f987ca41ea5e59, 0x3c50e0ac5254cb7a, 0x4192b0446c06d1e6, + 0x3e86592e21b45388, 0xdb766f06fcc6e51e, 0x0448ee36efe632db, 0x663c9db689253e35, + 0x72e0bd4985331dd4, 0xff501b5bf7d94e74, 0xe911ce758e2113a8, 0xec3a8d03a75a6ba4, + 0xaf6b4b72f56edc83, 0xf284857936c0a391, 0x5ba6feff407d46f4, 0x9d689c26de9d6702, + 0x28c04a9083726b5d, 0x2ccf4a627a029730, 0x7b4719500d4f0c71, 0x76470a9a7da250a8, + 0xcc48409404a1c890, 0xccefbdc7ec9a8055, 0xe0db91bff3cc42d3, 0x0532436426141254, + 0xf2ee9325e6f0ff0b, 0x149c20a5fbb28d9d, 0xe71624cd8d2d14d4, 0x8f01d4dc8cc2dd77, + 0x29cf409b333015b7, 0xba8bebd211884dd1, 0xc3396635e8c8db1d, 0x8ed0f6208d0528b8, + 0x0d90b43fdd0ee334, 0xd73c9a3333a044c7, 0xa2595cd208dbdc38, 0xae93cb264f940c09, + 0x8e0538d8afb07a97, 0x19115ec881385ba2, 0xa886f9e6a8039c6a, 0xcd5d62147ce3ecac, + 0xaecdf9e0bb4969f7, 0x2ddd631c53dcad10, 0x73ad1c97b3412054, 0xb08915fa2722efc6, + 0x97966047e5067eb0, 0x337f1675ed91445c, 0xb3a833d150b96a0d, 0x5940a98fe35e5e2e, + 0xfd03cc354ed0d8ff, 0x4e65b98291a8644a, 0x14a259f2852a60b2, 0x7648e3478c1e8e5f, + 0xbc0fbef6d9a919b4, 0xbec4302081346cf1, 0x57d2ce7aa1c7c511, 0x234c209d8f4e1ac3, + 0x87cf80cc933ce443, 0x7c262c616931e94e, 0xc5e33b049cf9eddf, 0x1a80790ed03ae51b, + 0xf2e8b9494f7220cf, 0x124cb59c14fff3ff, 0xa8a06cbfdb86ce18, 0x9068ef1f80b37653, + 0x0c55417b8d90338f, 0xcd579a523f6bcd30, 0xa31bfe2476a8d2a9, 0x1f8d142208094223, + 0x332dc40a5203cfad, 0xf8792fe5b2d33b4c, 0x443bd9668bf9461e, 0xc9019db0ace1409e, + 0x781bea919a113e8b, 0xb0f11d866abfbeec, 0xcfe139a60db0c26a, 0x869ab8721e6aa39e, + 0xdb48a4977717837a, 0x588a5ff151065b18, 0xe4a251ea0028864d, 0x7f0e43ba408a77c3, + 0x65f66dd50a536135, 0x6f49e934d9331c3e, 0xb8d742e0f0fa6b09, 0xe4e9b272deca2348, + 0xaee132ff902f773c, 0x43f658f7c2a0c90a, 0x28cb4dbc76cc53ea, 
0x7d92253aa99ac39b, + 0x4fea3d832370baab, 0xb29e36936e51d78e, 0xea10778712321064, 0xff4f21f8ef274be2, + 0x84eff18ddfa0933f, 0xd0ec6a9f86c758a0, 0xaf82e5973c431ae0, 0x352023c00c045425, + 0xad34d7bc4a2f8961, 0xbdb4a02a24d4dee0, 0x354a4846d97447cf, 0x331a8b944d5bc19f, + 0x5ce04f8e17909035, 0x6497581bad8f4aab, 0x07c503bba647111e, 0x85f412ba78e1f7ff, + 0x7f3b920fd20f4cff, 0x424e1a9a4ce34e2f, 0x3035e2d62e1b9f0a, 0xef63114bff7b729a, + 0xe86a05889ab6bb60, 0xee0830cf095585a1, 0x4a54f7fa47d9c94b, 0x17daeece9fcb556a, + 0xc506d3f391834c6f, 0xb3f24be362e1af64, 0xc435e4e23608efdd, 0xeeba9caaa4cc1768, + 0x5a71f306daddc22d, 0x18e5205f41eba1a0, 0x7b29b4d1f6610925, 0x065cb65a0258d9a9, + 0x3e5ac8faa9fd1f95, 0x3b362362c1ea0470, 0xce0e4f6434db7a2e, 0xf327341098de52f2, + 0xcfca3b9e2a1992c3, 0x7483bf9401233e41, 0xbafbac531c6f9281, 0x4b52dd71b2c106f8, + 0xdf73b66e50b5a1f7, 0x237aec0202a20283, 0x23dd5be23dffdf2b, 0xea9730731ee122ef, + 0x5cb3f846014fbcd3, 0xc3b21c8ffdce9201, 0x06a99a02f91a8760, 0x721a81fa8fd7b7a3, + 0x6aafcdddc53cbcd8, 0xd03b464005a93bcc, 0x8212edc1b1669dcb, 0x71f4c31364c31bc7, + 0xfeeec0eba8772307, 0x1948d00a13d88cf1, 0x19064fd6d943ada8, 0x4ec8d31722697bfd, + 0x596d9a953a516609, 0xc4cb4bff53507da2, 0x1d59f3c5be36e4ca, 0xe5b4fc5bf6044c9b, + 0x1bb74e052232f735, 0x04e8a0db611ddd5d, 0x8d04eaa009b421bf, 0xa7878ae0ac0e6d58, + 0x28c1030217cab2b3, 0x827943767e56a883, 0x28fce5fa02d22809, 0xb30c322fffc8c58e, + 0x1ca5a6a9f8066c5b, 0xb24db5f1462b2513, 0x02f653b89b7e5f6c, 0xe31f8fb5d5f78eee, + 0x266acc514ed93501, 0x936879d1c6fddcc4, 0xcd51be3636af1952, 0x3fdbb6fc332c78c8, + 0x9eb656379fa73094, 0x056146cc92fa0f96, 0xed6c4f1836c027c3, 0x021e0bb5d2113f2a, + 0x8983e42ec1c626b3, 0x73ea9bc6513ad9c9, 0x0c904903b24f4247, 0xacbac1e6243e2525, + 0x0b1069a0c230fb06, 0x77d709fca3fc1ce5, 0x87ad0f65020947e6, 0x555302641c53f4e6, + 0x65ea87871fa9aaee, 0x58aaf4ecc1067bb4, 0x1a66c48cc4c65b3f, 0xca96aca48b2ea969, + 0xa68eb70bad14de2b, 0x5ccdb3d7e00a6f6e, 0xe178fbfec73fe72f, 0x2b63d6a16b83e890, + 
0x32fdb7a5330fbae0, 0x2ab5803c8d1bf32c, 0xda838388c1527c94, 0x16a50bdc4de24acb, + 0xe561301f134c074a, 0xd7ae63d2816b4db1, 0x036aabd4df0dd741, 0xc5e0db8783435b9d, + 0x9c4386cf0a07f3b2, 0x6a72ac1aa56a13a1, 0x299bbdb04bb20a23, 0x138c1018fda16b81, + 0x0e354f0b3bda49df, 0x9f4c295b23127437, 0xd133ceb2bd561341, 0xd8b4bfd5a526ac29, + 0xcdd0a70ddc1c7bbd, 0x81dce595bf572225, 0x1c6f925c05f6efd7, 0x8ae5097553856ea0, + 0x3aabeaeef248f60d, 0xd9005809d19a69e2, 0x2a3a1a314311cc27, 0x89bb2dc76b2b624a, + 0x50a2a95d0412e289, 0x9def8df564e68581, 0xf49010a9b2e2ea5c, 0x8602ae175d9ff3f0, + 0xbf037e245369a618, 0x8038164365f6e2b5, 0xe2e1f6163b4e8d08, 0x8df9314914f0857e, + ]]; diff --git a/parquet/src/column/chunker/mod.rs b/parquet/src/column/chunker/mod.rs new file mode 100644 index 000000000000..c4caf18af66b --- /dev/null +++ b/parquet/src/column/chunker/mod.rs @@ -0,0 +1,40 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Content-defined chunking (CDC) for Parquet data pages. +//! +//! CDC creates data page boundaries based on content rather than fixed sizes, +//! enabling efficient deduplication in content-addressable storage (CAS) systems. +//! See [`CdcOptions`](crate::file::properties::CdcOptions) for configuration. 
+ +mod cdc; +mod cdc_generated; + +pub(crate) use cdc::ContentDefinedChunker; + +/// A chunk of data with level and value offsets for record-shredded nested data. +#[derive(Debug, Clone, Copy)] +pub(crate) struct CdcChunk { + /// The start offset of this chunk inside the given levels. + pub level_offset: usize, + /// The start offset of this chunk inside the given values array. + pub value_offset: usize, + /// The number of levels in this chunk. + pub num_levels: usize, + /// The number of values (Arrow array elements) in this chunk. + pub num_values: usize, +} diff --git a/parquet/src/column/mod.rs b/parquet/src/column/mod.rs index 1e534bdd6b77..115c8dd01b80 100644 --- a/parquet/src/column/mod.rs +++ b/parquet/src/column/mod.rs @@ -117,6 +117,8 @@ //! assert_eq!(rep_levels, vec![0, 1, 0, 1, 1]); //! ``` +#[cfg(feature = "arrow")] +pub(crate) mod chunker; pub mod page; #[cfg(feature = "encryption")] pub(crate) mod page_encryption; diff --git a/parquet/src/column/writer/mod.rs b/parquet/src/column/writer/mod.rs index c014397f132e..4c3dbabc2132 100644 --- a/parquet/src/column/writer/mod.rs +++ b/parquet/src/column/writer/mod.rs @@ -100,6 +100,15 @@ impl ColumnWriter<'_> { downcast_writer!(self, typed, typed.get_estimated_total_bytes()) } + /// Finalize the currently buffered values as a data page. + /// + /// This is used by content-defined chunking to force a page boundary at + /// content-determined positions. + #[cfg(feature = "arrow")] + pub(crate) fn add_data_page(&mut self) -> Result<()> { + downcast_writer!(self, typed, typed.add_data_page()) + } + /// Close this [`ColumnWriter`], returning the metadata for the column chunk. pub fn close(self) -> Result { downcast_writer!(self, typed, typed.close()) @@ -1001,7 +1010,7 @@ impl<'a, E: ColumnValueEncoder> GenericColumnWriter<'a, E> { /// Adds data page. /// Data page is either buffered in case of dictionary encoding or written directly. 
- fn add_data_page(&mut self) -> Result<()> { + pub(crate) fn add_data_page(&mut self) -> Result<()> { // Extract encoded values let values_data = self.encoder.flush_data_page()?; diff --git a/parquet/src/file/properties.rs b/parquet/src/file/properties.rs index ae21de304404..ae15cc6b8263 100644 --- a/parquet/src/file/properties.rs +++ b/parquet/src/file/properties.rs @@ -61,6 +61,64 @@ pub const DEFAULT_STATISTICS_TRUNCATE_LENGTH: Option = Some(64); pub const DEFAULT_OFFSET_INDEX_DISABLED: bool = false; /// Default values for [`WriterProperties::coerce_types`] pub const DEFAULT_COERCE_TYPES: bool = false; +/// Default minimum chunk size for content-defined chunking: 256 KiB. +pub const DEFAULT_CDC_MIN_CHUNK_SIZE: usize = 256 * 1024; +/// Default maximum chunk size for content-defined chunking: 1024 KiB. +pub const DEFAULT_CDC_MAX_CHUNK_SIZE: usize = 1024 * 1024; +/// Default normalization level for content-defined chunking. +pub const DEFAULT_CDC_NORM_LEVEL: i32 = 0; + +/// EXPERIMENTAL: Options for content-defined chunking (CDC). +/// +/// Content-defined chunking is an experimental feature that optimizes parquet +/// files for content addressable storage (CAS) systems by writing data pages +/// according to content-defined chunk boundaries. This allows for more +/// efficient deduplication of data across files, hence more efficient network +/// transfers and storage. +/// +/// Each content-defined chunk is written as a separate parquet data page. The +/// following options control the chunks' size and the chunking process. Note +/// that the chunk size is calculated based on the logical value of the data, +/// before any encoding or compression is applied. +#[derive(Debug, Clone, Copy)] +pub struct CdcOptions { + /// Minimum chunk size in bytes, default is 256 KiB. + /// The rolling hash will not be updated until this size is reached for each chunk. 
+ /// Note that all data sent through the hash function is counted towards the chunk + /// size, including definition and repetition levels if present. + pub min_chunk_size: usize, + /// Maximum chunk size in bytes, default is 1024 KiB. + /// The chunker will create a new chunk whenever the chunk size exceeds this value. + /// Note that the parquet writer has a related [`data_page_size_limit`] property that + /// controls the maximum size of a parquet data page after encoding. While setting + /// `data_page_size_limit` to a smaller value than `max_chunk_size` doesn't affect + /// the chunking effectiveness, it results in more small parquet data pages. + /// + /// [`data_page_size_limit`]: WriterPropertiesBuilder::set_data_page_size_limit + pub max_chunk_size: usize, + /// Number of bits by which to adjust the gearhash mask in order to center the chunk size + /// around the average size more aggressively, default is 0. + /// Increasing the normalization level increases the probability of finding a chunk, + /// improving the deduplication ratio, but also increasing the number of small chunks + /// resulting in many small parquet data pages. The default value provides a good + /// balance between deduplication ratio and fragmentation. + /// Use norm_level=1 or norm_level=2 to reach a higher deduplication ratio at the + /// expense of fragmentation. Negative values can also be used to reduce the + /// probability of finding a chunk, resulting in larger chunks and fewer data pages. + /// Note that values outside [-3, 3] are not recommended; prefer using the default + /// value of 0 for most use cases. + pub norm_level: i32, +} + +impl Default for CdcOptions { + fn default() -> Self { + Self { + min_chunk_size: DEFAULT_CDC_MIN_CHUNK_SIZE, + max_chunk_size: DEFAULT_CDC_MAX_CHUNK_SIZE, + norm_level: DEFAULT_CDC_NORM_LEVEL, + } + } +} /// Parquet writer version. 
/// @@ -168,6 +226,7 @@ pub struct WriterProperties { column_index_truncate_length: Option, statistics_truncate_length: Option, coerce_types: bool, + content_defined_chunking: Option, #[cfg(feature = "encryption")] pub(crate) file_encryption_properties: Option>, } @@ -364,6 +423,13 @@ impl WriterProperties { self.coerce_types } + /// EXPERIMENTAL: Returns content-defined chunking options, or `None` if CDC is disabled. + /// + /// For more details see [`WriterPropertiesBuilder::set_content_defined_chunking`] + pub fn content_defined_chunking(&self) -> Option<&CdcOptions> { + self.content_defined_chunking.as_ref() + } + /// Returns encoding for a data page, when dictionary encoding is enabled. /// /// This is not configurable. @@ -487,6 +553,7 @@ pub struct WriterPropertiesBuilder { column_index_truncate_length: Option, statistics_truncate_length: Option, coerce_types: bool, + content_defined_chunking: Option, #[cfg(feature = "encryption")] file_encryption_properties: Option>, } @@ -510,6 +577,7 @@ impl Default for WriterPropertiesBuilder { column_index_truncate_length: DEFAULT_COLUMN_INDEX_TRUNCATE_LENGTH, statistics_truncate_length: DEFAULT_STATISTICS_TRUNCATE_LENGTH, coerce_types: DEFAULT_COERCE_TYPES, + content_defined_chunking: None, #[cfg(feature = "encryption")] file_encryption_properties: None, } @@ -535,6 +603,7 @@ impl WriterPropertiesBuilder { column_index_truncate_length: self.column_index_truncate_length, statistics_truncate_length: self.statistics_truncate_length, coerce_types: self.coerce_types, + content_defined_chunking: self.content_defined_chunking, #[cfg(feature = "encryption")] file_encryption_properties: self.file_encryption_properties, } @@ -750,6 +819,37 @@ impl WriterPropertiesBuilder { self } + /// EXPERIMENTAL: Sets content-defined chunking options, or disables CDC with `None`. 
+ /// + /// When enabled, data page boundaries are determined by a rolling hash of the + /// column values, so unchanged data produces identical byte sequences across + /// file versions. This enables efficient deduplication on content-addressable + /// storage systems. + /// + /// Only supported through the Arrow writer interface ([`ArrowWriter`]). + /// + /// # Panics + /// + /// Panics if `min_chunk_size == 0` or `max_chunk_size <= min_chunk_size`. + /// + /// [`ArrowWriter`]: crate::arrow::arrow_writer::ArrowWriter + pub fn set_content_defined_chunking(mut self, options: Option) -> Self { + if let Some(ref options) = options { + assert!( + options.min_chunk_size > 0, + "min_chunk_size must be positive" + ); + assert!( + options.max_chunk_size > options.min_chunk_size, + "max_chunk_size ({}) must be greater than min_chunk_size ({})", + options.max_chunk_size, + options.min_chunk_size + ); + } + self.content_defined_chunking = options; + self + } + /// Sets FileEncryptionProperties (defaults to `None`) #[cfg(feature = "encryption")] pub fn with_file_encryption_properties( @@ -1033,6 +1133,7 @@ impl From for WriterPropertiesBuilder { column_index_truncate_length: props.column_index_truncate_length, statistics_truncate_length: props.statistics_truncate_length, coerce_types: props.coerce_types, + content_defined_chunking: props.content_defined_chunking, #[cfg(feature = "encryption")] file_encryption_properties: props.file_encryption_properties, } diff --git a/parquet/src/lib.rs b/parquet/src/lib.rs index 98106a2c1059..916892fafeae 100644 --- a/parquet/src/lib.rs +++ b/parquet/src/lib.rs @@ -67,6 +67,28 @@ //! * [`ArrowColumnWriter`] for writing using multiple threads, //! * [`RowFilter`] to apply filters during decode //! +//! ### EXPERIMENTAL: Content-Defined Chunking +//! +//! [`ArrowWriter`] supports content-defined chunking (CDC), which creates data page +//! boundaries based on content rather than fixed sizes. CDC enables efficient +//! 
deduplication in content-addressable storage (CAS) systems: when the same data +//! appears in successive file versions, it will produce identical byte sequences that +//! CAS backends can deduplicate. +//! +//! Enable CDC via [`WriterProperties`]: +//! +//! ```rust +//! # use parquet::file::properties::{WriterProperties, CdcOptions}; +//! let props = WriterProperties::builder() +//! .set_content_defined_chunking(Some(CdcOptions::default())) +//! .build(); +//! ``` +//! +//! See [`CdcOptions`] for chunk size and normalization parameters. +//! +//! [`WriterProperties`]: file::properties::WriterProperties +//! [`CdcOptions`]: file::properties::CdcOptions +//! //! [`ArrowWriter`]: arrow::arrow_writer::ArrowWriter //! [`ParquetRecordBatchReaderBuilder`]: arrow::arrow_reader::ParquetRecordBatchReaderBuilder //! [`ParquetPushDecoder`]: arrow::push_decoder::ParquetPushDecoder diff --git a/parquet/src/schema/types.rs b/parquet/src/schema/types.rs index 85f3ed48972c..2925557e7b86 100644 --- a/parquet/src/schema/types.rs +++ b/parquet/src/schema/types.rs @@ -853,6 +853,9 @@ pub struct ColumnDescriptor { /// The maximum repetition level for this column max_rep_level: i16, + /// The definition level at the nearest REPEATED ancestor, or 0 if none. + repeated_ancestor_def_level: i16, + /// The path of this column. For instance, "a.b.c.d". 
path: ColumnPath, } @@ -872,11 +875,22 @@ impl ColumnDescriptor { max_def_level: i16, max_rep_level: i16, path: ColumnPath, + ) -> Self { + Self::new_with_repeated_ancestor(primitive_type, max_def_level, max_rep_level, path, 0) + } + + pub(crate) fn new_with_repeated_ancestor( + primitive_type: TypePtr, + max_def_level: i16, + max_rep_level: i16, + path: ColumnPath, + repeated_ancestor_def_level: i16, ) -> Self { Self { primitive_type, max_def_level, max_rep_level, + repeated_ancestor_def_level, path, } } @@ -893,6 +907,12 @@ impl ColumnDescriptor { self.max_rep_level } + /// Returns the definition level at the nearest REPEATED ancestor, or 0 if none. + #[inline] + pub fn repeated_ancestor_def_level(&self) -> i16 { + self.repeated_ancestor_def_level + } + /// Returns [`ColumnPath`] for this column. pub fn path(&self) -> &ColumnPath { &self.path @@ -1069,7 +1089,16 @@ impl SchemaDescriptor { let mut path = Vec::with_capacity(INIT_SCHEMA_DEPTH); for (root_idx, f) in tp.get_fields().iter().enumerate() { path.clear(); - build_tree(f, root_idx, 0, 0, &mut leaves, &mut leaf_to_base, &mut path); + build_tree( + f, + root_idx, + 0, + 0, + 0, + &mut leaves, + &mut leaf_to_base, + &mut path, + ); } Self { @@ -1191,11 +1220,13 @@ fn count_leaves(tp: &TypePtr, n_leaves: &mut usize) { } } +#[allow(clippy::too_many_arguments)] fn build_tree<'a>( tp: &'a TypePtr, root_idx: usize, mut max_rep_level: i16, mut max_def_level: i16, + mut repeated_ancestor_def_level: i16, leaves: &mut Vec, leaf_to_base: &mut Vec, path_so_far: &mut Vec<&'a str>, @@ -1210,6 +1241,7 @@ fn build_tree<'a>( Repetition::REPEATED => { max_def_level += 1; max_rep_level += 1; + repeated_ancestor_def_level = max_def_level; } _ => {} } @@ -1218,12 +1250,14 @@ fn build_tree<'a>( Type::PrimitiveType { .. 
} => { let mut path: Vec = vec![]; path.extend(path_so_far.iter().copied().map(String::from)); - leaves.push(Arc::new(ColumnDescriptor::new( + let desc = ColumnDescriptor::new_with_repeated_ancestor( tp.clone(), max_def_level, max_rep_level, ColumnPath::new(path), - ))); + repeated_ancestor_def_level, + ); + leaves.push(Arc::new(desc)); leaf_to_base.push(root_idx); } Type::GroupType { fields, .. } => { @@ -1233,6 +1267,7 @@ fn build_tree<'a>( root_idx, max_rep_level, max_def_level, + repeated_ancestor_def_level, leaves, leaf_to_base, path_so_far, @@ -1941,6 +1976,122 @@ mod tests { assert_eq!(descr.column(3).max_rep_level(), 1); } + #[test] + fn test_schema_build_tree_repeated_ancestor_def_level() { + // Flat columns: no REPEATED ancestor → repeated_ancestor_def_level = 0 + let message_type = " + message m { + REQUIRED INT32 a; + OPTIONAL INT32 b; + OPTIONAL group s { + OPTIONAL INT32 x; + } + } + "; + let schema = parse_message_type(message_type).expect("should parse schema"); + let descr = SchemaDescriptor::new(Arc::new(schema)); + assert_eq!(descr.column(0).repeated_ancestor_def_level(), 0); // a + assert_eq!(descr.column(1).repeated_ancestor_def_level(), 0); // b + assert_eq!(descr.column(2).repeated_ancestor_def_level(), 0); // s.x + + // Standard list: OPTIONAL outer, REPEATED group, OPTIONAL element + // repeated_ancestor_def_level is the def_level at the REPEATED group (= 2) + let message_type = " + message m { + OPTIONAL group c (LIST) { + REPEATED group list { + OPTIONAL INT32 element; + } + } + } + "; + let schema = parse_message_type(message_type).expect("should parse schema"); + let descr = SchemaDescriptor::new(Arc::new(schema)); + // c(optional)=1, list(repeated)=2, element(optional)=3 + assert_eq!(descr.column(0).max_def_level(), 3); + assert_eq!(descr.column(0).max_rep_level(), 1); + assert_eq!(descr.column(0).repeated_ancestor_def_level(), 2); + + // Required list: REQUIRED outer, REPEATED group, REQUIRED element + // No OPTIONAL nodes between 
REPEATED and leaf, so repeated_ancestor_def_level == max_def_level + let message_type = " + message m { + REQUIRED group c (LIST) { + REPEATED group list { + REQUIRED INT32 element; + } + } + } + "; + let schema = parse_message_type(message_type).expect("should parse schema"); + let descr = SchemaDescriptor::new(Arc::new(schema)); + // list(repeated)=1, element(required)=1 + assert_eq!(descr.column(0).max_def_level(), 1); + assert_eq!(descr.column(0).max_rep_level(), 1); + assert_eq!(descr.column(0).repeated_ancestor_def_level(), 1); + + // Nested lists: innermost REPEATED wins + let message_type = " + message m { + OPTIONAL group outer (LIST) { + REPEATED group list { + OPTIONAL group inner (LIST) { + REPEATED group list2 { + OPTIONAL INT32 element; + } + } + } + } + } + "; + let schema = parse_message_type(message_type).expect("should parse schema"); + let descr = SchemaDescriptor::new(Arc::new(schema)); + // outer(opt)=1, list(rep)=2, inner(opt)=3, list2(rep)=4, element(opt)=5 + assert_eq!(descr.column(0).max_def_level(), 5); + assert_eq!(descr.column(0).max_rep_level(), 2); + assert_eq!(descr.column(0).repeated_ancestor_def_level(), 4); + + // Struct inside list: all sibling leaves share the same repeated_ancestor_def_level + let message_type = " + message m { + OPTIONAL group bag (LIST) { + REPEATED group list { + REQUIRED group item { + OPTIONAL INT32 x; + REQUIRED INT32 y; + } + } + } + } + "; + let schema = parse_message_type(message_type).expect("should parse schema"); + let descr = SchemaDescriptor::new(Arc::new(schema)); + // bag(opt)=1, list(rep)=2, item(req)=2, x(opt)=3 + assert_eq!(descr.column(0).repeated_ancestor_def_level(), 2); // bag.list.item.x + // bag(opt)=1, list(rep)=2, item(req)=2, y(req)=2 + assert_eq!(descr.column(1).repeated_ancestor_def_level(), 2); // bag.list.item.y + + // Map type: key (required) and value (optional) under the same REPEATED group + let message_type = " + message m { + OPTIONAL group my_map (MAP) { + REPEATED group 
key_value { + REQUIRED BYTE_ARRAY key (UTF8); + OPTIONAL INT32 value; + } + } + } + "; + let schema = parse_message_type(message_type).expect("should parse schema"); + let descr = SchemaDescriptor::new(Arc::new(schema)); + // my_map(opt)=1, key_value(rep)=2, key(req)=2 + assert_eq!(descr.column(0).max_def_level(), 2); + assert_eq!(descr.column(0).repeated_ancestor_def_level(), 2); // key: max_def == repeated_ancestor + // my_map(opt)=1, key_value(rep)=2, value(opt)=3 + assert_eq!(descr.column(1).max_def_level(), 3); + assert_eq!(descr.column(1).repeated_ancestor_def_level(), 2); // value: max_def > repeated_ancestor + } + #[test] #[should_panic(expected = "Cannot call get_physical_type() on a non-primitive type")] fn test_get_physical_type_panic() { From 322f9ce681ed51aa0c99b6517d5f43b7279ecc52 Mon Sep 17 00:00:00 2001 From: Kunal <155142500+kunalsinghdadhwal@users.noreply.github.com> Date: Fri, 20 Mar 2026 19:26:43 +0530 Subject: [PATCH 64/80] [Variant] Add unshred_variant support for Binary and LargeBinary types (#9576) # Which issue does this PR close? - Closes #9526 # Rationale for this change `shred_variant` already supports Binary and LargeBinary types (#9525, #9554), but unshred_variant does not handle these types. This means shredded Binary/LargeBinary columns cannot be converted back to unshredded VariantArrays. # What changes are included in this PR? Adds unshred_variant support for DataType::Binary and DataType::LargeBinary in parquet-variant-compute/src/unshred_variant.rs: - New enum variants PrimitiveBinary and PrimitiveLargeBinary - Match arms in append_row and try_new_opt - AppendToVariantBuilder impls for BinaryArray and LargeBinaryArray # Are these changes tested? Yes # Are there any user-facing changes? 
No breaking changes --------- Signed-off-by: Kunal Singh Dadhwal --- .../src/unshred_variant.rs | 62 +++++++++++++++++-- 1 file changed, 58 insertions(+), 4 deletions(-) diff --git a/parquet-variant-compute/src/unshred_variant.rs b/parquet-variant-compute/src/unshred_variant.rs index cfe413460086..2df36fa63f02 100644 --- a/parquet-variant-compute/src/unshred_variant.rs +++ b/parquet-variant-compute/src/unshred_variant.rs @@ -19,9 +19,9 @@ use crate::{BorrowedShreddingState, VariantArray, VariantValueArrayBuilder}; use arrow::array::{ - Array, AsArray as _, BinaryViewArray, BooleanArray, FixedSizeBinaryArray, FixedSizeListArray, - GenericListArray, GenericListViewArray, LargeStringArray, ListLikeArray, PrimitiveArray, - StringArray, StringViewArray, StructArray, + Array, AsArray as _, BinaryArray, BinaryViewArray, BooleanArray, FixedSizeBinaryArray, + FixedSizeListArray, GenericListArray, GenericListViewArray, LargeBinaryArray, LargeStringArray, + ListLikeArray, PrimitiveArray, StringArray, StringViewArray, StructArray, }; use arrow::buffer::NullBuffer; use arrow::datatypes::{ @@ -107,7 +107,9 @@ enum UnshredVariantRowBuilder<'a> { PrimitiveString(UnshredPrimitiveRowBuilder<'a, StringArray>), PrimitiveStringView(UnshredPrimitiveRowBuilder<'a, StringViewArray>), PrimitiveLargeString(UnshredPrimitiveRowBuilder<'a, LargeStringArray>), + PrimitiveBinary(UnshredPrimitiveRowBuilder<'a, BinaryArray>), PrimitiveBinaryView(UnshredPrimitiveRowBuilder<'a, BinaryViewArray>), + PrimitiveLargeBinary(UnshredPrimitiveRowBuilder<'a, LargeBinaryArray>), PrimitiveUuid(UnshredPrimitiveRowBuilder<'a, FixedSizeBinaryArray>), List(ListUnshredVariantBuilder<'a, GenericListArray>), LargeList(ListUnshredVariantBuilder<'a, GenericListArray>), @@ -150,7 +152,9 @@ impl<'a> UnshredVariantRowBuilder<'a> { Self::PrimitiveString(b) => b.append_row(builder, metadata, index), Self::PrimitiveStringView(b) => b.append_row(builder, metadata, index), Self::PrimitiveLargeString(b) => b.append_row(builder, 
metadata, index), + Self::PrimitiveBinary(b) => b.append_row(builder, metadata, index), Self::PrimitiveBinaryView(b) => b.append_row(builder, metadata, index), + Self::PrimitiveLargeBinary(b) => b.append_row(builder, metadata, index), Self::PrimitiveUuid(b) => b.append_row(builder, metadata, index), Self::List(b) => b.append_row(builder, metadata, index), Self::LargeList(b) => b.append_row(builder, metadata, index), @@ -232,7 +236,9 @@ impl<'a> UnshredVariantRowBuilder<'a> { DataType::Utf8 => primitive_builder!(PrimitiveString, as_string), DataType::Utf8View => primitive_builder!(PrimitiveStringView, as_string_view), DataType::LargeUtf8 => primitive_builder!(PrimitiveLargeString, as_string), + DataType::Binary => primitive_builder!(PrimitiveBinary, as_binary), DataType::BinaryView => primitive_builder!(PrimitiveBinaryView, as_binary_view), + DataType::LargeBinary => primitive_builder!(PrimitiveLargeBinary, as_binary), DataType::FixedSizeBinary(16) => { primitive_builder!(PrimitiveUuid, as_fixed_size_binary) } @@ -413,7 +419,9 @@ impl_append_to_variant_builder!(BooleanArray); impl_append_to_variant_builder!(StringArray); impl_append_to_variant_builder!(StringViewArray); impl_append_to_variant_builder!(LargeStringArray); +impl_append_to_variant_builder!(BinaryArray); impl_append_to_variant_builder!(BinaryViewArray); +impl_append_to_variant_builder!(LargeBinaryArray); impl_append_to_variant_builder!(PrimitiveArray); impl_append_to_variant_builder!(PrimitiveArray); impl_append_to_variant_builder!(PrimitiveArray); @@ -675,7 +683,9 @@ impl<'a, L: ListLikeArray> ListUnshredVariantBuilder<'a, L> { #[cfg(test)] mod tests { use crate::VariantArray; - use arrow::array::{BinaryViewArray, LargeStringArray, StringViewArray}; + use arrow::array::{ + BinaryArray, BinaryViewArray, LargeBinaryArray, LargeStringArray, StringViewArray, + }; use parquet_variant::Variant; #[test] @@ -720,4 +730,48 @@ mod tests { assert_eq!(result.value(1), Variant::from("middle")); 
assert_eq!(result.value(2), Variant::from("world")); } + + #[test] + fn test_unshred_binary_typed_value() { + let metadata_bytes: &[u8] = &[0x01, 0x00, 0x00]; + let metadata = BinaryViewArray::from_iter_values(vec![metadata_bytes; 3]); + + let typed_value: arrow::array::ArrayRef = + std::sync::Arc::new(BinaryArray::from_iter_values(vec![ + &b"\x00\x01\x02"[..], + &b"\xff\xaa"[..], + &b"\xde\xad\xbe\xef"[..], + ])); + + let variant_array = VariantArray::from_parts(metadata, None, Some(typed_value), None); + + let result = crate::unshred_variant(&variant_array).unwrap(); + + assert_eq!(result.len(), 3); + assert_eq!(result.value(0), Variant::from(&b"\x00\x01\x02"[..])); + assert_eq!(result.value(1), Variant::from(&b"\xff\xaa"[..])); + assert_eq!(result.value(2), Variant::from(&b"\xde\xad\xbe\xef"[..])); + } + + #[test] + fn test_unshred_largebinary_typed_value() { + let metadata_bytes: &[u8] = &[0x01, 0x00, 0x00]; + let metadata = BinaryViewArray::from_iter_values(vec![metadata_bytes; 3]); + + let typed_value: arrow::array::ArrayRef = + std::sync::Arc::new(LargeBinaryArray::from_iter_values(vec![ + &b"\x00\x01\x02"[..], + &b"\xff\xaa"[..], + &b"\xde\xad\xbe\xef"[..], + ])); + + let variant_array = VariantArray::from_parts(metadata, None, Some(typed_value), None); + + let result = crate::unshred_variant(&variant_array).unwrap(); + + assert_eq!(result.len(), 3); + assert_eq!(result.value(0), Variant::from(&b"\x00\x01\x02"[..])); + assert_eq!(result.value(1), Variant::from(&b"\xff\xaa"[..])); + assert_eq!(result.value(2), Variant::from(&b"\xde\xad\xbe\xef"[..])); + } } From 6cadf3b4de916c707e2103b123a168154e668a33 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 20 Mar 2026 15:00:39 -0400 Subject: [PATCH 65/80] Prepare for 58.1.0 Release (#9573) # Which issue does this PR close? - part of https://github.com/apache/arrow-rs/issues/9108 # Rationale for this change Prepare for next release # What changes are included in this PR? 1. Update version to `58.1.0` 2. 
Add changelog. See rendered preview here: https://github.com/alamb/arrow-rs/blob/alamb/prepare_58.1.0/CHANGELOG.md # Are these changes tested? By CI # Are there any user-facing changes? Yes --- CHANGELOG-old.md | 196 ++++++++++++++++++++++ CHANGELOG.md | 273 +++++++++++-------------------- Cargo.toml | 42 ++--- dev/release/update_change_log.sh | 4 +- 4 files changed, 317 insertions(+), 198 deletions(-) diff --git a/CHANGELOG-old.md b/CHANGELOG-old.md index 300c1f4b2e40..273884ea1fa6 100644 --- a/CHANGELOG-old.md +++ b/CHANGELOG-old.md @@ -19,6 +19,202 @@ # Historical Changelog + +## [58.0.0](https://github.com/apache/arrow-rs/tree/58.0.0) (2026-02-19) + +[Full Changelog](https://github.com/apache/arrow-rs/compare/57.3.0...58.0.0) + +**Breaking changes:** + +- Remove support for List types in bit\_length kernel [\#9350](https://github.com/apache/arrow-rs/pull/9350) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([codephage2020](https://github.com/codephage2020)) +- Optimize `from_bitwise_unary_op` [\#9297](https://github.com/apache/arrow-rs/pull/9297) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan)) +- Mark `BufferBuilder::new_from_buffer` as unsafe [\#9292](https://github.com/apache/arrow-rs/pull/9292) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jefffrey](https://github.com/Jefffrey)) +- \[Variant\] Support `['fieldName']` in VariantPath parser [\#9276](https://github.com/apache/arrow-rs/pull/9276) ([klion26](https://github.com/klion26)) +- Remove parquet arrow\_cast dependency [\#9077](https://github.com/apache/arrow-rs/pull/9077) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- feat: change default behavior for Parquet `PageEncodingStats` to bitmask [\#9051](https://github.com/apache/arrow-rs/pull/9051) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([WaterWhisperer](https://github.com/WaterWhisperer)) +- 
\[arrow\] Minimize allocation in GenericViewArray::slice\(\) [\#9016](https://github.com/apache/arrow-rs/pull/9016) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([maxburke](https://github.com/maxburke)) + +**Implemented enhancements:** + +- Avoid allocating a `Vec` in `StructBuilder` [\#9427](https://github.com/apache/arrow-rs/issues/9427) +- Zstd context reuse [\#9401](https://github.com/apache/arrow-rs/issues/9401) +- Optimize `from_bitwise_unary_op` [\#9364](https://github.com/apache/arrow-rs/issues/9364) +- Support `RunEndEncoded` in ord comparator [\#9360](https://github.com/apache/arrow-rs/issues/9360) +- Support `RunEndEncoded` arrays in `arrow-json` [\#9359](https://github.com/apache/arrow-rs/issues/9359) +- Support `BinaryView` in `bit_length` kernel [\#9351](https://github.com/apache/arrow-rs/issues/9351) +- Remove support for `List` types in `bit_length` kernel [\#9349](https://github.com/apache/arrow-rs/issues/9349) +- Support roundtrip `ListView` in parquet arrow writer [\#9344](https://github.com/apache/arrow-rs/issues/9344) +- Support `ListView` in `length` kernel [\#9343](https://github.com/apache/arrow-rs/issues/9343) +- Support `ListView` in sort kernel [\#9341](https://github.com/apache/arrow-rs/issues/9341) +- Add some way to create a Timestamp from a `DateTime` [\#9337](https://github.com/apache/arrow-rs/issues/9337) +- Introduce `DataType::is_list` and `DataType::IsBinary` [\#9326](https://github.com/apache/arrow-rs/issues/9326) +- Performance of creating all null dictionary array can be improved [\#9321](https://github.com/apache/arrow-rs/issues/9321) +- \[arrow-avro\] Add missing Arrow DataType support with `avro_custom_types` round-trip + non-custom fallbacks [\#9290](https://github.com/apache/arrow-rs/issues/9290) + +**Fixed bugs:** + +- ArrowArrayStreamReader errors on zero-column record batches [\#9394](https://github.com/apache/arrow-rs/issues/9394) +- Regression on main \(58\): Parquet argument error: Parquet error: 
Required field type\_ is missing [\#9315](https://github.com/apache/arrow-rs/issues/9315) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] + +**Documentation updates:** + +- Improve safety documentation of the `Array` trait [\#9314](https://github.com/apache/arrow-rs/pull/9314) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Improve docs and add build\(\) method to `{Null,Boolean,}BufferBuilder` [\#9155](https://github.com/apache/arrow-rs/pull/9155) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Improve `ArrowReaderBuilder::with_row_filter` documentation [\#9153](https://github.com/apache/arrow-rs/pull/9153) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb)) +- docs: Improve main README.md and highlight community [\#9119](https://github.com/apache/arrow-rs/pull/9119) ([alamb](https://github.com/alamb)) +- Docs: Add additional documentation and example for `make_array` [\#9112](https://github.com/apache/arrow-rs/pull/9112) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- doc: fix link on FixedSizeListArray doc [\#9033](https://github.com/apache/arrow-rs/pull/9033) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jefffrey](https://github.com/Jefffrey)) + +**Performance improvements:** + +- Replace `ArrayData` with direct Array construction [\#9338](https://github.com/apache/arrow-rs/pull/9338) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([liamzwbao](https://github.com/liamzwbao)) +- Remove some `unsafe` and allocations when creating PrimitiveArrays from Vec and `from_trusted_len_iter` [\#9299](https://github.com/apache/arrow-rs/pull/9299) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- parquet: rle skip decode loop when batch contains all max levels \(aka no nulls\) 
[\#9258](https://github.com/apache/arrow-rs/pull/9258) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([lyang24](https://github.com/lyang24)) +- Improve parquet BinaryView / StringView decoder performance \(up to -35%\) [\#9236](https://github.com/apache/arrow-rs/pull/9236) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Dandandan](https://github.com/Dandandan)) +- Avoid a clone when creating `BooleanArray` from ArrayData [\#9159](https://github.com/apache/arrow-rs/pull/9159) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Avoid overallocating arrays in coalesce primitives / views [\#9132](https://github.com/apache/arrow-rs/pull/9132) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan)) +- perf: Avoid ArrayData allocation in PrimitiveArray::reinterpret\_cast [\#9129](https://github.com/apache/arrow-rs/pull/9129) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- \[Parquet\] perf: Create StructArrays directly rather than via `ArrayData` \(1% improvement\) [\#9120](https://github.com/apache/arrow-rs/pull/9120) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Avoid clones in `make_array` for `StructArray` and `GenericByteViewArray` [\#9114](https://github.com/apache/arrow-rs/pull/9114) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- perf: optimize hex decoding in json \(1.8x faster in binary-heavy\) [\#9091](https://github.com/apache/arrow-rs/pull/9091) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Weijun-H](https://github.com/Weijun-H)) +- Speed up binary kernels \(30% faster `and` and `or`\), add `BooleanBuffer::from_bitwise_binary_op` [\#9090](https://github.com/apache/arrow-rs/pull/9090) 
[[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- perf: improve field indexing in JSON StructArrayDecoder \(1.7x speed up\) [\#9086](https://github.com/apache/arrow-rs/pull/9086) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Weijun-H](https://github.com/Weijun-H)) +- bench: added to row\_format benchmark conversion of 53 non-nested columns [\#9081](https://github.com/apache/arrow-rs/pull/9081) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton)) +- perf: improve calculating length performance for view byte array in row conversion [\#9080](https://github.com/apache/arrow-rs/pull/9080) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton)) +- perf: improve calculating length performance for nested arrays in row conversion [\#9079](https://github.com/apache/arrow-rs/pull/9079) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton)) +- perf: improve calculating length performance for `GenericByteArray` in row conversion [\#9078](https://github.com/apache/arrow-rs/pull/9078) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton)) + +**Closed issues:** + +- BatchCoalescer::push\_batch panics on schema mismatch instead of returning error [\#9389](https://github.com/apache/arrow-rs/issues/9389) +- Release arrow-rs / parquet Minor version `57.3.0` \(January 2026\) [\#9240](https://github.com/apache/arrow-rs/issues/9240) +- \[Variant\] support `..` and `['fieldName']` syntax in the VariantPath parser [\#9050](https://github.com/apache/arrow-rs/issues/9050) +- Support Float16 for create\_random\_array [\#9028](https://github.com/apache/arrow-rs/issues/9028) + +**Merged pull requests:** + +- Avoid allocating a `Vec` in `StructBuilder` [\#9428](https://github.com/apache/arrow-rs/pull/9428) 
[[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Fokko](https://github.com/Fokko)) +- fix: fixed trait functions clash get\_date\_time\_part\_extract\_fn \(\#8221\) [\#9424](https://github.com/apache/arrow-rs/pull/9424) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([esavier](https://github.com/esavier)) +- \[Minor\] Use per-predicate projection masks in arrow\_reader\_clickbench benchmark [\#9413](https://github.com/apache/arrow-rs/pull/9413) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Dandandan](https://github.com/Dandandan)) +- Fix `ArrowArrayStreamReader` for 0-columns record batch streams [\#9405](https://github.com/apache/arrow-rs/pull/9405) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jonded94](https://github.com/jonded94)) +- Use zstd::bulk API in IPC and Parquet with context reuse for compression and decompression [\#9400](https://github.com/apache/arrow-rs/pull/9400) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan)) +- Reproduce the issue of \#9370 in a minimal, end-to-end way [\#9399](https://github.com/apache/arrow-rs/pull/9399) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([jonded94](https://github.com/jonded94)) +- perf: optimize skipper for varint values used when projecting Avro record types [\#9397](https://github.com/apache/arrow-rs/pull/9397) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mzabaluev](https://github.com/mzabaluev)) +- fix: return error instead of panic on schema mismatch in BatchCoalescer::push\_batch [\#9390](https://github.com/apache/arrow-rs/pull/9390) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([bvolpato-dd](https://github.com/bvolpato-dd)) +- Minor: Add additional test coverage for WriterProperties::{max\_row\_group\_row\_count,max\_row\_group\_size} [\#9387](https://github.com/apache/arrow-rs/pull/9387) 
[[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb)) +- Moving invalid\_utf8 tests into a separate mod [\#9384](https://github.com/apache/arrow-rs/pull/9384) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([sdf-jkl](https://github.com/sdf-jkl)) +- Update sysinfo requirement from 0.37.1 to 0.38.1 [\#9383](https://github.com/apache/arrow-rs/pull/9383) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([dependabot[bot]](https://github.com/apps/dependabot)) +- feat: support RunEndEncoded arrays in arrow-json reader and writer [\#9379](https://github.com/apache/arrow-rs/pull/9379) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Abhisheklearn12](https://github.com/Abhisheklearn12)) +- Remove lint issues in parquet-related code. [\#9375](https://github.com/apache/arrow-rs/pull/9375) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([brunal](https://github.com/brunal)) +- Add RunEndEncoded array comparator [\#9368](https://github.com/apache/arrow-rs/pull/9368) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([codephage2020](https://github.com/codephage2020)) +- feat: support BinaryView in bit\_length kernel [\#9363](https://github.com/apache/arrow-rs/pull/9363) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Abhisheklearn12](https://github.com/Abhisheklearn12)) +- Add regression tests for Parquet large binary offset overflow [\#9361](https://github.com/apache/arrow-rs/pull/9361) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([vigneshsiva11](https://github.com/vigneshsiva11)) +- feat: add max\_row\_group\_bytes option to WriterProperties [\#9357](https://github.com/apache/arrow-rs/pull/9357) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([yonipeleg33](https://github.com/yonipeleg33)) +- doc: remove disclaimer about `ListView` not being fully supported [\#9356](https://github.com/apache/arrow-rs/pull/9356) 
[[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jefffrey](https://github.com/Jefffrey)) +- Move row\_filter async tests from parquet async reader [\#9355](https://github.com/apache/arrow-rs/pull/9355) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([sdf-jkl](https://github.com/sdf-jkl)) +- \[Parquet\] Allow setting page size per column [\#9353](https://github.com/apache/arrow-rs/pull/9353) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([XiangpengHao](https://github.com/XiangpengHao)) +- feat: Support roundtrip ListView in parquet arrow writer [\#9352](https://github.com/apache/arrow-rs/pull/9352) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([codephage2020](https://github.com/codephage2020)) +- feat: add ListView and LargeListView support to arrow-ord [\#9347](https://github.com/apache/arrow-rs/pull/9347) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([codephage2020](https://github.com/codephage2020)) +- Support ListView in length kernel [\#9346](https://github.com/apache/arrow-rs/pull/9346) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([vegarsti](https://github.com/vegarsti)) +- feat: Add from\_datetime method to Timestamp types [\#9345](https://github.com/apache/arrow-rs/pull/9345) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([codephage2020](https://github.com/codephage2020)) +- \[main\] Update version to 57.3.0, add changelog [\#9334](https://github.com/apache/arrow-rs/pull/9334) ([alamb](https://github.com/alamb)) +- build\(deps\): update pyo3 requirement from 0.27.1 to 0.28.0 [\#9331](https://github.com/apache/arrow-rs/pull/9331) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Add `DataType::is_list` and `DataType::is_binary` [\#9327](https://github.com/apache/arrow-rs/pull/9327) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([AdamGS](https://github.com/AdamGS)) +- Fix string array equality when the values buffer is the same and 
only the offsets to access it differ [\#9325](https://github.com/apache/arrow-rs/pull/9325) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann)) +- perf: skip validation of dictionary keys if all null [\#9322](https://github.com/apache/arrow-rs/pull/9322) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([albertlockett](https://github.com/albertlockett)) +- parquet: use rwlock instead of mutex in predicate cache [\#9319](https://github.com/apache/arrow-rs/pull/9319) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([lyang24](https://github.com/lyang24)) +- nit: remove unused code [\#9318](https://github.com/apache/arrow-rs/pull/9318) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([lyang24](https://github.com/lyang24)) +- Remove unnecessary Arc\ [\#9316](https://github.com/apache/arrow-rs/pull/9316) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([scovich](https://github.com/scovich)) +- Optimize data page statistics conversion \(up to 4x\) [\#9303](https://github.com/apache/arrow-rs/pull/9303) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan)) +- \[regression\] Error with adaptive predicate pushdown: "Invalid offset in sparse column chunk data: 754, no matching page found." 
[\#9301](https://github.com/apache/arrow-rs/pull/9301) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([sdf-jkl](https://github.com/sdf-jkl)) +- Improve `PrimitiveArray::from_iter` perf [\#9294](https://github.com/apache/arrow-rs/pull/9294) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan)) +- Add additional Arrow type support [\#9291](https://github.com/apache/arrow-rs/pull/9291) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838)) +- fix: ensure `BufferBuilder::truncate` doesn't overset length [\#9288](https://github.com/apache/arrow-rs/pull/9288) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jefffrey](https://github.com/Jefffrey)) +- Add benchmark for row group index reader perf [\#9285](https://github.com/apache/arrow-rs/pull/9285) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([friendlymatthew](https://github.com/friendlymatthew)) +- fix union array row converter to handle non-sequential type ids [\#9283](https://github.com/apache/arrow-rs/pull/9283) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([friendlymatthew](https://github.com/friendlymatthew)) +- parquet: reduce clone in delta byte array decoder [\#9282](https://github.com/apache/arrow-rs/pull/9282) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([lyang24](https://github.com/lyang24)) +- fix: fix \[\[NULL\]\] array doesn't roundtrip in arrow-row bug [\#9275](https://github.com/apache/arrow-rs/pull/9275) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([lichuang](https://github.com/lichuang)) +- Enhance list casting, adding more cases for list views [\#9274](https://github.com/apache/arrow-rs/pull/9274) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jefffrey](https://github.com/Jefffrey)) +- \[Variant\] Add path index access tests for list [\#9273](https://github.com/apache/arrow-rs/pull/9273) 
([liamzwbao](https://github.com/liamzwbao)) +- Factor out json reader's static make\_decoder args to a struct [\#9271](https://github.com/apache/arrow-rs/pull/9271) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([scovich](https://github.com/scovich)) +- make\_decoder accepts borrowed DataType instead of owned [\#9270](https://github.com/apache/arrow-rs/pull/9270) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([scovich](https://github.com/scovich)) +- Implement a more generic from\_nested\_iter method for list arrays [\#9268](https://github.com/apache/arrow-rs/pull/9268) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann)) +- Move extension type construction logic out of Field [\#9266](https://github.com/apache/arrow-rs/pull/9266) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([scovich](https://github.com/scovich)) +- fix: support casting string to f16 [\#9262](https://github.com/apache/arrow-rs/pull/9262) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jefffrey](https://github.com/Jefffrey)) +- Add additional coverage for StringViewArray comparisons [\#9257](https://github.com/apache/arrow-rs/pull/9257) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Upgrade to object store 0.13.1 [\#9256](https://github.com/apache/arrow-rs/pull/9256) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Dandandan](https://github.com/Dandandan)) +- \[Parquet\] test adaptive predicate pushdown with skipped page [\#9251](https://github.com/apache/arrow-rs/pull/9251) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([sdf-jkl](https://github.com/sdf-jkl)) +- Speed up string view comparison \(up to 3x\) [\#9250](https://github.com/apache/arrow-rs/pull/9250) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] 
([Dandandan](https://github.com/Dandandan)) +- Add bench for LocalFileSystem [\#9248](https://github.com/apache/arrow-rs/pull/9248) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Dandandan](https://github.com/Dandandan)) +- \[Parquet\] Add test for reading/writing long UTF8 StringViews [\#9246](https://github.com/apache/arrow-rs/pull/9246) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb)) +- \[Parquet\] test adaptive predicate pushdown with skipped page [\#9243](https://github.com/apache/arrow-rs/pull/9243) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([erratic-pattern](https://github.com/erratic-pattern)) +- Add tests and fixes for schema resolution bug [\#9237](https://github.com/apache/arrow-rs/pull/9237) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838)) +- Revert "Seal Array trait \(\#9092\)", mark `Array` as `unsafe` [\#9234](https://github.com/apache/arrow-rs/pull/9234) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([gabotechs](https://github.com/gabotechs)) +- Speedup filter \(up to ~1.5x\) `FilterBuilder::Optimize`/`BitIndexIterator`/`iter_set_bits_rev` [\#9229](https://github.com/apache/arrow-rs/pull/9229) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan)) +- optimize `RowGroupIndexReader` for single row group reads [\#9226](https://github.com/apache/arrow-rs/pull/9226) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([friendlymatthew](https://github.com/friendlymatthew)) +- test: improve arrow-row fuzz tests [\#9222](https://github.com/apache/arrow-rs/pull/9222) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton)) +- arrow-cast: support packing to Dictionary\(\_, Utf8View/BinaryView\) 
[\#9220](https://github.com/apache/arrow-rs/pull/9220) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([ethan-tyler](https://github.com/ethan-tyler)) +- Add additional test coverage for `BatchCoalescer` push\_batch\_with\_filter [\#9218](https://github.com/apache/arrow-rs/pull/9218) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- \[Parquet\] Optimize appending max level comparison in DefinitionLevelDecoder [\#9217](https://github.com/apache/arrow-rs/pull/9217) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([jhorstmann](https://github.com/jhorstmann)) +- Remove dead code to fix clippy failure on main [\#9215](https://github.com/apache/arrow-rs/pull/9215) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- \[Parquet\] perf: reuse seeked File clone in ChunkReader::get\_read\(\) [\#9214](https://github.com/apache/arrow-rs/pull/9214) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([fvaleye](https://github.com/fvaleye)) +- fix: \[9018\]Fixed RunArray slice offsets\(row, cast, eq\) [\#9213](https://github.com/apache/arrow-rs/pull/9213) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([manishkr](https://github.com/manishkr)) +- Add benchmarks for reading struct arrays from parquet [\#9210](https://github.com/apache/arrow-rs/pull/9210) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([jhorstmann](https://github.com/jhorstmann)) +- Support casting negative scale decimals to numeric [\#9207](https://github.com/apache/arrow-rs/pull/9207) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Chiicake](https://github.com/Chiicake)) +- Deprecate `ArrowReaderOptions::with_page_index` and update API [\#9199](https://github.com/apache/arrow-rs/pull/9199) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([friendlymatthew](https://github.com/friendlymatthew)) +- arrow-ipc: add reset method to 
DictionaryTracker [\#9196](https://github.com/apache/arrow-rs/pull/9196) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([albertlockett](https://github.com/albertlockett)) +- Avoid a clone when creating `ListArray` from ArrayData [\#9194](https://github.com/apache/arrow-rs/pull/9194) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Avoid a clone when creating `ListViewArray` from ArrayData [\#9193](https://github.com/apache/arrow-rs/pull/9193) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Avoid a clone when creating `MapArray` from ArrayData [\#9192](https://github.com/apache/arrow-rs/pull/9192) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Avoid a clone when creating `NullArray` from ArrayData [\#9191](https://github.com/apache/arrow-rs/pull/9191) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Avoid a clone when creating `PrimitiveArray` from ArrayData [\#9190](https://github.com/apache/arrow-rs/pull/9190) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Avoid a clone when creating `RunEndArray` from ArrayData [\#9189](https://github.com/apache/arrow-rs/pull/9189) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Avoid a clone when creating `UnionArray` from ArrayData [\#9188](https://github.com/apache/arrow-rs/pull/9188) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Avoid a clone when creating `FixedSizeListArray` from ArrayData [\#9187](https://github.com/apache/arrow-rs/pull/9187) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Avoid a clone when creating `FixedSizeBinaryArray` from ArrayData [\#9186](https://github.com/apache/arrow-rs/pull/9186) 
[[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Avoid a clone when creating `DictionaryArray` from ArrayData [\#9185](https://github.com/apache/arrow-rs/pull/9185) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- fix: take\_run return empty array instead of panic. [\#9182](https://github.com/apache/arrow-rs/pull/9182) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([thorfour](https://github.com/thorfour)) +- lint: remove unused function \(fix clippy [\#9178](https://github.com/apache/arrow-rs/pull/9178) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton)) +- add `#[inline]` to `BitIterator` `next` function [\#9177](https://github.com/apache/arrow-rs/pull/9177) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton)) +- Add ListView support to `arrow-row` and `arrow-ord` [\#9176](https://github.com/apache/arrow-rs/pull/9176) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brancz](https://github.com/brancz)) +- arrow-cast: Add display formatter for ListView [\#9175](https://github.com/apache/arrow-rs/pull/9175) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brancz](https://github.com/brancz)) +- Add BinaryFormatSupport and Row Encoder to `arrow-avro` Writer [\#9171](https://github.com/apache/arrow-rs/pull/9171) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838)) +- docs\(parquet\): move async parquet example into ArrowReaderBuilder docs [\#9167](https://github.com/apache/arrow-rs/pull/9167) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([vigneshsiva11](https://github.com/vigneshsiva11)) +- feat\(array\): add `RecordBatchStream` trait [\#9166](https://github.com/apache/arrow-rs/pull/9166) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] 
([lidavidm](https://github.com/lidavidm)) +- refactor: streamline date64 tests [\#9165](https://github.com/apache/arrow-rs/pull/9165) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([cht42](https://github.com/cht42)) +- docs: update examples in ArrowReaderOptions to use in-memory buffers [\#9163](https://github.com/apache/arrow-rs/pull/9163) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([AndreaBozzo](https://github.com/AndreaBozzo)) +- Add Avro Reader projection API [\#9162](https://github.com/apache/arrow-rs/pull/9162) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838)) +- Avoid a clone when creating StringArray/BinaryArray from ArrayData [\#9160](https://github.com/apache/arrow-rs/pull/9160) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- fix missing utf8 check for conversion from BinaryViewArray to StringViewArray [\#9158](https://github.com/apache/arrow-rs/pull/9158) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Minor: try and avoid an allocation creating `GenericByteViewArray` from `ArrayData` [\#9156](https://github.com/apache/arrow-rs/pull/9156) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Add find\_nth\_set\_bit\_position [\#9151](https://github.com/apache/arrow-rs/pull/9151) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan)) +- feat: add null comparison handling in make\_comparator [\#9150](https://github.com/apache/arrow-rs/pull/9150) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Weijun-H](https://github.com/Weijun-H)) +- Uncomment part of test\_utf8\_single\_column\_reader\_test [\#9148](https://github.com/apache/arrow-rs/pull/9148) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([sdf-jkl](https://github.com/sdf-jkl)) +- arrow-ipc: Add 
tests for nested dicts for Map and Union arrays [\#9146](https://github.com/apache/arrow-rs/pull/9146) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brancz](https://github.com/brancz)) +- Update ASF copyright year in NOTICE [\#9145](https://github.com/apache/arrow-rs/pull/9145) ([mohit7705](https://github.com/mohit7705)) +- Avoid panic on Date32 overflow [\#9144](https://github.com/apache/arrow-rs/pull/9144) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([cht42](https://github.com/cht42)) +- feat: add `reserve` to `Rows` [\#9142](https://github.com/apache/arrow-rs/pull/9142) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton)) +- docs\(variant\): fix VariantObject::get documentation to reflect Option return type [\#9139](https://github.com/apache/arrow-rs/pull/9139) ([mohit7705](https://github.com/mohit7705)) +- Add `BooleanBufferBuilder::extend_trusted_len` [\#9137](https://github.com/apache/arrow-rs/pull/9137) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan)) +- fix: support cast from `Null` to list view/run encoded/union types [\#9134](https://github.com/apache/arrow-rs/pull/9134) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jefffrey](https://github.com/Jefffrey)) +- Fix clippy [\#9130](https://github.com/apache/arrow-rs/pull/9130) ([alamb](https://github.com/alamb)) +- Fix IPC roundtripping dicts nested in ListViews [\#9126](https://github.com/apache/arrow-rs/pull/9126) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brancz](https://github.com/brancz)) +- Update readme for geospatial crate [\#9124](https://github.com/apache/arrow-rs/pull/9124) ([paleolimbot](https://github.com/paleolimbot)) +- \[Parquet\] perf: Create `PrimitiveArray`s directly rather than via `ArrayData` [\#9122](https://github.com/apache/arrow-rs/pull/9122) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] 
([alamb](https://github.com/alamb)) +- \[Parquet\] perf: Create Utf8/BinaryViewArray directly rather than via `ArrayData` [\#9121](https://github.com/apache/arrow-rs/pull/9121) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb)) +- \[parquet\] Add row group index virtual column [\#9117](https://github.com/apache/arrow-rs/pull/9117) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([friendlymatthew](https://github.com/friendlymatthew)) +- docs\(parquet\): add example for preserving dictionary encoding [\#9116](https://github.com/apache/arrow-rs/pull/9116) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([AndreaBozzo](https://github.com/AndreaBozzo)) +- doc: add example of RowFilter usage [\#9115](https://github.com/apache/arrow-rs/pull/9115) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([sonhmai](https://github.com/sonhmai)) +- docs: Update release schedule in README.md [\#9111](https://github.com/apache/arrow-rs/pull/9111) ([alamb](https://github.com/alamb)) +- feat: add benchmarks for json parser [\#9107](https://github.com/apache/arrow-rs/pull/9107) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Weijun-H](https://github.com/Weijun-H)) +- chore: switch test from `bincode` to maintained `postcard` crate \(RUSTSEC-2025-0141 \) [\#9104](https://github.com/apache/arrow-rs/pull/9104) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Add nullif\_kernel benchmark [\#9089](https://github.com/apache/arrow-rs/pull/9089) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- \[Variant\] Support Shredded Lists/Array in `variant_get` [\#9049](https://github.com/apache/arrow-rs/pull/9049) ([liamzwbao](https://github.com/liamzwbao)) +- fix:\[9018\]Fixed RunArray slice offsets [\#9036](https://github.com/apache/arrow-rs/pull/9036) 
[[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([manishkr](https://github.com/manishkr)) +- Support Float16 for create\_random\_array [\#9029](https://github.com/apache/arrow-rs/pull/9029) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([niebayes](https://github.com/niebayes)) +- fix: display `0 secs` for empty DayTime/MonthDayNano intervals [\#9023](https://github.com/apache/arrow-rs/pull/9023) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jefffrey](https://github.com/Jefffrey)) +- Add options to skip decoding `Statistics` and `SizeStatistics` in Parquet metadata [\#9008](https://github.com/apache/arrow-rs/pull/9008) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl)) + ## [57.3.0](https://github.com/apache/arrow-rs/tree/57.3.0) (2026-02-02) [Full Changelog](https://github.com/apache/arrow-rs/compare/57.2.0...57.3.0) diff --git a/CHANGELOG.md b/CHANGELOG.md index e73ee8ba3356..baccdfa79cbd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,200 +19,123 @@ # Changelog -## [58.0.0](https://github.com/apache/arrow-rs/tree/58.0.0) (2026-02-19) +## [58.1.0](https://github.com/apache/arrow-rs/tree/58.1.0) (2026-03-20) -[Full Changelog](https://github.com/apache/arrow-rs/compare/57.3.0...58.0.0) - -**Breaking changes:** - -- Remove support for List types in bit\_length kernel [\#9350](https://github.com/apache/arrow-rs/pull/9350) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([codephage2020](https://github.com/codephage2020)) -- Optimize `from_bitwise_unary_op` [\#9297](https://github.com/apache/arrow-rs/pull/9297) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan)) -- Mark `BufferBuilder::new_from_buffer` as unsafe [\#9292](https://github.com/apache/arrow-rs/pull/9292) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jefffrey](https://github.com/Jefffrey)) -- \[Variant\] Support `['fieldName']` in 
VariantPath parser [\#9276](https://github.com/apache/arrow-rs/pull/9276) ([klion26](https://github.com/klion26)) -- Remove parquet arrow\_cast dependency [\#9077](https://github.com/apache/arrow-rs/pull/9077) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) -- feat: change default behavior for Parquet `PageEncodingStats` to bitmask [\#9051](https://github.com/apache/arrow-rs/pull/9051) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([WaterWhisperer](https://github.com/WaterWhisperer)) -- \[arrow\] Minimize allocation in GenericViewArray::slice\(\) [\#9016](https://github.com/apache/arrow-rs/pull/9016) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([maxburke](https://github.com/maxburke)) +[Full Changelog](https://github.com/apache/arrow-rs/compare/58.0.0...58.1.0) **Implemented enhancements:** -- Avoid allocating a `Vec` in `StructBuilder` [\#9427](https://github.com/apache/arrow-rs/issues/9427) -- Zstd context reuse [\#9401](https://github.com/apache/arrow-rs/issues/9401) -- Optimize `from_bitwise_unary_op` [\#9364](https://github.com/apache/arrow-rs/issues/9364) -- Support `RunEndEncoded` in ord comparator [\#9360](https://github.com/apache/arrow-rs/issues/9360) -- Support `RunEndEncoded` arrays in `arrow-json` [\#9359](https://github.com/apache/arrow-rs/issues/9359) -- Support `BinaryView` in `bit_length` kernel [\#9351](https://github.com/apache/arrow-rs/issues/9351) -- Remove support for `List` types in `bit_length` kernel [\#9349](https://github.com/apache/arrow-rs/issues/9349) -- Support roundtrip `ListView` in parquet arrow writer [\#9344](https://github.com/apache/arrow-rs/issues/9344) -- Support `ListView` in `length` kernel [\#9343](https://github.com/apache/arrow-rs/issues/9343) -- Support `ListView` in sort kernel [\#9341](https://github.com/apache/arrow-rs/issues/9341) -- Add some way to create a Timestamp from a `DateTime` 
[\#9337](https://github.com/apache/arrow-rs/issues/9337) -- Introduce `DataType::is_list` and `DataType::IsBinary` [\#9326](https://github.com/apache/arrow-rs/issues/9326) -- Performance of creating all null dictionary array can be improved [\#9321](https://github.com/apache/arrow-rs/issues/9321) -- \[arrow-avro\] Add missing Arrow DataType support with `avro_custom_types` round-trip + non-custom fallbacks [\#9290](https://github.com/apache/arrow-rs/issues/9290) +- Reuse compression dict lz4\_block [\#9566](https://github.com/apache/arrow-rs/issues/9566) +- \[Variant\] Add `variant_to_arrow` `Struct` type support [\#9529](https://github.com/apache/arrow-rs/issues/9529) +- \[Variant\] Add `unshred_variant` support for `Binary` and `LargeBinary` types [\#9526](https://github.com/apache/arrow-rs/issues/9526) +- \[Variant\] Add `shred_variant` support for `LargeUtf8` and `LargeBinary` types [\#9525](https://github.com/apache/arrow-rs/issues/9525) +- \[Variant\] `variant_get` tests clean up [\#9517](https://github.com/apache/arrow-rs/issues/9517) +- parquet\_variant: Support LargeUtf8 typed value in `unshred_variant` [\#9513](https://github.com/apache/arrow-rs/issues/9513) +- parquet-variant: Support string view typed value in `unshred_variant` [\#9512](https://github.com/apache/arrow-rs/issues/9512) +- Deprecate ArrowTimestampType::make\_value in favor of from\_naive\_datetime [\#9490](https://github.com/apache/arrow-rs/issues/9490) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Followup for support \['fieldName'\] in VariantPath [\#9478](https://github.com/apache/arrow-rs/issues/9478) +- Speedup DELTA\_BINARY\_PACKED decoding when bitwidth is 0 [\#9476](https://github.com/apache/arrow-rs/issues/9476) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Support CSV files encoded with charsets other than UTF-8 [\#9465](https://github.com/apache/arrow-rs/issues/9465) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Expose Avro 
writer schema when building the reader [\#9460](https://github.com/apache/arrow-rs/issues/9460) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Python: avoid importing pyarrow classes every time [\#9438](https://github.com/apache/arrow-rs/issues/9438) +- Add `append_nulls` to `MapBuilder` [\#9431](https://github.com/apache/arrow-rs/issues/9431) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Add `append_non_nulls` to `StructBuilder` [\#9429](https://github.com/apache/arrow-rs/issues/9429) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Add `append_value_n` to GenericByteBuilder [\#9425](https://github.com/apache/arrow-rs/issues/9425) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Optimize `from_bitwise_binary_op` [\#9378](https://github.com/apache/arrow-rs/issues/9378) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Configurable Arrow representation of UTC timestamps for Avro reader [\#9279](https://github.com/apache/arrow-rs/issues/9279) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] **Fixed bugs:** -- ArrowArrayStreamReader errors on zero-column record batches [\#9394](https://github.com/apache/arrow-rs/issues/9394) -- Regression on main \(58\): Parquet argument error: Parquet error: Required field type\_ is missing [\#9315](https://github.com/apache/arrow-rs/issues/9315) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- MutableArrayData::extend does not copy child values for ListView arrays [\#9561](https://github.com/apache/arrow-rs/issues/9561) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- ListView interleave bug [\#9559](https://github.com/apache/arrow-rs/issues/9559) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Flight encoding panics with "no dict id for field" with nested dict arrays [\#9555](https://github.com/apache/arrow-rs/issues/9555) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
[[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] +- "DeltaBitPackDecoder only supports Int32Type and Int64Type" but unsigned types are supported too [\#9551](https://github.com/apache/arrow-rs/issues/9551) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Potential overflow when calling `util::bit_mask::set_bits` \(soundness issue\) [\#9543](https://github.com/apache/arrow-rs/issues/9543) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- handle Null type in try\_merge for Struct, List, LargeList, and Union [\#9523](https://github.com/apache/arrow-rs/issues/9523) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Invalid offset in sparse column chunk data for multiple predicates [\#9516](https://github.com/apache/arrow-rs/issues/9516) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- debug\_assert\_eq! in BatchCoalescer panics in debug mode when batch\_size \< 4 [\#9506](https://github.com/apache/arrow-rs/issues/9506) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Parquet Statistics::null\_count\_opt wrongly returns Some\(0\) when stats are missing [\#9451](https://github.com/apache/arrow-rs/issues/9451) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Error "Not all children array length are the same!" 
when decoding rows spanning across page boundaries in parquet file when using `RowSelection` [\#9370](https://github.com/apache/arrow-rs/issues/9370) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Avro schema resolution not properly supported for complex types [\#9336](https://github.com/apache/arrow-rs/issues/9336) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] **Documentation updates:** -- Improve safety documentation of the `Array` trait [\#9314](https://github.com/apache/arrow-rs/pull/9314) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Improve docs and add build\(\) method to `{Null,Boolean,}BufferBuilder` [\#9155](https://github.com/apache/arrow-rs/pull/9155) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Improve `ArrowReaderBuilder::with_row_filter` documentation [\#9153](https://github.com/apache/arrow-rs/pull/9153) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb)) -- docs: Improve main README.md and highlight community [\#9119](https://github.com/apache/arrow-rs/pull/9119) ([alamb](https://github.com/alamb)) -- Docs: Add additional documentation and example for `make_array` [\#9112](https://github.com/apache/arrow-rs/pull/9112) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- doc: fix link on FixedSizeListArray doc [\#9033](https://github.com/apache/arrow-rs/pull/9033) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jefffrey](https://github.com/Jefffrey)) +- Update planned release schedule in README.md [\#9466](https://github.com/apache/arrow-rs/pull/9466) ([alamb](https://github.com/alamb)) **Performance improvements:** -- Replace `ArrayData` with direct Array construction [\#9338](https://github.com/apache/arrow-rs/pull/9338) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] 
([liamzwbao](https://github.com/liamzwbao)) -- Remove some `unsafe` and allocations when creating PrimitiveArrays from Vec and `from_trusted_len_iter` [\#9299](https://github.com/apache/arrow-rs/pull/9299) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- parquet: rle skip decode loop when batch contains all max levels \(aka no nulls\) [\#9258](https://github.com/apache/arrow-rs/pull/9258) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([lyang24](https://github.com/lyang24)) -- Improve parquet BinaryView / StringView decoder performance \(up to -35%\) [\#9236](https://github.com/apache/arrow-rs/pull/9236) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Dandandan](https://github.com/Dandandan)) -- Avoid a clone when creating `BooleanArray` from ArrayData [\#9159](https://github.com/apache/arrow-rs/pull/9159) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Avoid overallocating arrays in coalesce primitives / views [\#9132](https://github.com/apache/arrow-rs/pull/9132) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan)) -- perf: Avoid ArrayData allocation in PrimitiveArray::reinterpret\_cast [\#9129](https://github.com/apache/arrow-rs/pull/9129) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- \[Parquet\] perf: Create StructArrays directly rather than via `ArrayData` \(1% improvement\) [\#9120](https://github.com/apache/arrow-rs/pull/9120) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Avoid clones in `make_array` for `StructArray` and `GenericByteViewArray` [\#9114](https://github.com/apache/arrow-rs/pull/9114) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- perf: optimize hex 
decoding in json \(1.8x faster in binary-heavy\) [\#9091](https://github.com/apache/arrow-rs/pull/9091) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Weijun-H](https://github.com/Weijun-H)) -- Speed up binary kernels \(30% faster `and` and `or`\), add `BooleanBuffer::from_bitwise_binary_op` [\#9090](https://github.com/apache/arrow-rs/pull/9090) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- perf: improve field indexing in JSON StructArrayDecoder \(1.7x speed up\) [\#9086](https://github.com/apache/arrow-rs/pull/9086) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Weijun-H](https://github.com/Weijun-H)) -- bench: added to row\_format benchmark conversion of 53 non-nested columns [\#9081](https://github.com/apache/arrow-rs/pull/9081) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton)) -- perf: improve calculating length performance for view byte array in row conversion [\#9080](https://github.com/apache/arrow-rs/pull/9080) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton)) -- perf: improve calculating length performance for nested arrays in row conversion [\#9079](https://github.com/apache/arrow-rs/pull/9079) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton)) -- perf: improve calculating length performance for `GenericByteArray` in row conversion [\#9078](https://github.com/apache/arrow-rs/pull/9078) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton)) +- Introduce `NullBuffer::try_from_unsliced` to simplify array construction [\#9385](https://github.com/apache/arrow-rs/issues/9385) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- perf: Coalesce page fetches when RowSelection selects all rows 
[\#9578](https://github.com/apache/arrow-rs/pull/9578) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Dandandan](https://github.com/Dandandan)) +- Use chunks\_exact for has\_true/has\_false to enable compiler unrolling [\#9570](https://github.com/apache/arrow-rs/pull/9570) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([adriangb](https://github.com/adriangb)) +- pyarrow: Cache the imported classes to avoid importing them each time [\#9439](https://github.com/apache/arrow-rs/pull/9439) ([Tpt](https://github.com/Tpt)) **Closed issues:** -- BatchCoalescer::push\_batch panics on schema mismatch instead of returning error [\#9389](https://github.com/apache/arrow-rs/issues/9389) -- Release arrow-rs / parquet Minor version `57.3.0` \(January 2026\) [\#9240](https://github.com/apache/arrow-rs/issues/9240) -- \[Variant\] support `..` and `['fieldName']` syntax in the VariantPath parser [\#9050](https://github.com/apache/arrow-rs/issues/9050) -- Support Float16 for create\_random\_array [\#9028](https://github.com/apache/arrow-rs/issues/9028) +- Duplicate macro definition: `partially_shredded_variant_array_gen` [\#9492](https://github.com/apache/arrow-rs/issues/9492) +- Enable `LargeList` / `ListView` / `LargeListView` for `VariantArray::try_new` [\#9455](https://github.com/apache/arrow-rs/issues/9455) +- Support variables/expressions in record\_batch! 
macro [\#9245](https://github.com/apache/arrow-rs/issues/9245) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] **Merged pull requests:** -- Avoid allocating a `Vec` in `StructBuilder` [\#9428](https://github.com/apache/arrow-rs/pull/9428) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Fokko](https://github.com/Fokko)) -- fix: fixed trait functions clash get\_date\_time\_part\_extract\_fn \(\#8221\) [\#9424](https://github.com/apache/arrow-rs/pull/9424) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([esavier](https://github.com/esavier)) -- \[Minor\] Use per-predicate projection masks in arrow\_reader\_clickbench benchmark [\#9413](https://github.com/apache/arrow-rs/pull/9413) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Dandandan](https://github.com/Dandandan)) -- Fix `ArrowArrayStreamReader` for 0-columns record batch streams [\#9405](https://github.com/apache/arrow-rs/pull/9405) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jonded94](https://github.com/jonded94)) -- Use zstd::bulk API in IPC and Parquet with context reuse for compression and decompression [\#9400](https://github.com/apache/arrow-rs/pull/9400) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan)) -- Reproduce the issue of \#9370 in a minimal, end-to-end way [\#9399](https://github.com/apache/arrow-rs/pull/9399) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([jonded94](https://github.com/jonded94)) -- perf: optimize skipper for varint values used when projecting Avro record types [\#9397](https://github.com/apache/arrow-rs/pull/9397) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mzabaluev](https://github.com/mzabaluev)) -- fix: return error instead of panic on schema mismatch in BatchCoalescer::push\_batch [\#9390](https://github.com/apache/arrow-rs/pull/9390) 
[[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([bvolpato-dd](https://github.com/bvolpato-dd)) -- Minor: Add additional test coverage for WriterProperties::{max\_row\_group\_row\_count,max\_row\_group\_size} [\#9387](https://github.com/apache/arrow-rs/pull/9387) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb)) -- Moving invalid\_utf8 tests into a separate mod [\#9384](https://github.com/apache/arrow-rs/pull/9384) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([sdf-jkl](https://github.com/sdf-jkl)) -- Update sysinfo requirement from 0.37.1 to 0.38.1 [\#9383](https://github.com/apache/arrow-rs/pull/9383) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([dependabot[bot]](https://github.com/apps/dependabot)) -- feat: support RunEndEncoded arrays in arrow-json reader and writer [\#9379](https://github.com/apache/arrow-rs/pull/9379) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Abhisheklearn12](https://github.com/Abhisheklearn12)) -- Remove lint issues in parquet-related code. 
[\#9375](https://github.com/apache/arrow-rs/pull/9375) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([brunal](https://github.com/brunal)) -- Add RunEndEncoded array comparator [\#9368](https://github.com/apache/arrow-rs/pull/9368) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([codephage2020](https://github.com/codephage2020)) -- feat: support BinaryView in bit\_length kernel [\#9363](https://github.com/apache/arrow-rs/pull/9363) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Abhisheklearn12](https://github.com/Abhisheklearn12)) -- Add regression tests for Parquet large binary offset overflow [\#9361](https://github.com/apache/arrow-rs/pull/9361) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([vigneshsiva11](https://github.com/vigneshsiva11)) -- feat: add max\_row\_group\_bytes option to WriterProperties [\#9357](https://github.com/apache/arrow-rs/pull/9357) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([yonipeleg33](https://github.com/yonipeleg33)) -- doc: remove disclaimer about `ListView` not being fully supported [\#9356](https://github.com/apache/arrow-rs/pull/9356) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jefffrey](https://github.com/Jefffrey)) -- Move row\_filter async tests from parquet async reader [\#9355](https://github.com/apache/arrow-rs/pull/9355) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([sdf-jkl](https://github.com/sdf-jkl)) -- \[Parquet\] Allow setting page size per column [\#9353](https://github.com/apache/arrow-rs/pull/9353) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([XiangpengHao](https://github.com/XiangpengHao)) -- feat: Support roundtrip ListView in parquet arrow writer [\#9352](https://github.com/apache/arrow-rs/pull/9352) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([codephage2020](https://github.com/codephage2020)) -- feat: add ListView and LargeListView support to arrow-ord 
[\#9347](https://github.com/apache/arrow-rs/pull/9347) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([codephage2020](https://github.com/codephage2020)) -- Support ListView in length kernel [\#9346](https://github.com/apache/arrow-rs/pull/9346) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([vegarsti](https://github.com/vegarsti)) -- feat: Add from\_datetime method to Timestamp types [\#9345](https://github.com/apache/arrow-rs/pull/9345) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([codephage2020](https://github.com/codephage2020)) -- \[main\] Update version to 57.3.0, add changelog [\#9334](https://github.com/apache/arrow-rs/pull/9334) ([alamb](https://github.com/alamb)) -- build\(deps\): update pyo3 requirement from 0.27.1 to 0.28.0 [\#9331](https://github.com/apache/arrow-rs/pull/9331) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Add `DataType::is_list` and `DataType::is_binary` [\#9327](https://github.com/apache/arrow-rs/pull/9327) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([AdamGS](https://github.com/AdamGS)) -- Fix string array equality when the values buffer is the same and only the offsets to access it differ [\#9325](https://github.com/apache/arrow-rs/pull/9325) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann)) -- perf: skip validation of dictionary keys if all null [\#9322](https://github.com/apache/arrow-rs/pull/9322) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([albertlockett](https://github.com/albertlockett)) -- parquet: use rwlock instead of mutex in predicate cache [\#9319](https://github.com/apache/arrow-rs/pull/9319) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([lyang24](https://github.com/lyang24)) -- nit: remove usused code [\#9318](https://github.com/apache/arrow-rs/pull/9318) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([lyang24](https://github.com/lyang24)) -- 
Remove unnecessary Arc\ [\#9316](https://github.com/apache/arrow-rs/pull/9316) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([scovich](https://github.com/scovich)) -- Optimize data page statistics conversion \(up to 4x\) [\#9303](https://github.com/apache/arrow-rs/pull/9303) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan)) -- \[regression\] Error with adaptive predicate pushdown: "Invalid offset in sparse column chunk data: 754, no matching page found." [\#9301](https://github.com/apache/arrow-rs/pull/9301) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([sdf-jkl](https://github.com/sdf-jkl)) -- Improve `PrimitiveArray::from_iter` perf [\#9294](https://github.com/apache/arrow-rs/pull/9294) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan)) -- Add additional Arrow type support [\#9291](https://github.com/apache/arrow-rs/pull/9291) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838)) -- fix: ensure `BufferBuilder::truncate` doesn't overset length [\#9288](https://github.com/apache/arrow-rs/pull/9288) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jefffrey](https://github.com/Jefffrey)) -- Add benchmark for row group index reader perf [\#9285](https://github.com/apache/arrow-rs/pull/9285) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([friendlymatthew](https://github.com/friendlymatthew)) -- fix union array row converter to handle non-sequential type ids [\#9283](https://github.com/apache/arrow-rs/pull/9283) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([friendlymatthew](https://github.com/friendlymatthew)) -- parquet: reduce clone in delta byte array decoder [\#9282](https://github.com/apache/arrow-rs/pull/9282) 
[[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([lyang24](https://github.com/lyang24)) -- fix: fix \[\[NULL\]\] array doesn't roundtrip in arrow-row bug [\#9275](https://github.com/apache/arrow-rs/pull/9275) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([lichuang](https://github.com/lichuang)) -- Enhance list casting, adding more cases for list views [\#9274](https://github.com/apache/arrow-rs/pull/9274) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jefffrey](https://github.com/Jefffrey)) -- \[Variant\] Add path index access tests for list [\#9273](https://github.com/apache/arrow-rs/pull/9273) ([liamzwbao](https://github.com/liamzwbao)) -- Factor out json reader's static make\_decoder args to a struct [\#9271](https://github.com/apache/arrow-rs/pull/9271) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([scovich](https://github.com/scovich)) -- make\_decoder accepts borrowed DataType instead of owned [\#9270](https://github.com/apache/arrow-rs/pull/9270) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([scovich](https://github.com/scovich)) -- Implement a more generic from\_nested\_iter method for list arrays [\#9268](https://github.com/apache/arrow-rs/pull/9268) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann)) -- Move extension type construction logic out of Field [\#9266](https://github.com/apache/arrow-rs/pull/9266) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([scovich](https://github.com/scovich)) -- fix: support casting string to f16 [\#9262](https://github.com/apache/arrow-rs/pull/9262) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jefffrey](https://github.com/Jefffrey)) -- Add additional coverage for StringViewArray comparisons [\#9257](https://github.com/apache/arrow-rs/pull/9257) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] 
([alamb](https://github.com/alamb)) -- Upgrade to object store 0.13.1 [\#9256](https://github.com/apache/arrow-rs/pull/9256) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Dandandan](https://github.com/Dandandan)) -- \[Parquet\] test adaptive predicate pushdown with skipped page [\#9251](https://github.com/apache/arrow-rs/pull/9251) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([sdf-jkl](https://github.com/sdf-jkl)) -- Speed up string view comparison \(up to 3x\) [\#9250](https://github.com/apache/arrow-rs/pull/9250) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan)) -- Add bench for LocalFileSystem [\#9248](https://github.com/apache/arrow-rs/pull/9248) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Dandandan](https://github.com/Dandandan)) -- \[Parquet\] Add test for reading/writing long UTF8 StringViews [\#9246](https://github.com/apache/arrow-rs/pull/9246) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb)) -- \[Parquet\] test adaptive predicate pushdown with skipped page [\#9243](https://github.com/apache/arrow-rs/pull/9243) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([erratic-pattern](https://github.com/erratic-pattern)) -- Add tests and fixes for schema resolution bug [\#9237](https://github.com/apache/arrow-rs/pull/9237) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838)) -- Revert "Seal Array trait \(\#9092\)", mark `Array` as `unsafe` [\#9234](https://github.com/apache/arrow-rs/pull/9234) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([gabotechs](https://github.com/gabotechs)) -- Speedup filter \(up to ~1.5x\) `FilterBuilder::Optimize`/`BitIndexIterator`/`iter_set_bits_rev` [\#9229](https://github.com/apache/arrow-rs/pull/9229) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] 
[[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan)) -- optimize `RowGroupIndexReader` for single row group reads [\#9226](https://github.com/apache/arrow-rs/pull/9226) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([friendlymatthew](https://github.com/friendlymatthew)) -- test: improve arrow-row fuzz tests [\#9222](https://github.com/apache/arrow-rs/pull/9222) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton)) -- arrow-cast: support packing to Dictionary\(\_, Utf8View/BinaryView\) [\#9220](https://github.com/apache/arrow-rs/pull/9220) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([ethan-tyler](https://github.com/ethan-tyler)) -- Add additional test coverage for `BatchCoalescer` push\_batch\_with\_filter [\#9218](https://github.com/apache/arrow-rs/pull/9218) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- \[Parquet\] Optimize appending max level comparison in DefinitionLevelDecoder [\#9217](https://github.com/apache/arrow-rs/pull/9217) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([jhorstmann](https://github.com/jhorstmann)) -- Remove dead code to fix clippy failure on main [\#9215](https://github.com/apache/arrow-rs/pull/9215) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- \[Parquet\] perf: reuse seeked File clone in ChunkReader::get\_read\(\) [\#9214](https://github.com/apache/arrow-rs/pull/9214) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([fvaleye](https://github.com/fvaleye)) -- fix: \[9018\]Fixed RunArray slice offsets\(row, cast, eq\) [\#9213](https://github.com/apache/arrow-rs/pull/9213) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([manishkr](https://github.com/manishkr)) -- Add benchmarks for reading struct arrays from parquet 
[\#9210](https://github.com/apache/arrow-rs/pull/9210) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([jhorstmann](https://github.com/jhorstmann)) -- Support casting negative scale decimals to numeric [\#9207](https://github.com/apache/arrow-rs/pull/9207) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Chiicake](https://github.com/Chiicake)) -- Deprecate `ArrowReaderOptions::with_page_index` and update API [\#9199](https://github.com/apache/arrow-rs/pull/9199) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([friendlymatthew](https://github.com/friendlymatthew)) -- arrow-ipc: add reset method to DictionaryTracker [\#9196](https://github.com/apache/arrow-rs/pull/9196) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([albertlockett](https://github.com/albertlockett)) -- Avoid a clone when creating `ListArray` from ArrayData [\#9194](https://github.com/apache/arrow-rs/pull/9194) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Avoid a clone when creating `ListViewArray` from ArrayData [\#9193](https://github.com/apache/arrow-rs/pull/9193) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Avoid a clone when creating `MapArray` from ArrayData [\#9192](https://github.com/apache/arrow-rs/pull/9192) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Avoid a clone when creating `NullArray` from ArrayData [\#9191](https://github.com/apache/arrow-rs/pull/9191) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Avoid a clone when creating `PrimitiveArray` from ArrayData [\#9190](https://github.com/apache/arrow-rs/pull/9190) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Avoid a clone when creating `RunEndArray` from ArrayData [\#9189](https://github.com/apache/arrow-rs/pull/9189) 
[[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Avoid a clone when creating `UnionArray` from ArrayData [\#9188](https://github.com/apache/arrow-rs/pull/9188) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Avoid a clone when creating `FixedSizeListArray` from ArrayData [\#9187](https://github.com/apache/arrow-rs/pull/9187) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Avoid a clone when creating `FixedSizeBinaryArray` from ArrayData [\#9186](https://github.com/apache/arrow-rs/pull/9186) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Avoid a clone when creating `DictionaryArray` from ArrayData [\#9185](https://github.com/apache/arrow-rs/pull/9185) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- fix: take\_run return empty array instead of panic. 
[\#9182](https://github.com/apache/arrow-rs/pull/9182) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([thorfour](https://github.com/thorfour)) -- lint: remove unused function \(fix clippy [\#9178](https://github.com/apache/arrow-rs/pull/9178) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton)) -- add `#[inline]` to `BitIterator` `next` function [\#9177](https://github.com/apache/arrow-rs/pull/9177) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton)) -- Add ListView support to `arrow-row` and `arrow-ord` [\#9176](https://github.com/apache/arrow-rs/pull/9176) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brancz](https://github.com/brancz)) -- arrow-cast: Add display formatter for ListView [\#9175](https://github.com/apache/arrow-rs/pull/9175) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brancz](https://github.com/brancz)) -- Add BinaryFormatSupport and Row Encoder to `arrow-avro` Writer [\#9171](https://github.com/apache/arrow-rs/pull/9171) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838)) -- docs\(parquet\): move async parquet example into ArrowReaderBuilder docs [\#9167](https://github.com/apache/arrow-rs/pull/9167) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([vigneshsiva11](https://github.com/vigneshsiva11)) -- feat\(array\): add `RecordBatchStream` trait [\#9166](https://github.com/apache/arrow-rs/pull/9166) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([lidavidm](https://github.com/lidavidm)) -- refactor: streamline date64 tests [\#9165](https://github.com/apache/arrow-rs/pull/9165) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([cht42](https://github.com/cht42)) -- docs: update examples in ArrowReaderOptions to use in-memory buffers [\#9163](https://github.com/apache/arrow-rs/pull/9163) 
[[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([AndreaBozzo](https://github.com/AndreaBozzo)) -- Add Avro Reader projection API [\#9162](https://github.com/apache/arrow-rs/pull/9162) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838)) -- Avoid a clone when creating StringArray/BinaryArray from ArrayData [\#9160](https://github.com/apache/arrow-rs/pull/9160) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- fix missing utf8 check for conversion from BinaryViewArray to StringViewArray [\#9158](https://github.com/apache/arrow-rs/pull/9158) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Minor: try and avoid an allocation creating `GenericByteViewArray` from `ArrayData` [\#9156](https://github.com/apache/arrow-rs/pull/9156) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Add find\_nth\_set\_bit\_position [\#9151](https://github.com/apache/arrow-rs/pull/9151) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan)) -- feat: add null comparison handling in make\_comparator [\#9150](https://github.com/apache/arrow-rs/pull/9150) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Weijun-H](https://github.com/Weijun-H)) -- Uncomment part of test\_utf8\_single\_column\_reader\_test [\#9148](https://github.com/apache/arrow-rs/pull/9148) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([sdf-jkl](https://github.com/sdf-jkl)) -- arrow-ipc: Add tests for nested dicts for Map and Union arrays [\#9146](https://github.com/apache/arrow-rs/pull/9146) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brancz](https://github.com/brancz)) -- Update ASF copyright year in NOTICE [\#9145](https://github.com/apache/arrow-rs/pull/9145) ([mohit7705](https://github.com/mohit7705)) -- Avoid panic on 
Date32 overflow [\#9144](https://github.com/apache/arrow-rs/pull/9144) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([cht42](https://github.com/cht42)) -- feat: add `reserve` to `Rows` [\#9142](https://github.com/apache/arrow-rs/pull/9142) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton)) -- docs\(variant\): fix VariantObject::get documentation to reflect Option return type [\#9139](https://github.com/apache/arrow-rs/pull/9139) ([mohit7705](https://github.com/mohit7705)) -- Add `BooleanBufferBuilder::extend_trusted_len` [\#9137](https://github.com/apache/arrow-rs/pull/9137) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan)) -- fix: support cast from `Null` to list view/run encoded/union types [\#9134](https://github.com/apache/arrow-rs/pull/9134) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jefffrey](https://github.com/Jefffrey)) -- Fix clippy [\#9130](https://github.com/apache/arrow-rs/pull/9130) ([alamb](https://github.com/alamb)) -- Fix IPC roundtripping dicts nested in ListViews [\#9126](https://github.com/apache/arrow-rs/pull/9126) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brancz](https://github.com/brancz)) -- Update readme for geospatial crate [\#9124](https://github.com/apache/arrow-rs/pull/9124) ([paleolimbot](https://github.com/paleolimbot)) -- \[Parquet\] perf: Create `PrimitiveArray`s directly rather than via `ArrayData` [\#9122](https://github.com/apache/arrow-rs/pull/9122) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb)) -- \[Parquet\] perf: Create Utf8/BinaryViewArray directly rather than via `ArrayData` [\#9121](https://github.com/apache/arrow-rs/pull/9121) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb)) -- \[parquet\] Add row group index virtual column 
[\#9117](https://github.com/apache/arrow-rs/pull/9117) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([friendlymatthew](https://github.com/friendlymatthew)) -- docs\(parquet\): add example for preserving dictionary encoding [\#9116](https://github.com/apache/arrow-rs/pull/9116) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([AndreaBozzo](https://github.com/AndreaBozzo)) -- doc: add example of RowFilter usage [\#9115](https://github.com/apache/arrow-rs/pull/9115) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([sonhmai](https://github.com/sonhmai)) -- docs: Update release schedule in README.md [\#9111](https://github.com/apache/arrow-rs/pull/9111) ([alamb](https://github.com/alamb)) -- feat: add benchmarks for json parser [\#9107](https://github.com/apache/arrow-rs/pull/9107) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Weijun-H](https://github.com/Weijun-H)) -- chore: switch test from `bincode` to maintained `postcard` crate \(RUSTSEC-2025-0141 \) [\#9104](https://github.com/apache/arrow-rs/pull/9104) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Add nullif\_kernel benchmark [\#9089](https://github.com/apache/arrow-rs/pull/9089) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- \[Variant\] Support Shredded Lists/Array in `variant_get` [\#9049](https://github.com/apache/arrow-rs/pull/9049) ([liamzwbao](https://github.com/liamzwbao)) -- fix:\[9018\]Fixed RunArray slice offsets [\#9036](https://github.com/apache/arrow-rs/pull/9036) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([manishkr](https://github.com/manishkr)) -- Support Float16 for create\_random\_array [\#9029](https://github.com/apache/arrow-rs/pull/9029) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([niebayes](https://github.com/niebayes)) -- fix: display `0 secs` for empty DayTime/MonthDayNano intervals 
[\#9023](https://github.com/apache/arrow-rs/pull/9023) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jefffrey](https://github.com/Jefffrey)) -- Add options to skip decoding `Statistics` and `SizeStatistics` in Parquet metadata [\#9008](https://github.com/apache/arrow-rs/pull/9008) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl)) +- \[Variant\] Add unshred\_variant support for Binary and LargeBinary types [\#9576](https://github.com/apache/arrow-rs/pull/9576) ([kunalsinghdadhwal](https://github.com/kunalsinghdadhwal)) +- \[Variant\] Add `variant_to_arrow` `Struct` type support [\#9572](https://github.com/apache/arrow-rs/pull/9572) ([sdf-jkl](https://github.com/sdf-jkl)) +- Make Sbbf Constructers Public [\#9569](https://github.com/apache/arrow-rs/pull/9569) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([cetra3](https://github.com/cetra3)) +- fix: Used `checked_add` for bounds checks to avoid UB [\#9568](https://github.com/apache/arrow-rs/pull/9568) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([etseidl](https://github.com/etseidl)) +- Add mutable operations to BooleanBuffer \(Bit\*Assign\) [\#9567](https://github.com/apache/arrow-rs/pull/9567) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan)) +- chore\(deps\): update lz4\_flex requirement from 0.12 to 0.13 [\#9565](https://github.com/apache/arrow-rs/pull/9565) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([dependabot[bot]](https://github.com/apps/dependabot)) +- arrow-select: fix MutableArrayData interleave for ListView [\#9560](https://github.com/apache/arrow-rs/pull/9560) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([asubiotto](https://github.com/asubiotto)) +- Move `ValueIter` into own module, and add public `record_count` function 
[\#9557](https://github.com/apache/arrow-rs/pull/9557) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Rafferty97](https://github.com/Rafferty97)) +- arrow-flight: generate dict\_ids for dicts nested inside complex types [\#9556](https://github.com/apache/arrow-rs/pull/9556) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([asubiotto](https://github.com/asubiotto)) +- add `shred_variant` support for `LargeUtf8` and `LargeBinary` [\#9554](https://github.com/apache/arrow-rs/pull/9554) ([sdf-jkl](https://github.com/sdf-jkl)) +- \[minor\] Download clickbench file when missing [\#9553](https://github.com/apache/arrow-rs/pull/9553) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Dandandan](https://github.com/Dandandan)) +- DeltaBitPackEncoderConversion: Fix panic message on invalid type [\#9552](https://github.com/apache/arrow-rs/pull/9552) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([progval](https://github.com/progval)) +- Replace interleave overflow panic with error [\#9549](https://github.com/apache/arrow-rs/pull/9549) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([xudong963](https://github.com/xudong963)) +- feat\(arrow-avro\): `HeaderInfo` to expose OCF header [\#9548](https://github.com/apache/arrow-rs/pull/9548) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mzabaluev](https://github.com/mzabaluev)) +- chore: Protect `main` branch with required reviews [\#9547](https://github.com/apache/arrow-rs/pull/9547) ([comphead](https://github.com/comphead)) +- Add benchmark for `infer_json_schema` [\#9546](https://github.com/apache/arrow-rs/pull/9546) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Rafferty97](https://github.com/Rafferty97)) +- chore\(deps\): bump black from 24.3.0 to 26.3.1 in /parquet/pytest [\#9545](https://github.com/apache/arrow-rs/pull/9545) 
[[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([dependabot[bot]](https://github.com/apps/dependabot)) +- Unroll interleave -25-30% [\#9542](https://github.com/apache/arrow-rs/pull/9542) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan)) +- Optimize `take_fixed_size_binary` For Predefined Value Lengths [\#9535](https://github.com/apache/arrow-rs/pull/9535) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tobixdev](https://github.com/tobixdev)) +- feat: expose arrow schema on async avro reader [\#9534](https://github.com/apache/arrow-rs/pull/9534) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mzabaluev](https://github.com/mzabaluev)) +- Make with\_file\_decryption\_properties pub instead of pub\(crate\) [\#9532](https://github.com/apache/arrow-rs/pull/9532) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Dandandan](https://github.com/Dandandan)) +- fix: handle Null type in try\_merge for Struct, List, LargeList, and Union [\#9524](https://github.com/apache/arrow-rs/pull/9524) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([zhuqi-lucas](https://github.com/zhuqi-lucas)) +- chore: extend record\_batch macro to support variables and expressions [\#9522](https://github.com/apache/arrow-rs/pull/9522) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([buraksenn](https://github.com/buraksenn)) +- \[Variant\] clean up `variant_get` tests [\#9518](https://github.com/apache/arrow-rs/pull/9518) ([sdf-jkl](https://github.com/sdf-jkl)) +- support large string for unshred variant [\#9515](https://github.com/apache/arrow-rs/pull/9515) ([friendlymatthew](https://github.com/friendlymatthew)) +- support string view unshred variant [\#9514](https://github.com/apache/arrow-rs/pull/9514) ([friendlymatthew](https://github.com/friendlymatthew)) +- Add has\_true\(\) and has\_false\(\) to BooleanArray [\#9511](https://github.com/apache/arrow-rs/pull/9511) 
[[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([adriangb](https://github.com/adriangb)) +- Fix Invalid offset in sparse column chunk data error for multiple predicates [\#9509](https://github.com/apache/arrow-rs/pull/9509) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([cetra3](https://github.com/cetra3)) +- fix: remove incorrect debug assertion in BatchCoalescer [\#9508](https://github.com/apache/arrow-rs/pull/9508) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Tim-53](https://github.com/Tim-53)) +- \[Json\] Add benchmarks for list json reader [\#9507](https://github.com/apache/arrow-rs/pull/9507) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([liamzwbao](https://github.com/liamzwbao)) +- fix: first next\_back\(\) on new RowsIter panics [\#9505](https://github.com/apache/arrow-rs/pull/9505) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton)) +- Add some benchmarks for decoding delta encoded Parquet [\#9500](https://github.com/apache/arrow-rs/pull/9500) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl)) +- chore: remove duplicate macro `partially_shredded_variant_array_gen` [\#9498](https://github.com/apache/arrow-rs/pull/9498) ([codephage2020](https://github.com/codephage2020)) +- Deprecate ArrowTimestampType::make\_value in favor of from\_naive\_datetime [\#9491](https://github.com/apache/arrow-rs/pull/9491) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([codephage2020](https://github.com/codephage2020)) +- fix: Do not assume missing nullcount stat means zero nullcount [\#9481](https://github.com/apache/arrow-rs/pull/9481) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([scovich](https://github.com/scovich)) +- \[Variant\] Enahcne bracket access for VariantPath [\#9479](https://github.com/apache/arrow-rs/pull/9479) ([klion26](https://github.com/klion26)) +- Optimize delta binary 
decoder in the case where bitwidth=0 [\#9477](https://github.com/apache/arrow-rs/pull/9477) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl)) +- Add PrimitiveRunBuilder::with\_data\_type\(\) to customize the values' DataType [\#9473](https://github.com/apache/arrow-rs/pull/9473) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brunal](https://github.com/brunal)) +- Convert `prettyprint` tests in `arrow-cast` to `insta` inline snapshots [\#9472](https://github.com/apache/arrow-rs/pull/9472) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([grtlr](https://github.com/grtlr)) +- Update strum\_macros requirement from 0.27 to 0.28 [\#9471](https://github.com/apache/arrow-rs/pull/9471) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([dependabot[bot]](https://github.com/apps/dependabot)) +- docs\(parquet\): Fix broken links in README [\#9467](https://github.com/apache/arrow-rs/pull/9467) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([SYaoJun](https://github.com/SYaoJun)) +- Add list-like types support to VariantArray::try\_new [\#9457](https://github.com/apache/arrow-rs/pull/9457) ([sdf-jkl](https://github.com/sdf-jkl)) +- Simplify downcast\_...!\(\) macro definitions [\#9454](https://github.com/apache/arrow-rs/pull/9454) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brunal](https://github.com/brunal)) +- feat\(parquet\): add content defined chunking for arrow writer [\#9450](https://github.com/apache/arrow-rs/pull/9450) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([kszucs](https://github.com/kszucs)) +- refactor: simplify iterator using cloned\(\).map\(Some\) [\#9449](https://github.com/apache/arrow-rs/pull/9449) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([SYaoJun](https://github.com/SYaoJun)) +- feat: Optimize from\_bitwise\_binary\_op with 64-bit 
alignment [\#9441](https://github.com/apache/arrow-rs/pull/9441) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([kunalsinghdadhwal](https://github.com/kunalsinghdadhwal)) +- docs: fix markdown link syntax in README [\#9440](https://github.com/apache/arrow-rs/pull/9440) ([SYaoJun](https://github.com/SYaoJun)) +- Move `ListLikeArray` to arrow-array to be shared with json writer and parquet unshredding [\#9437](https://github.com/apache/arrow-rs/pull/9437) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([liamzwbao](https://github.com/liamzwbao)) +- Add `claim` method to recordbatch for memory accounting [\#9433](https://github.com/apache/arrow-rs/pull/9433) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([cetra3](https://github.com/cetra3)) +- Add `append_nulls` to `MapBuilder` [\#9432](https://github.com/apache/arrow-rs/pull/9432) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Fokko](https://github.com/Fokko)) +- Add `append_non_nulls` to `StructBuilder` [\#9430](https://github.com/apache/arrow-rs/pull/9430) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Fokko](https://github.com/Fokko)) +- Add `append_value_n` to GenericByteBuilder [\#9426](https://github.com/apache/arrow-rs/pull/9426) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Fokko](https://github.com/Fokko)) +- refactor: simplify dynamic state for Avro record projection [\#9419](https://github.com/apache/arrow-rs/pull/9419) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mzabaluev](https://github.com/mzabaluev)) +- Add `NullBuffer::from_unsliced_buffer` helper and refactor call sites [\#9411](https://github.com/apache/arrow-rs/pull/9411) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Eyad3skr](https://github.com/Eyad3skr)) +- Implement min, max, sum for run-end-encoded arrays. 
[\#9409](https://github.com/apache/arrow-rs/pull/9409) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brunal](https://github.com/brunal)) +- feat: add `RunArray::new_unchecked` and `RunArray::into_parts` [\#9376](https://github.com/apache/arrow-rs/pull/9376) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton)) +- Fix skip\_records over-counting when partial record precedes num\_rows page skip [\#9374](https://github.com/apache/arrow-rs/pull/9374) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([jonded94](https://github.com/jonded94)) +- fix: resolution of complex type variants in Avro unions [\#9328](https://github.com/apache/arrow-rs/pull/9328) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mzabaluev](https://github.com/mzabaluev)) +- feat\(arrow-avro\): Configurable Arrow timezone ID for Avro timestamps [\#9280](https://github.com/apache/arrow-rs/pull/9280) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mzabaluev](https://github.com/mzabaluev)) diff --git a/Cargo.toml b/Cargo.toml index 1a02830b0b9f..65043fb60ff4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -68,7 +68,7 @@ exclude = [ ] [workspace.package] -version = "58.0.0" +version = "58.1.0" homepage = "https://github.com/apache/arrow-rs" repository = "https://github.com/apache/arrow-rs" authors = ["Apache Arrow "] @@ -85,26 +85,26 @@ edition = "2024" rust-version = "1.85" [workspace.dependencies] -arrow = { version = "58.0.0", path = "./arrow", default-features = false } -arrow-arith = { version = "58.0.0", path = "./arrow-arith" } -arrow-array = { version = "58.0.0", path = "./arrow-array" } -arrow-buffer = { version = "58.0.0", path = "./arrow-buffer" } -arrow-cast = { version = "58.0.0", path = "./arrow-cast" } -arrow-csv = { version = "58.0.0", path = "./arrow-csv" } -arrow-data = { version = "58.0.0", path = "./arrow-data" } -arrow-ipc = { version = "58.0.0", path = "./arrow-ipc" } -arrow-json = { 
version = "58.0.0", path = "./arrow-json" } -arrow-ord = { version = "58.0.0", path = "./arrow-ord" } -arrow-pyarrow = { version = "58.0.0", path = "./arrow-pyarrow" } -arrow-row = { version = "58.0.0", path = "./arrow-row" } -arrow-schema = { version = "58.0.0", path = "./arrow-schema" } -arrow-select = { version = "58.0.0", path = "./arrow-select" } -arrow-string = { version = "58.0.0", path = "./arrow-string" } -parquet = { version = "58.0.0", path = "./parquet", default-features = false } -parquet-geospatial = { version = "58.0.0", path = "./parquet-geospatial" } -parquet-variant = { version = "58.0.0", path = "./parquet-variant" } -parquet-variant-json = { version = "58.0.0", path = "./parquet-variant-json" } -parquet-variant-compute = { version = "58.0.0", path = "./parquet-variant-compute" } +arrow = { version = "58.1.0", path = "./arrow", default-features = false } +arrow-arith = { version = "58.1.0", path = "./arrow-arith" } +arrow-array = { version = "58.1.0", path = "./arrow-array" } +arrow-buffer = { version = "58.1.0", path = "./arrow-buffer" } +arrow-cast = { version = "58.1.0", path = "./arrow-cast" } +arrow-csv = { version = "58.1.0", path = "./arrow-csv" } +arrow-data = { version = "58.1.0", path = "./arrow-data" } +arrow-ipc = { version = "58.1.0", path = "./arrow-ipc" } +arrow-json = { version = "58.1.0", path = "./arrow-json" } +arrow-ord = { version = "58.1.0", path = "./arrow-ord" } +arrow-pyarrow = { version = "58.1.0", path = "./arrow-pyarrow" } +arrow-row = { version = "58.1.0", path = "./arrow-row" } +arrow-schema = { version = "58.1.0", path = "./arrow-schema" } +arrow-select = { version = "58.1.0", path = "./arrow-select" } +arrow-string = { version = "58.1.0", path = "./arrow-string" } +parquet = { version = "58.1.0", path = "./parquet", default-features = false } +parquet-geospatial = { version = "58.1.0", path = "./parquet-geospatial" } +parquet-variant = { version = "58.1.0", path = "./parquet-variant" } +parquet-variant-json = { 
version = "58.1.0", path = "./parquet-variant-json" } +parquet-variant-compute = { version = "58.1.0", path = "./parquet-variant-compute" } chrono = { version = "0.4.40", default-features = false, features = ["clock"] } diff --git a/dev/release/update_change_log.sh b/dev/release/update_change_log.sh index b444cae64911..ae7bf81b0cca 100755 --- a/dev/release/update_change_log.sh +++ b/dev/release/update_change_log.sh @@ -29,8 +29,8 @@ set -e -SINCE_TAG="57.3.0" -FUTURE_RELEASE="58.0.0" +SINCE_TAG="58.0.0" +FUTURE_RELEASE="58.1.0" SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" SOURCE_TOP_DIR="$(cd "${SOURCE_DIR}/../../" && pwd)" From 6471e9ac72a79fd13963568ec3294a76fab826a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Heres?= Date: Fri, 20 Mar 2026 20:38:26 +0100 Subject: [PATCH 66/80] Pre-reserve output capacity in ByteView/ByteArray dictionary decoding (#9590) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary - Reserve `output.views` capacity in `ByteViewArrayDecoderDictionary::read` before the decode loop - Reserve `output.offsets` capacity in `ByteArrayDecoderDictionary::read` before the decode loop This avoids per-chunk reallocation during `extend` calls inside the dictionary decode loop. 
Closes #9587 ## Test plan - [ ] Existing tests pass (no functional change, only pre-allocation) - [ ] Benchmark dictionary-encoded StringView/BinaryView/String reads 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-authored-by: Claude Opus 4.6 (1M context) --- parquet/src/arrow/array_reader/byte_array.rs | 3 +++ parquet/src/arrow/array_reader/byte_view_array.rs | 3 +++ 2 files changed, 6 insertions(+) diff --git a/parquet/src/arrow/array_reader/byte_array.rs b/parquet/src/arrow/array_reader/byte_array.rs index 0acbe6501924..2d0d44fbe203 100644 --- a/parquet/src/arrow/array_reader/byte_array.rs +++ b/parquet/src/arrow/array_reader/byte_array.rs @@ -580,6 +580,9 @@ impl ByteArrayDecoderDictionary { return Ok(0); } + // Pre-reserve offsets capacity to avoid per-chunk reallocation + output.offsets.reserve(len); + self.decoder.read(len, |keys| { output.extend_from_dictionary(keys, dict.offsets.as_slice(), dict.values.as_slice()) }) diff --git a/parquet/src/arrow/array_reader/byte_view_array.rs b/parquet/src/arrow/array_reader/byte_view_array.rs index 65b627aae451..1933654118f3 100644 --- a/parquet/src/arrow/array_reader/byte_view_array.rs +++ b/parquet/src/arrow/array_reader/byte_view_array.rs @@ -500,6 +500,9 @@ impl ByteViewArrayDecoderDictionary { // then the base_buffer_idx is 5 - 2 = 3 let base_buffer_idx = output.buffers.len() as u32 - dict.buffers.len() as u32; + // Pre-reserve output capacity to avoid per-chunk reallocation in extend + output.views.reserve(len); + let mut error = None; let read = self.decoder.read(len, |keys| { if base_buffer_idx == 0 { From 70445c54ff510e1b9f8d6e6782b92a1ce45fbb28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jochen=20G=C3=B6rtler?= Date: Sun, 22 Mar 2026 15:48:05 +0100 Subject: [PATCH 67/80] Add `quoted_strings` to `FormatOptions` (#9221) # Rationale for this change In some cases, it is desirable to print strings with surrounding quotation marks. 
A typical example that we run into in https://github.com/rerun-io/rerun is a `StructArray` that contains empty strings: Current formatting: ```text {name: } ``` Added option in this PR: ```text {name: ""} ``` # What changes are included in this PR? This PR relies on `std::fmt::Debug` to do the actual formatting of strings, which means that all escaping is handled out of the box. # Are these changes tested? This PR contains tests for different types of inputs, including escape sequences. Additionally, it also tests the `StructArray` example outlined above. # Are there any user-facing changes? By default this option is false, making the feature opt-in. --------- Co-authored-by: Andrew Lamb --- arrow-cast/src/display.rs | 55 ++++++++++++-- arrow-cast/src/pretty.rs | 154 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 203 insertions(+), 6 deletions(-) diff --git a/arrow-cast/src/display.rs b/arrow-cast/src/display.rs index 59dfa26c93cb..0460c0c96b55 100644 --- a/arrow-cast/src/display.rs +++ b/arrow-cast/src/display.rs @@ -80,6 +80,8 @@ pub struct FormatOptions<'a> { duration_format: DurationFormat, /// Show types in visual representation batches types_info: bool, + /// Whether string values should be quoted + quoted_strings: bool, /// Formatter factory used to instantiate custom [`ArrayFormatter`]s. This allows users to /// provide custom formatters. 
formatter_factory: Option<&'a dyn ArrayFormatterFactory>, @@ -102,6 +104,7 @@ impl PartialEq for FormatOptions<'_> { && self.time_format == other.time_format && self.duration_format == other.duration_format && self.types_info == other.types_info + && self.quoted_strings == other.quoted_strings && match (self.formatter_factory, other.formatter_factory) { (Some(f1), Some(f2)) => std::ptr::eq(f1, f2), (None, None) => true, @@ -123,6 +126,7 @@ impl Hash for FormatOptions<'_> { self.time_format.hash(state); self.duration_format.hash(state); self.types_info.hash(state); + self.quoted_strings.hash(state); self.formatter_factory .map(|f| f as *const dyn ArrayFormatterFactory) .hash(state); @@ -142,6 +146,7 @@ impl<'a> FormatOptions<'a> { time_format: None, duration_format: DurationFormat::ISO8601, types_info: false, + quoted_strings: false, formatter_factory: None, } } @@ -217,6 +222,17 @@ impl<'a> FormatOptions<'a> { Self { types_info, ..self } } + /// Sets whether string values should be quoted + /// + /// When `true`, strings are formatted using [`Debug`]-style with double quotes and escaping. + /// Defaults to `false` + pub const fn with_quoted_strings(self, quoted_strings: bool) -> Self { + Self { + quoted_strings, + ..self + } + } + /// Overrides the [`ArrayFormatterFactory`] used to instantiate custom [`ArrayFormatter`]s. /// /// Using [`None`] causes pretty-printers to use the default [`ArrayFormatter`]s. @@ -276,6 +292,11 @@ impl<'a> FormatOptions<'a> { self.types_info } + /// Returns whether string values should be quoted. + pub const fn quoted_strings(&self) -> bool { + self.quoted_strings + } + /// Returns the [`ArrayFormatterFactory`] used to instantiate custom [`ArrayFormatter`]s. 
pub const fn formatter_factory(&self) -> Option<&'a dyn ArrayFormatterFactory> { self.formatter_factory @@ -1081,16 +1102,38 @@ impl Display for MillisecondsFormatter<'_> { } } -impl DisplayIndex for &GenericStringArray { - fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult { - write!(f, "{}", self.value(idx))?; +impl<'a, O: OffsetSizeTrait> DisplayIndexState<'a> for &'a GenericStringArray { + type State = bool; + + fn prepare(&self, options: &FormatOptions<'a>) -> Result { + Ok(options.quoted_strings()) + } + + fn write(&self, state: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult { + let value = self.value(idx); + if *state { + write!(f, "{:?}", value)?; + } else { + write!(f, "{}", value)?; + } Ok(()) } } -impl DisplayIndex for &StringViewArray { - fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult { - write!(f, "{}", self.value(idx))?; +impl<'a> DisplayIndexState<'a> for &'a StringViewArray { + type State = bool; + + fn prepare(&self, options: &FormatOptions<'a>) -> Result { + Ok(options.quoted_strings()) + } + + fn write(&self, state: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult { + let value = self.value(idx); + if *state { + write!(f, "{:?}", value)?; + } else { + write!(f, "{}", value)?; + } Ok(()) } } diff --git a/arrow-cast/src/pretty.rs b/arrow-cast/src/pretty.rs index e63147cd09c1..61ce5598992d 100644 --- a/arrow-cast/src/pretty.rs +++ b/arrow-cast/src/pretty.rs @@ -1665,4 +1665,158 @@ mod tests { .unwrap(); insta::assert_snapshot!(error, @"Invalid argument error: Expected the same number of columns in a record batch (1) as the number of fields (2) in the schema"); } + + #[test] + fn test_quoted_strings() { + let schema = Arc::new(Schema::new(vec![Field::new( + "strings", + DataType::Utf8, + true, + )])); + + let string_array = StringArray::from(vec![ + Some("hello"), + Some("world"), + Some(""), + Some("tab\there"), + Some("newline\ntest"), + Some("quote\"test"), + Some("backslash\\test"), + None, + 
]); + + let batch = RecordBatch::try_new(schema.clone(), vec![Arc::new(string_array)]).unwrap(); + + let options_none = FormatOptions::new().with_null("NULL"); + let table = pretty_format_batches_with_options(std::slice::from_ref(&batch), &options_none) + .unwrap() + .to_string(); + + insta::assert_snapshot!(table, @" + +----------------+ + | strings | + +----------------+ + | hello | + | world | + | | + | tab here | + | newline | + | test | + | quote\"test | + | backslash\\test | + | NULL | + +----------------+ + "); + + let options_quoted = FormatOptions::new() + .with_null("NULL") + .with_quoted_strings(true); + + let table = pretty_format_batches_with_options(&[batch], &options_quoted) + .unwrap() + .to_string(); + + insta::assert_snapshot!(table, @r#" + +-------------------+ + | strings | + +-------------------+ + | "hello" | + | "world" | + | "" | + | "tab\there" | + | "newline\ntest" | + | "quote\"test" | + | "backslash\\test" | + | NULL | + +-------------------+ + "#); + } + + #[test] + fn test_string_view_quoted() { + let schema = Arc::new(Schema::new(vec![Field::new( + "view_strings", + DataType::Utf8View, + true, + )])); + + let mut builder = StringViewBuilder::new(); + builder.append_value("hello"); + builder.append_null(); + builder.append_value("quote\"test"); + + let array: ArrayRef = Arc::new(builder.finish()); + let batch = RecordBatch::try_new(schema, vec![array]).unwrap(); + + let options = FormatOptions::new().with_quoted_strings(true); + + let table = pretty_format_batches_with_options(&[batch], &options) + .unwrap() + .to_string(); + + insta::assert_snapshot!(table, @" + +---------------+ + | view_strings | + +---------------+ + | \"hello\" | + | | + | \"quote\\\"test\" | + +---------------+ + "); + } + + #[test] + fn test_quoted_strings_in_struct() { + let string_builder = StringBuilder::new(); + let mut name_builder = string_builder; + name_builder.append_value("Alice"); + name_builder.append_value(""); + name_builder.append_value("Bob"); + 
+ let fields = vec![Field::new("name", DataType::Utf8, false)]; + let mut struct_builder = StructBuilder::new(fields, vec![Box::new(name_builder)]); + struct_builder.append(true); + struct_builder.append(true); + struct_builder.append(true); + + let struct_array = struct_builder.finish(); + + let schema = Arc::new(Schema::new(vec![Field::new( + "person", + struct_array.data_type().clone(), + false, + )])); + + let batch = RecordBatch::try_new(schema, vec![Arc::new(struct_array)]).unwrap(); + + let options_none = FormatOptions::new(); + let table = pretty_format_batches_with_options(std::slice::from_ref(&batch), &options_none) + .unwrap() + .to_string(); + + insta::assert_snapshot!(table, @" + +---------------+ + | person | + +---------------+ + | {name: Alice} | + | {name: } | + | {name: Bob} | + +---------------+ + "); + + let options_quoted = FormatOptions::new().with_quoted_strings(true); + let table = pretty_format_batches_with_options(&[batch], &options_quoted) + .unwrap() + .to_string(); + + insta::assert_snapshot!(table, @" + +-----------------+ + | person | + +-----------------+ + | {name: \"Alice\"} | + | {name: \"\"} | + | {name: \"Bob\"} | + +-----------------+ + "); + } } From 980ea0b36c79a9e996efd90ad5f24571f0f9c0e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Heres?= Date: Tue, 24 Mar 2026 14:03:23 +0100 Subject: [PATCH 68/80] Reduce per-byte overhead in VLQ integer decoding (#9584) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Which issue does this PR close? Closes #9580 ## Rationale The current VLQ decoder calls `get_aligned` for each byte, which involves repeated offset calculations and bounds checks in the hot loop. ## What changes are included in this PR? Align to the byte boundary once, then iterate directly over the buffer slice, avoiding per-byte overhead from `get_aligned`. ## Are there any user-facing changes? No. 
🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-authored-by: Claude Opus 4.6 (1M context) --- parquet/src/util/bit_util.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/parquet/src/util/bit_util.rs b/parquet/src/util/bit_util.rs index 3a26603fabc4..262564825864 100644 --- a/parquet/src/util/bit_util.rs +++ b/parquet/src/util/bit_util.rs @@ -659,9 +659,15 @@ impl BitReader { /// /// Returns `None` if there's not enough bytes in the stream. `Some` otherwise. pub fn get_vlq_int(&mut self) -> Option { + // Align to byte boundary once, then read bytes directly + self.byte_offset = self.get_byte_offset(); + self.bit_offset = 0; + + let buf = &self.buffer[self.byte_offset..]; let mut shift = 0; let mut v: i64 = 0; - while let Some(byte) = self.get_aligned::(1) { + + for (i, &byte) in buf.iter().enumerate() { v |= ((byte & 0x7F) as i64) << shift; shift += 7; assert!( @@ -669,6 +675,7 @@ impl BitReader { "Num of bytes exceed MAX_VLQ_BYTE_LEN ({MAX_VLQ_BYTE_LEN})" ); if byte & 0x80 == 0 { + self.byte_offset += i + 1; return Some(v); } } From 398962ec67bc777eca1c635c8ef01a9c634530eb Mon Sep 17 00:00:00 2001 From: Mikhail Zabaluev Date: Wed, 25 Mar 2026 20:05:53 +0200 Subject: [PATCH 69/80] deps: fix `object_store` breakage for 0.13.2 (#9612) # Rationale for this change The `object_store` crate release 0.13.2 breaks the build of parquet because it feature-gates the `buffered` module. I have filed https://github.com/apache/arrow-rs-object-store/issues/677 about the breakage; meanwhile this fix is made in expectation that 0.13.2 will not be yanked and the feature gate will remain. # What changes are included in this PR? Bump the version to 0.13.2 and request the "tokio" feature. # Are these changes tested? The build should succeed in CI workflows. # Are there any user-facing changes? 
No Co-authored-by: Mikhail Zabaluev --- Cargo.toml | 6 ++++-- arrow-avro/Cargo.toml | 4 ++-- parquet/Cargo.toml | 4 ++-- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 65043fb60ff4..4ca4b068f65a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -108,12 +108,14 @@ parquet-variant-compute = { version = "58.1.0", path = "./parquet-variant-comput chrono = { version = "0.4.40", default-features = false, features = ["clock"] } -simdutf8 = { version = "0.1.5", default-features = false } - criterion = { version = "0.8.0", default-features = false } insta = { version = "1.46.3", default-features = false } +object_store = { version = "0.13.2", default-features = false } + +simdutf8 = { version = "0.1.5", default-features = false } + # release inherited profile keeping debug information and symbols # for mem/cpu profiling [profile.profiling] diff --git a/arrow-avro/Cargo.toml b/arrow-avro/Cargo.toml index 93eb825f9a7b..7f17c98431ea 100644 --- a/arrow-avro/Cargo.toml +++ b/arrow-avro/Cargo.toml @@ -56,7 +56,7 @@ arrow-buffer = { workspace = true } arrow-array = { workspace = true } arrow-select = { workspace = true, optional = true } -object_store = { version = "0.13", default-features = false, optional = true } +object_store = { workspace = true, optional = true } bytes = { version = "1.11.0", default-features = false, features = ["std"] } serde_json = { version = "1.0", default-features = false, features = ["std"] } @@ -93,7 +93,7 @@ futures = "0.3.31" async-stream = "0.3.6" apache-avro = "0.21.0" num-bigint = "0.4" -object_store = { version = "0.13", default-features = false, features = ["fs"] } +object_store = { workspace = true, features = ["fs"] } once_cell = "1.21.3" half = { version = "2.1", default-features = false } tokio = { version = "1.0", default-features = false, features = ["macros", "rt-multi-thread", "io-util", "fs"] } diff --git a/parquet/Cargo.toml b/parquet/Cargo.toml index 4be779302448..efcd1fe2190b 100644 --- 
a/parquet/Cargo.toml +++ b/parquet/Cargo.toml @@ -49,7 +49,7 @@ parquet-variant = { workspace = true, optional = true } parquet-variant-json = { workspace = true, optional = true } parquet-variant-compute = { workspace = true, optional = true } -object_store = { version = "0.13.1", default-features = false, optional = true } +object_store = { workspace = true, optional = true, features = ["tokio"] } bytes = { version = "1.1", default-features = false, features = ["std"] } thrift = { version = "0.17", default-features = false } @@ -93,7 +93,7 @@ arrow = { workspace = true, features = ["ipc", "test_utils", "prettyprint", "jso arrow-cast = { workspace = true } tokio = { version = "1.0", default-features = false, features = ["macros", "rt-multi-thread", "io-util", "fs"] } rand = { version = "0.9", default-features = false, features = ["std", "std_rng", "thread_rng"] } -object_store = { version = "0.13.1", default-features = false, features = ["azure", "fs"] } +object_store = { workspace = true, features = ["azure", "fs"] } sysinfo = { version = "0.38.1", default-features = false, features = ["system"] } [package.metadata.docs.rs] From f2512b5341ec66dcafe9de94ae382401ce5e8698 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 26 Mar 2026 18:11:17 -0400 Subject: [PATCH 70/80] chore(deps): update sha2 requirement from 0.10 to 0.11 (#9618) Updates the requirements on [sha2](https://github.com/RustCrypto/hashes) to permit the latest version.
Commits

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- arrow-avro/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow-avro/Cargo.toml b/arrow-avro/Cargo.toml index 7f17c98431ea..f46ef7e7b999 100644 --- a/arrow-avro/Cargo.toml +++ b/arrow-avro/Cargo.toml @@ -75,7 +75,7 @@ uuid = "1.17" indexmap = "2.10" rand = "0.9" md5 = { version = "0.8", optional = true } -sha2 = { version = "0.10", optional = true } +sha2 = { version = "0.11", optional = true } tokio = { version = "1.0", optional = true, default-features = false, features = ["macros", "rt", "io-util"] } [dev-dependencies] From 1f1c3a4cea6972ade7ff73a7765521c21a992e4f Mon Sep 17 00:00:00 2001 From: Liam Bao Date: Thu, 26 Mar 2026 18:15:41 -0400 Subject: [PATCH 71/80] Support `ListView` codec in arrow-json (#9503) # Which issue does this PR close? - Closes #9340. # Rationale for this change # What changes are included in this PR? Support `ListView` codec in arrow-json. Using `ListLikeArray` trait to simplify implementation. # Are these changes tested? Tests added # Are there any user-facing changes? 
New encoder/decoder --- arrow-json/src/lib.rs | 66 +++++++++++++++++++----- arrow-json/src/reader/list_array.rs | 43 +++++++++++----- arrow-json/src/reader/mod.rs | 80 ++++++++++++++++++++++++++++- arrow-json/src/writer/encoder.rs | 79 ++++++++-------------------- arrow-json/src/writer/mod.rs | 48 +++++++++++++++++ 5 files changed, 228 insertions(+), 88 deletions(-) diff --git a/arrow-json/src/lib.rs b/arrow-json/src/lib.rs index 1b18e0094708..201c3cd80749 100644 --- a/arrow-json/src/lib.rs +++ b/arrow-json/src/lib.rs @@ -179,17 +179,17 @@ impl JsonSerializable for f64 { #[cfg(test)] mod tests { - use std::sync::Arc; - - use crate::writer::JsonArray; - use super::*; - + use crate::writer::JsonArray; + use crate::writer::LineDelimited; use arrow_array::{ - ArrayRef, GenericBinaryArray, GenericByteViewArray, RecordBatch, RecordBatchWriter, - builder::FixedSizeBinaryBuilder, types::BinaryViewType, + ArrayRef, GenericBinaryArray, GenericByteViewArray, GenericListViewArray, RecordBatch, + RecordBatchWriter, builder::FixedSizeBinaryBuilder, types::BinaryViewType, }; + use arrow_schema::{DataType, Field, Fields, Schema}; use serde_json::Value::{Bool, Number as VNumber, String as VString}; + use std::io::Cursor; + use std::sync::Arc; #[test] fn test_arrow_native_type_to_json() { @@ -216,13 +216,6 @@ mod tests { #[test] fn test_json_roundtrip_structs() { - use crate::writer::LineDelimited; - use arrow_schema::DataType; - use arrow_schema::Field; - use arrow_schema::Fields; - use arrow_schema::Schema; - use std::sync::Arc; - let schema = Arc::new(Schema::new(vec![ Field::new( "c1", @@ -352,4 +345,49 @@ mod tests { assert_eq!(batch, decoded); } + + fn assert_list_view_roundtrip() { + let flat_field = Arc::new(Field::new("item", DataType::Int32, true)); + let flat_dt = GenericListViewArray::::DATA_TYPE_CONSTRUCTOR(flat_field); + + let nested_inner = Arc::new(Field::new("item", DataType::Int32, false)); + let nested_inner_dt = 
GenericListViewArray::::DATA_TYPE_CONSTRUCTOR(nested_inner); + let nested_outer = Arc::new(Field::new("item", nested_inner_dt, true)); + let nested_dt = GenericListViewArray::::DATA_TYPE_CONSTRUCTOR(nested_outer); + + let schema = Arc::new(Schema::new(vec![ + Field::new("flat", flat_dt, true), + Field::new("nested", nested_dt, true), + ])); + + let input = r#"{"flat":[1,2,3],"nested":[[1,2],[3]]} +{"flat":[4,null]} +{} +{"flat":[6],"nested":[[4,5,6]]} +{"flat":[]} +"# + .as_bytes(); + + let batches: Vec = ReaderBuilder::new(schema.clone()) + .with_batch_size(1024) + .build(Cursor::new(input)) + .unwrap() + .collect::, _>>() + .unwrap(); + + let mut output = Vec::new(); + let mut writer = WriterBuilder::new().build::<_, LineDelimited>(&mut output); + for batch in &batches { + writer.write(batch).unwrap(); + } + writer.finish().unwrap(); + + assert_eq!(input, &output); + } + + #[test] + fn test_json_roundtrip_list_view() { + assert_list_view_roundtrip::(); + assert_list_view_roundtrip::(); + } } diff --git a/arrow-json/src/reader/list_array.rs b/arrow-json/src/reader/list_array.rs index d363b6be9780..ea23403c4b18 100644 --- a/arrow-json/src/reader/list_array.rs +++ b/arrow-json/src/reader/list_array.rs @@ -18,28 +18,33 @@ use crate::reader::tape::{Tape, TapeElement}; use crate::reader::{ArrayDecoder, DecoderContext}; use arrow_array::OffsetSizeTrait; -use arrow_array::builder::{BooleanBufferBuilder, BufferBuilder}; -use arrow_buffer::buffer::NullBuffer; +use arrow_array::builder::BooleanBufferBuilder; +use arrow_buffer::{Buffer, buffer::NullBuffer}; use arrow_data::{ArrayData, ArrayDataBuilder}; use arrow_schema::{ArrowError, DataType}; use std::marker::PhantomData; -pub struct ListArrayDecoder { +pub type ListArrayDecoder = ListLikeArrayDecoder; +pub type ListViewArrayDecoder = ListLikeArrayDecoder; + +pub struct ListLikeArrayDecoder { data_type: DataType, decoder: Box, phantom: PhantomData, is_nullable: bool, } -impl ListArrayDecoder { +impl ListLikeArrayDecoder { 
pub fn new( ctx: &DecoderContext, data_type: &DataType, is_nullable: bool, ) -> Result { - let field = match data_type { - DataType::List(f) if !O::IS_LARGE => f, - DataType::LargeList(f) if O::IS_LARGE => f, + let field = match (IS_VIEW, data_type) { + (false, DataType::List(f)) if !O::IS_LARGE => f, + (false, DataType::LargeList(f)) if O::IS_LARGE => f, + (true, DataType::ListView(f)) if !O::IS_LARGE => f, + (true, DataType::LargeListView(f)) if O::IS_LARGE => f, _ => unreachable!(), }; let decoder = ctx.make_decoder(field.data_type(), field.is_nullable())?; @@ -53,11 +58,11 @@ impl ListArrayDecoder { } } -impl ArrayDecoder for ListArrayDecoder { +impl ArrayDecoder for ListLikeArrayDecoder { fn decode(&mut self, tape: &Tape<'_>, pos: &[u32]) -> Result { let mut child_pos = Vec::with_capacity(pos.len()); - let mut offsets = BufferBuilder::::new(pos.len() + 1); - offsets.append(O::from_usize(0).unwrap()); + let mut offsets = Vec::with_capacity(pos.len() + 1); + offsets.push(O::from_usize(0).unwrap()); let mut nulls = self .is_nullable @@ -88,18 +93,30 @@ impl ArrayDecoder for ListArrayDecoder { let offset = O::from_usize(child_pos.len()).ok_or_else(|| { ArrowError::JsonError(format!("offset overflow decoding {}", self.data_type)) })?; - offsets.append(offset) + offsets.push(offset); } let child_data = self.decoder.decode(tape, &child_pos)?; let nulls = nulls.as_mut().map(|x| NullBuffer::new(x.finish())); - let data = ArrayDataBuilder::new(self.data_type.clone()) + let mut data = ArrayDataBuilder::new(self.data_type.clone()) .len(pos.len()) .nulls(nulls) - .add_buffer(offsets.finish()) .child_data(vec![child_data]); + if IS_VIEW { + let mut sizes = Vec::with_capacity(offsets.len() - 1); + for i in 1..offsets.len() { + sizes.push(offsets[i] - offsets[i - 1]); + } + offsets.pop(); + data = data + .add_buffer(Buffer::from_vec(offsets)) + .add_buffer(Buffer::from_vec(sizes)); + } else { + data = data.add_buffer(Buffer::from_vec(offsets)); + } + // Safety // Validated 
lengths above Ok(unsafe { data.build_unchecked() }) diff --git a/arrow-json/src/reader/mod.rs b/arrow-json/src/reader/mod.rs index 04271368a4aa..7039d3500ece 100644 --- a/arrow-json/src/reader/mod.rs +++ b/arrow-json/src/reader/mod.rs @@ -154,7 +154,7 @@ pub use value_iter::ValueIter; use crate::reader::boolean_array::BooleanArrayDecoder; use crate::reader::decimal_array::DecimalArrayDecoder; -use crate::reader::list_array::ListArrayDecoder; +use crate::reader::list_array::{ListArrayDecoder, ListViewArrayDecoder}; use crate::reader::map_array::MapArrayDecoder; use crate::reader::null_array::NullArrayDecoder; use crate::reader::primitive_array::PrimitiveArrayDecoder; @@ -792,6 +792,8 @@ fn make_decoder( DataType::LargeUtf8 => Ok(Box::new(StringArrayDecoder::::new(coerce_primitive))), DataType::List(_) => Ok(Box::new(ListArrayDecoder::::new(ctx, data_type, is_nullable)?)), DataType::LargeList(_) => Ok(Box::new(ListArrayDecoder::::new(ctx, data_type, is_nullable)?)), + DataType::ListView(_) => Ok(Box::new(ListViewArrayDecoder::::new(ctx, data_type, is_nullable)?)), + DataType::LargeListView(_) => Ok(Box::new(ListViewArrayDecoder::::new(ctx, data_type, is_nullable)?)), DataType::Struct(_) => Ok(Box::new(StructArrayDecoder::new(ctx, data_type, is_nullable)?)), DataType::Binary => Ok(Box::new(BinaryArrayDecoder::::default())), DataType::LargeBinary => Ok(Box::new(BinaryArrayDecoder::::default())), @@ -815,7 +817,10 @@ mod tests { use std::io::{BufReader, Cursor, Seek}; use arrow_array::cast::AsArray; - use arrow_array::{Array, BooleanArray, Float64Array, ListArray, StringArray, StringViewArray}; + use arrow_array::{ + Array, BooleanArray, Float64Array, GenericListViewArray, ListArray, OffsetSizeTrait, + StringArray, StringViewArray, + }; use arrow_buffer::{ArrowNativeType, Buffer}; use arrow_cast::display::{ArrayFormatter, FormatOptions}; use arrow_data::ArrayDataBuilder; @@ -2192,6 +2197,77 @@ mod tests { assert_eq!(read, expected); } + fn assert_read_list_view() { + 
let field = Arc::new(Field::new("item", DataType::Int32, true)); + let data_type = GenericListViewArray::::DATA_TYPE_CONSTRUCTOR(field.clone()); + let schema = Arc::new(Schema::new(vec![Field::new("lv", data_type, true)])); + + let buf = r#" + {"lv": [1, 2, 3]} + {"lv": [4, null]} + {"lv": null} + {"lv": [6]} + {"lv": []} + "#; + + let batches = do_read(buf, 1024, false, false, schema); + assert_eq!(batches.len(), 1); + let batch = &batches[0]; + let col = batch.column(0); + let list_view = col + .as_any() + .downcast_ref::>() + .unwrap(); + + assert_eq!(list_view.len(), 5); + + // Check offsets and sizes + let expected_offsets: Vec = vec![0, 3, 5, 5, 6] + .into_iter() + .map(|v| O::usize_as(v)) + .collect(); + let expected_sizes: Vec = vec![3, 2, 0, 1, 0] + .into_iter() + .map(|v| O::usize_as(v)) + .collect(); + assert_eq!(list_view.value_offsets(), &expected_offsets); + assert_eq!(list_view.value_sizes(), &expected_sizes); + + // Row 0: [1, 2, 3] + assert!(list_view.is_valid(0)); + let vals = list_view.value(0); + let ints = vals.as_primitive::(); + assert_eq!(ints.values(), &[1, 2, 3]); + + // Row 1: [4, null] + assert!(list_view.is_valid(1)); + let vals = list_view.value(1); + let ints = vals.as_primitive::(); + assert_eq!(ints.len(), 2); + assert_eq!(ints.value(0), 4); + assert!(ints.is_null(1)); + + // Row 2: null + assert!(list_view.is_null(2)); + + // Row 3: [6] + assert!(list_view.is_valid(3)); + let vals = list_view.value(3); + let ints = vals.as_primitive::(); + assert_eq!(ints.values(), &[6]); + + // Row 4: [] + assert!(list_view.is_valid(4)); + let vals = list_view.value(4); + assert_eq!(vals.len(), 0); + } + + #[test] + fn test_read_list_view() { + assert_read_list_view::(); + assert_read_list_view::(); + } + #[test] fn test_skip_empty_lines() { let schema = Schema::new(vec![Field::new("a", DataType::Int64, true)]); diff --git a/arrow-json/src/writer/encoder.rs b/arrow-json/src/writer/encoder.rs index d7c3fbbe2e34..45055c5a36a8 100644 --- 
a/arrow-json/src/writer/encoder.rs +++ b/arrow-json/src/writer/encoder.rs @@ -352,15 +352,23 @@ pub fn make_encoder<'a>( } DataType::List(_) => { let array = array.as_list::(); - NullableEncoder::new(Box::new(ListEncoder::try_new(field, array, options)?), array.nulls().cloned()) + NullableEncoder::new(Box::new(ListLikeEncoder::try_new(field, array, options)?), array.nulls().cloned()) } DataType::LargeList(_) => { let array = array.as_list::(); - NullableEncoder::new(Box::new(ListEncoder::try_new(field, array, options)?), array.nulls().cloned()) + NullableEncoder::new(Box::new(ListLikeEncoder::try_new(field, array, options)?), array.nulls().cloned()) + } + DataType::ListView(_) => { + let array = array.as_list_view::(); + NullableEncoder::new(Box::new(ListLikeEncoder::try_new(field, array, options)?), array.nulls().cloned()) + } + DataType::LargeListView(_) => { + let array = array.as_list_view::(); + NullableEncoder::new(Box::new(ListLikeEncoder::try_new(field, array, options)?), array.nulls().cloned()) } DataType::FixedSizeList(_, _) => { let array = array.as_fixed_size_list(); - NullableEncoder::new(Box::new(FixedSizeListEncoder::try_new(field, array, options)?), array.nulls().cloned()) + NullableEncoder::new(Box::new(ListLikeEncoder::try_new(field, array, options)?), array.nulls().cloned()) } DataType::Dictionary(_, _) => downcast_dictionary_array! 
{ @@ -639,77 +647,30 @@ impl Encoder for BinaryViewEncoder<'_> { } } -struct ListEncoder<'a, O: OffsetSizeTrait> { - offsets: OffsetBuffer, - encoder: NullableEncoder<'a>, -} - -impl<'a, O: OffsetSizeTrait> ListEncoder<'a, O> { - fn try_new( - field: &'a FieldRef, - array: &'a GenericListArray, - options: &'a EncoderOptions, - ) -> Result { - let encoder = make_encoder(field, array.values().as_ref(), options)?; - Ok(Self { - offsets: array.offsets().clone(), - encoder, - }) - } -} - -impl Encoder for ListEncoder<'_, O> { - fn encode(&mut self, idx: usize, out: &mut Vec) { - let end = self.offsets[idx + 1].as_usize(); - let start = self.offsets[idx].as_usize(); - out.push(b'['); - - if self.encoder.has_nulls() { - for idx in start..end { - if idx != start { - out.push(b',') - } - if self.encoder.is_null(idx) { - out.extend_from_slice(b"null"); - } else { - self.encoder.encode(idx, out); - } - } - } else { - for idx in start..end { - if idx != start { - out.push(b',') - } - self.encoder.encode(idx, out); - } - } - out.push(b']'); - } -} - -struct FixedSizeListEncoder<'a> { - value_length: usize, +struct ListLikeEncoder<'a, L: ListLikeArray> { + list_array: &'a L, encoder: NullableEncoder<'a>, } -impl<'a> FixedSizeListEncoder<'a> { +impl<'a, L: ListLikeArray> ListLikeEncoder<'a, L> { fn try_new( field: &'a FieldRef, - array: &'a FixedSizeListArray, + array: &'a L, options: &'a EncoderOptions, ) -> Result { let encoder = make_encoder(field, array.values().as_ref(), options)?; Ok(Self { + list_array: array, encoder, - value_length: array.value_length().as_usize(), }) } } -impl Encoder for FixedSizeListEncoder<'_> { +impl Encoder for ListLikeEncoder<'_, L> { fn encode(&mut self, idx: usize, out: &mut Vec) { - let start = idx * self.value_length; - let end = start + self.value_length; + let range = self.list_array.element_range(idx); + let start = range.start; + let end = range.end; out.push(b'['); if self.encoder.has_nulls() { for idx in start..end { diff --git 
a/arrow-json/src/writer/mod.rs b/arrow-json/src/writer/mod.rs index 2fac5ab62353..04cc8c9e2a93 100644 --- a/arrow-json/src/writer/mod.rs +++ b/arrow-json/src/writer/mod.rs @@ -1241,6 +1241,54 @@ mod tests { ); } + fn assert_write_list_view() { + let field = Arc::new(Field::new("item", DataType::Int32, true)); + let data_type = GenericListViewArray::::DATA_TYPE_CONSTRUCTOR(field.clone()); + let schema = Schema::new(vec![Field::new("lv", data_type, true)]); + + // rows: [1, 2, 3], [4, null], null, [6] + let values = Int32Array::from(vec![Some(1), Some(2), Some(3), Some(4), None, Some(6)]); + let offsets = [0, 3, 0, 5] + .iter() + .map(|&v| O::from_usize(v).unwrap()) + .collect::>(); + let sizes = [3, 2, 0, 1] + .iter() + .map(|&v| O::from_usize(v).unwrap()) + .collect::>(); + let list_view = GenericListViewArray::::try_new( + field, + ScalarBuffer::from(offsets), + ScalarBuffer::from(sizes), + Arc::new(values), + Some(NullBuffer::from_iter([true, true, false, true])), + ) + .unwrap(); + + let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(list_view)]).unwrap(); + + let mut buf = Vec::new(); + { + let mut writer = LineDelimitedWriter::new(&mut buf); + writer.write_batches(&[&batch]).unwrap(); + } + + assert_json_eq( + &buf, + r#"{"lv":[1,2,3]} +{"lv":[4,null]} +{} +{"lv":[6]} +"#, + ); + } + + #[test] + fn write_list_view() { + assert_write_list_view::(); + assert_write_list_view::(); + } + fn test_write_for_file(test_file: &str, remove_nulls: bool) { let file = File::open(test_file).unwrap(); let mut reader = BufReader::new(file); From 7f307c031f31a691be566f5e20171455c41dd661 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Thu, 26 Mar 2026 23:17:50 +0100 Subject: [PATCH 72/80] fix: Stop using https://dist.apache.org/repos/dist/dev/arrow/KEYS for verification (#9604) # Which issue does this PR close? - Closes #9603 # Rationale for this change The release and dev KEYS files could get out of synch. 
We should use the release/ version: - Users use the release/ version not dev/ version when they verify our artifacts' signature - https://dist.apache.org/ may reject our request when we request many times by CI # What changes are included in this PR? Use `https://www.apache.org/dyn/closer.lua?action=download&filename=arrow/KEYS` to download the KEYS file and the expected `https://dist.apache.org/repos/dist/dev/arrow` for the RC artifacts. # Are these changes tested? Yes, I've verified 58.1.0 1 both previous to the change and after the change. # Are there any user-facing changes? No --- dev/release/verify-release-candidate.sh | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index 2629d362aaff..d8b888effef1 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -33,23 +33,27 @@ set -o pipefail SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" ARROW_DIR="$(dirname $(dirname ${SOURCE_DIR}))" -ARROW_DIST_URL='https://dist.apache.org/repos/dist/dev/arrow' +ARROW_RC_URL="https://dist.apache.org/repos/dist/dev/arrow" +ARROW_KEYS_URL="https://www.apache.org/dyn/closer.lua?action=download&filename=arrow/KEYS" -download_dist_file() { +download_file() { curl \ --silent \ --show-error \ --fail \ --location \ - --remote-name $ARROW_DIST_URL/$1 + --output "$2" \ + "$1" } download_rc_file() { - download_dist_file apache-arrow-rs-${VERSION}-rc${RC_NUMBER}/$1 + download_file \ + "${ARROW_RC_URL}/apache-arrow-rs-${VERSION}-rc${RC_NUMBER}/$1" \ + "$1" } import_gpg_keys() { - download_dist_file KEYS + download_file "${ARROW_KEYS_URL}" KEYS gpg --import KEYS } From c194e54dc4e8fcd8b9333eea2528d5db1c1ba912 Mon Sep 17 00:00:00 2001 From: Konstantin Tarasov <33369833+sdf-jkl@users.noreply.github.com> Date: Thu, 26 Mar 2026 18:18:42 -0400 Subject: [PATCH 73/80] [Variant] Add unshredded `Struct` fast-path for 
`variant_get(..., Struct)` (#9597) # Which issue does this PR close? - Closes #9596. # Rationale for this change Check issue # What changes are included in this PR? Reuse `shred_basic_variant` as a fast path for unshredded `Struct` handling in `variant_get(..., Struct)` # Are these changes tested? Yes, added two unit tests to establish safe mode behavior. # Are there any user-facing changes? --- parquet-variant-compute/src/variant_get.rs | 90 +++++++++++++++++++++- 1 file changed, 88 insertions(+), 2 deletions(-) diff --git a/parquet-variant-compute/src/variant_get.rs b/parquet-variant-compute/src/variant_get.rs index 9204dcf70856..3e9892cacf70 100644 --- a/parquet-variant-compute/src/variant_get.rs +++ b/parquet-variant-compute/src/variant_get.rs @@ -213,9 +213,20 @@ fn shredded_get_path( return Ok(shredded); } - // Structs are special. Recurse into each field separately, hoping to follow the shredding even - // further, and build up the final struct from those individually shredded results. + // Structs are special. + // + // For fully unshredded targets (`typed_value` absent), delegate to the row builder so we + // preserve struct-level cast semantics: + // - safe mode: non-object rows become NULL structs + // - strict mode: non-object rows raise a cast error + // + // For shredded/partially-shredded targets (`typed_value` present), recurse into each field + // separately to take advantage of deeper shredding in child fields. 
if let DataType::Struct(fields) = as_field.data_type() { + if target.typed_value_field().is_none() { + return shred_basic_variant(target, VariantPath::default(), Some(as_field)); + } + let children = fields .iter() .map(|field| { @@ -3111,6 +3122,81 @@ mod test { assert_eq!(inner_values.value(1), 100); } + #[test] + fn test_unshredded_struct_safe_cast_non_object_rows_are_null() { + let json_strings = vec![r#"{"a": 1, "b": 2}"#, "123", "{}"]; + let string_array: Arc = Arc::new(StringArray::from(json_strings)); + let variant_array_ref = ArrayRef::from(json_to_variant(&string_array).unwrap()); + + let struct_fields = Fields::from(vec![ + Field::new("a", DataType::Int32, true), + Field::new("b", DataType::Int32, true), + ]); + let options = GetOptions { + path: VariantPath::default(), + as_type: Some(Arc::new(Field::new( + "result", + DataType::Struct(struct_fields), + true, + ))), + cast_options: CastOptions::default(), + }; + + let result = variant_get(&variant_array_ref, options).unwrap(); + let struct_result = result.as_struct(); + let field_a = struct_result + .column(0) + .as_primitive::(); + let field_b = struct_result + .column(1) + .as_primitive::(); + + // Row 0 is an object, so the struct row is valid with extracted fields. + assert!(!struct_result.is_null(0)); + assert_eq!(field_a.value(0), 1); + assert_eq!(field_b.value(0), 2); + + // Row 1 is a scalar, so safe struct cast should produce a NULL struct row. + assert!(struct_result.is_null(1)); + assert!(field_a.is_null(1)); + assert!(field_b.is_null(1)); + + // Row 2 is an empty object, so the struct row is valid with missing fields as NULL. 
+ assert!(!struct_result.is_null(2)); + assert!(field_a.is_null(2)); + assert!(field_b.is_null(2)); + } + + #[test] + fn test_unshredded_struct_strict_cast_non_object_errors() { + let json_strings = vec![r#"{"a": 1, "b": 2}"#, "123"]; + let string_array: Arc = Arc::new(StringArray::from(json_strings)); + let variant_array_ref = ArrayRef::from(json_to_variant(&string_array).unwrap()); + + let struct_fields = Fields::from(vec![ + Field::new("a", DataType::Int32, true), + Field::new("b", DataType::Int32, true), + ]); + let options = GetOptions { + path: VariantPath::default(), + as_type: Some(Arc::new(Field::new( + "result", + DataType::Struct(struct_fields), + true, + ))), + cast_options: CastOptions { + safe: false, + ..Default::default() + }, + }; + + let err = variant_get(&variant_array_ref, options).unwrap_err(); + assert!( + err.to_string() + .contains("Failed to extract struct from variant") + ); + } + /// Create comprehensive shredded variant with diverse null patterns and empty objects /// Rows: normal values, top-level null, missing field a, missing field b, empty object fn create_comprehensive_shredded_variant() -> ArrayRef { From aa9432c8833f5701085e8b933b30560d21df9f80 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Sun, 29 Mar 2026 23:11:05 -0700 Subject: [PATCH 74/80] Fix `extend_nulls` panic for UnionArray (#9607) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary - Fix `MutableArrayData::extend_nulls` which previously panicked unconditionally for both sparse and dense Union arrays - For sparse unions: append the first type_id and extend nulls in all children - For dense unions: append the first type_id, compute offsets into the first child, and extend nulls in that child only ## Background This bug was discovered via DataFusion. `CaseExpr` uses `MutableArrayData` via `scatter()` to build result arrays. 
When a `CASE` expression returns a Union type (e.g., from `json_get` which returns a JSON union) and there are rows where no `WHEN` branch matches (implicit `ELSE NULL`), `scatter` calls `extend_nulls` which panics with "cannot call extend_nulls on UnionArray as cannot infer type". Any query like: ```sql SELECT CASE WHEN condition THEN returns_union(col, 'key') END FROM table ``` would panic if `condition` is false for any row. ## Root Cause The `extend_nulls` implementation for Union arrays unconditionally panicked because it claimed it "cannot infer type". However, the Union's field definitions (child types and type IDs) are available in the `MutableArrayData`'s data type — there's enough information to produce valid null entries by picking the first declared type_id. ## Test plan - [x] Added test for sparse union `extend_nulls` - [x] Added test for dense union `extend_nulls` - [x] Existing `test_union_dense` continues to pass - [x] All `array_transform` tests pass 🤖 Generated with [Claude Code](https://claude.com/claude-code) --------- Co-authored-by: Claude Opus 4.6 (1M context) Co-authored-by: Jeffrey Vo --- arrow-data/src/transform/mod.rs | 4 +- arrow-data/src/transform/union.rs | 41 ++++++++++++-- arrow/tests/array_transform.rs | 88 +++++++++++++++++++++++++++++++ 3 files changed, 127 insertions(+), 6 deletions(-) diff --git a/arrow-data/src/transform/mod.rs b/arrow-data/src/transform/mod.rs index c6052817bfb6..66f6603f02fc 100644 --- a/arrow-data/src/transform/mod.rs +++ b/arrow-data/src/transform/mod.rs @@ -813,8 +813,8 @@ impl<'a> MutableArrayData<'a> { }; let nulls = match data.data_type { - // RunEndEncoded and Null arrays cannot have top-level null bitmasks - DataType::RunEndEncoded(_, _) | DataType::Null => None, + // RunEndEncoded, Null, and Union arrays cannot have top-level null bitmasks + DataType::RunEndEncoded(_, _) | DataType::Null | DataType::Union(_, _) => None, _ => data .null_buffer .map(|nulls| { diff --git 
a/arrow-data/src/transform/union.rs b/arrow-data/src/transform/union.rs index f6f291e3f05d..d1301249d326 100644 --- a/arrow-data/src/transform/union.rs +++ b/arrow-data/src/transform/union.rs @@ -17,6 +17,7 @@ use super::{_MutableArrayData, Extend}; use crate::ArrayData; +use arrow_schema::DataType; pub(super) fn build_extend_sparse(array: &ArrayData) -> Extend<'_> { let type_ids = array.buffer::(0); @@ -68,10 +69,42 @@ pub(super) fn build_extend_dense(array: &ArrayData) -> Extend<'_> { ) } -pub(super) fn extend_nulls_dense(_mutable: &mut _MutableArrayData, _len: usize) { - panic!("cannot call extend_nulls on UnionArray as cannot infer type"); +pub(super) fn extend_nulls_dense(mutable: &mut _MutableArrayData, len: usize) { + let DataType::Union(fields, _) = &mutable.data_type else { + unreachable!() + }; + let first_type_id = fields + .iter() + .next() + .expect("union must have at least one field") + .0; + + // Extend type_ids buffer + mutable.buffer1.extend_from_slice(&vec![first_type_id; len]); + + // Dense: extend offsets pointing into the first child, then extend nulls in that child + let child_offset = mutable.child_data[0].len(); + let (start, end) = (child_offset as i32, (child_offset + len) as i32); + mutable.buffer2.extend(start..end); + mutable.child_data[0].extend_nulls(len); } -pub(super) fn extend_nulls_sparse(_mutable: &mut _MutableArrayData, _len: usize) { - panic!("cannot call extend_nulls on UnionArray as cannot infer type"); +pub(super) fn extend_nulls_sparse(mutable: &mut _MutableArrayData, len: usize) { + let DataType::Union(fields, _) = &mutable.data_type else { + unreachable!() + }; + let first_type_id = fields + .iter() + .next() + .expect("union must have at least one field") + .0; + + // Extend type_ids buffer + mutable.buffer1.extend_from_slice(&vec![first_type_id; len]); + + // Sparse: extend nulls in ALL children + mutable + .child_data + .iter_mut() + .for_each(|child| child.extend_nulls(len)); } diff --git 
a/arrow/tests/array_transform.rs b/arrow/tests/array_transform.rs index 511dc1e8bfcd..c24d0992a473 100644 --- a/arrow/tests/array_transform.rs +++ b/arrow/tests/array_transform.rs @@ -1151,3 +1151,91 @@ fn test_fixed_size_list_append() { .unwrap(); assert_eq!(finished, expected_fixed_size_list_data); } + +#[test] +fn test_extend_nulls_sparse_union() { + let fields = UnionFields::try_new( + vec![0, 1], + vec![ + Field::new("null", DataType::Null, true), + Field::new("str", DataType::Utf8, true), + ], + ) + .unwrap(); + + let type_ids = ScalarBuffer::from(vec![1i8]); + let child_null = Arc::new(NullArray::new(1)) as ArrayRef; + let child_str = Arc::new(StringArray::from(vec![Some("hello")])) as ArrayRef; + let union_array = UnionArray::try_new( + fields.clone(), + type_ids, + None, // sparse + vec![child_null, child_str], + ) + .unwrap(); + + let data = union_array.to_data(); + let mut mutable = MutableArrayData::new(vec![&data], true, 4); + mutable.extend(0, 0, 1); // copy the first element + mutable.extend_nulls(2); // add two nulls + let result = mutable.freeze(); + + // Union arrays must not have a null bitmap per Arrow spec + assert!(result.nulls().is_none()); + + let result_array = UnionArray::from(result); + assert_eq!(result_array.len(), 3); + // First element should be type_id 1 (str) + assert_eq!(result_array.type_id(0), 1); + // Null elements use the first type_id (0) + assert_eq!(result_array.type_id(1), 0); + assert_eq!(result_array.type_id(2), 0); + // All children should have length 3 (sparse invariant) + assert_eq!(result_array.child(0).len(), 3); + assert_eq!(result_array.child(1).len(), 3); +} + +#[test] +fn test_extend_nulls_dense_union() { + let fields = UnionFields::try_new( + vec![0, 1], + vec![ + Field::new("i", DataType::Int32, true), + Field::new("str", DataType::Utf8, true), + ], + ) + .unwrap(); + + let type_ids = ScalarBuffer::from(vec![1i8]); + let offsets = ScalarBuffer::from(vec![0i32]); + let child_int = 
Arc::new(Int32Array::new_null(0)) as ArrayRef; + let child_str = Arc::new(StringArray::from(vec![Some("hello")])) as ArrayRef; + let union_array = UnionArray::try_new( + fields.clone(), + type_ids, + Some(offsets), + vec![child_int, child_str], + ) + .unwrap(); + + let data = union_array.to_data(); + let mut mutable = MutableArrayData::new(vec![&data], true, 4); + mutable.extend(0, 0, 1); // copy the first element + mutable.extend_nulls(2); // add two nulls + let result = mutable.freeze(); + + // Union arrays must not have a null bitmap per Arrow spec + assert!(result.nulls().is_none()); + + let result_array = UnionArray::from(result); + assert_eq!(result_array.len(), 3); + // First element is type_id 1 (str) + assert_eq!(result_array.type_id(0), 1); + // Null elements use the first type_id (0) + assert_eq!(result_array.type_id(1), 0); + assert_eq!(result_array.type_id(2), 0); + // First child (int) should have 2 null entries from extend_nulls + assert_eq!(result_array.child(0).len(), 2); + // Second child (str) should have 1 entry from extend + assert_eq!(result_array.child(1).len(), 1); +} From 77e4d05fe0f199ccfaad578e58278329534a9c3d Mon Sep 17 00:00:00 2001 From: Liam Bao Date: Tue, 31 Mar 2026 09:59:46 -0400 Subject: [PATCH 75/80] [Json] Add json reader benchmarks for Map and REE (#9616) # Which issue does this PR close? - Relates to #9497. # Rationale for this change # What changes are included in this PR? As part of the effort to move the Json reader away from `ArrayData` toward typed `ArrayRef` APIs, it's necessary to change the `ArrayDecoder::decode` interface to return `ArrayRef` directly and update all decoder implementations (list, struct, map, run-end encoded) to construct typed arrays without intermediate `ArrayData` round-trips. New benchmarks for map and run-end encoded decoding are added to verify there is no performance regression. # Are these changes tested? Yes # Are there any user-facing changes?
No --- arrow-json/benches/json_reader.rs | 195 ++++++++++++++++++++++++++++-- 1 file changed, 184 insertions(+), 11 deletions(-) diff --git a/arrow-json/benches/json_reader.rs b/arrow-json/benches/json_reader.rs index fccac68d9bfa..9d0dd8e9a108 100644 --- a/arrow-json/benches/json_reader.rs +++ b/arrow-json/benches/json_reader.rs @@ -28,14 +28,28 @@ use std::fmt::Write; use std::hint::black_box; use std::sync::Arc; +// Shared const ROWS: usize = 1 << 17; // 128K rows const BATCH_SIZE: usize = 1 << 13; // 8K rows per batch +// Wide object / struct const WIDE_FIELDS: usize = 64; -const BINARY_BYTES: usize = 64; const WIDE_PROJECTION_TOTAL_FIELDS: usize = 100; // 100 fields total, select only 3 -const LIST_SHORT_ELEMENTS: usize = 5; -const LIST_LONG_ELEMENTS: usize = 100; + +// Binary +const BINARY_BYTES: usize = 64; + +// List +const SHORT_LIST_ELEMENTS: usize = 5; +const LONG_LIST_ELEMENTS: usize = 100; + +// Map +const SMALL_MAP_ENTRIES: usize = 5; +const LARGE_MAP_ENTRIES: usize = 50; + +// Run-end encoded +const SHORT_REE_RUN_LENGTH: usize = 2; +const LONG_REE_RUN_LENGTH: usize = 100; fn decode_and_flush(decoder: &mut Decoder, data: &[u8]) { let mut offset = 0; @@ -289,19 +303,174 @@ fn bench_decode_list(c: &mut Criterion) { let schema = build_list_schema(); // Short lists: tests list handling overhead (few elements per row) - let short_data = build_list_json(ROWS, LIST_SHORT_ELEMENTS); - bench_decode_schema(c, "decode_list_short_i64_json", &short_data, schema.clone()); + let short_data = build_list_json(ROWS, SHORT_LIST_ELEMENTS); + bench_decode_schema(c, "decode_short_list_i64_json", &short_data, schema.clone()); // Long lists: tests child element decode throughput (many elements per row) - let long_data = build_list_json(ROWS, LIST_LONG_ELEMENTS); - bench_decode_schema(c, "decode_list_long_i64_json", &long_data, schema); + let long_data = build_list_json(ROWS, LONG_LIST_ELEMENTS); + bench_decode_schema(c, "decode_long_list_i64_json", &long_data, schema); } fn 
bench_serialize_list(c: &mut Criterion) { let schema = build_list_schema(); - let short_values = build_list_values(ROWS, LIST_SHORT_ELEMENTS); - c.bench_function("decode_list_short_i64_serialize", |b| { + let short_values = build_list_values(ROWS, SHORT_LIST_ELEMENTS); + c.bench_function("decode_short_list_i64_serialize", |b| { + b.iter(|| { + let mut decoder = ReaderBuilder::new(schema.clone()) + .with_batch_size(BATCH_SIZE) + .build_decoder() + .unwrap(); + decoder.serialize(&short_values).unwrap(); + while let Some(_batch) = decoder.flush().unwrap() {} + }) + }); + + let long_values = build_list_values(ROWS, LONG_LIST_ELEMENTS); + c.bench_function("decode_long_list_i64_serialize", |b| { + b.iter(|| { + let mut decoder = ReaderBuilder::new(schema.clone()) + .with_batch_size(BATCH_SIZE) + .build_decoder() + .unwrap(); + decoder.serialize(&long_values).unwrap(); + while let Some(_batch) = decoder.flush().unwrap() {} + }) + }); +} + +fn build_map_json(rows: usize, entries: usize) -> Vec { + let mut out = String::with_capacity(rows * (entries * 20 + 16)); + for row in 0..rows { + out.push_str("{\"map\":{"); + for i in 0..entries { + if i > 0 { + out.push(','); + } + write!(&mut out, "\"k{}\":{}", i, (row + i) as i64).unwrap(); + } + out.push_str("}}\n"); + } + out.into_bytes() +} + +fn build_map_values(rows: usize, entries: usize) -> Vec { + let mut out = Vec::with_capacity(rows); + for row in 0..rows { + let mut inner = Map::with_capacity(entries); + for i in 0..entries { + inner.insert( + format!("k{i}"), + Value::Number(Number::from((row + i) as i64)), + ); + } + let mut map = Map::with_capacity(1); + map.insert("map".to_string(), Value::Object(inner)); + out.push(Value::Object(map)); + } + out +} + +fn build_map_schema() -> Arc { + let entries_field = Arc::new(Field::new( + "entries", + DataType::Struct( + vec![ + Field::new("keys", DataType::Utf8, false), + Field::new("values", DataType::Int64, true), + ] + .into(), + ), + false, + )); + 
Arc::new(Schema::new(vec![Field::new( + "map", + DataType::Map(entries_field, false), + false, + )])) +} + +fn bench_decode_map(c: &mut Criterion) { + let schema = build_map_schema(); + + let small_data = build_map_json(ROWS, SMALL_MAP_ENTRIES); + bench_decode_schema(c, "decode_small_map_json", &small_data, schema.clone()); + + let large_data = build_map_json(ROWS, LARGE_MAP_ENTRIES); + bench_decode_schema(c, "decode_large_map_json", &large_data, schema); +} + +fn bench_serialize_map(c: &mut Criterion) { + let schema = build_map_schema(); + + let small_values = build_map_values(ROWS, SMALL_MAP_ENTRIES); + c.bench_function("decode_small_map_serialize", |b| { + b.iter(|| { + let mut decoder = ReaderBuilder::new(schema.clone()) + .with_batch_size(BATCH_SIZE) + .build_decoder() + .unwrap(); + decoder.serialize(&small_values).unwrap(); + while let Some(_batch) = decoder.flush().unwrap() {} + }) + }); + + let large_values = build_map_values(ROWS, LARGE_MAP_ENTRIES); + c.bench_function("decode_large_map_serialize", |b| { + b.iter(|| { + let mut decoder = ReaderBuilder::new(schema.clone()) + .with_batch_size(BATCH_SIZE) + .build_decoder() + .unwrap(); + decoder.serialize(&large_values).unwrap(); + while let Some(_batch) = decoder.flush().unwrap() {} + }) + }); +} + +fn build_ree_json(rows: usize, run_length: usize) -> Vec { + let mut out = String::with_capacity(rows * 24); + for row in 0..rows { + let value = (row / run_length) as i64; + writeln!(&mut out, "{{\"val\":{value}}}").unwrap(); + } + out.into_bytes() +} + +fn build_ree_values(rows: usize, run_length: usize) -> Vec { + let mut out = Vec::with_capacity(rows); + for row in 0..rows { + let value = (row / run_length) as i64; + let mut map = Map::with_capacity(1); + map.insert("val".to_string(), Value::Number(Number::from(value))); + out.push(Value::Object(map)); + } + out +} + +fn build_ree_schema() -> Arc { + let ree_type = DataType::RunEndEncoded( + Arc::new(Field::new("run_ends", DataType::Int32, false)), + 
Arc::new(Field::new("values", DataType::Int64, true)), + ); + Arc::new(Schema::new(vec![Field::new("val", ree_type, false)])) +} + +fn bench_decode_ree(c: &mut Criterion) { + let schema = build_ree_schema(); + + let short_data = build_ree_json(ROWS, SHORT_REE_RUN_LENGTH); + bench_decode_schema(c, "decode_short_ree_runs_json", &short_data, schema.clone()); + + let long_data = build_ree_json(ROWS, LONG_REE_RUN_LENGTH); + bench_decode_schema(c, "decode_long_ree_runs_json", &long_data, schema); +} + +fn bench_serialize_ree(c: &mut Criterion) { + let schema = build_ree_schema(); + + let short_values = build_ree_values(ROWS, SHORT_REE_RUN_LENGTH); + c.bench_function("decode_short_ree_runs_serialize", |b| { b.iter(|| { let mut decoder = ReaderBuilder::new(schema.clone()) .with_batch_size(BATCH_SIZE) @@ -312,8 +481,8 @@ fn bench_serialize_list(c: &mut Criterion) { }) }); - let long_values = build_list_values(ROWS, LIST_LONG_ELEMENTS); - c.bench_function("decode_list_long_i64_serialize", |b| { + let long_values = build_ree_values(ROWS, LONG_REE_RUN_LENGTH); + c.bench_function("decode_long_ree_runs_serialize", |b| { b.iter(|| { let mut decoder = ReaderBuilder::new(schema.clone()) .with_batch_size(BATCH_SIZE) @@ -402,6 +571,10 @@ criterion_group!( bench_wide_projection, bench_decode_list, bench_serialize_list, + bench_decode_map, + bench_serialize_map, + bench_decode_ree, + bench_serialize_ree, bench_schema_inference ); criterion_main!(benches); From 1a169cd638aa4b72ccb4961e37e5014a66308718 Mon Sep 17 00:00:00 2001 From: Alexander Rafferty Date: Wed, 1 Apr 2026 06:27:16 +1100 Subject: [PATCH 76/80] Fix `MutableBuffer::clear` (#9622) # Which issue does this PR close? - closes https://github.com/apache/arrow-rs/pull/9593 # Rationale for this change In a previous PR (#9593), I changed instances of `truncate(0)` to `clear()`.
However, this breaks the test `test_truncate_with_pool` at `arrow-buffer/src/buffer/mutable.rs:1357`, due to an inconsistency between the implementation of `truncate` and `clear`. This PR fixes that test. # What changes are included in this PR? This PR copies a section of code related to the `pool` feature present in `truncate` but absent in `clear`, fixing the failing unit test. # Are these changes tested? Yes. # Are there any user-facing changes? No. --- arrow-buffer/src/buffer/mutable.rs | 10 ++++++++-- arrow-json/src/reader/value_iter.rs | 2 +- parquet/tests/geospatial.rs | 4 ++-- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/arrow-buffer/src/buffer/mutable.rs b/arrow-buffer/src/buffer/mutable.rs index 9fc860506194..07ef965cb082 100644 --- a/arrow-buffer/src/buffer/mutable.rs +++ b/arrow-buffer/src/buffer/mutable.rs @@ -450,7 +450,13 @@ impl MutableBuffer { /// Clear all existing data from this buffer. pub fn clear(&mut self) { - self.len = 0 + self.len = 0; + #[cfg(feature = "pool")] + { + if let Some(reservation) = self.reservation.lock().unwrap().as_mut() { + reservation.resize(self.len); + } + } } /// Returns the data stored in this buffer as a slice. 
@@ -1371,7 +1377,7 @@ mod tests { assert_eq!(pool.used(), 40); // Truncate to zero - buffer.truncate(0); + buffer.clear(); assert_eq!(buffer.len(), 0); assert_eq!(pool.used(), 0); } diff --git a/arrow-json/src/reader/value_iter.rs b/arrow-json/src/reader/value_iter.rs index f70b893f52a0..ebaba695adf3 100644 --- a/arrow-json/src/reader/value_iter.rs +++ b/arrow-json/src/reader/value_iter.rs @@ -73,7 +73,7 @@ impl Iterator for ValueIter { } loop { - self.line_buf.truncate(0); + self.line_buf.clear(); match self.reader.read_line(&mut self.line_buf) { Ok(0) => { // read_line returns 0 when stream reached EOF diff --git a/parquet/tests/geospatial.rs b/parquet/tests/geospatial.rs index 4f449df920e8..fcc93661ed97 100644 --- a/parquet/tests/geospatial.rs +++ b/parquet/tests/geospatial.rs @@ -380,8 +380,8 @@ mod test { for i in 0..reader.num_row_groups() { let row_group = reader.get_row_group(i).unwrap(); - values.truncate(0); - def_levels.truncate(0); + values.clear(); + def_levels.clear(); let mut row_group_out = writer.next_row_group().unwrap(); From f91231160716d2b726a6bd01ef1b596c9ff69e17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Tue, 31 Mar 2026 21:31:35 +0200 Subject: [PATCH 77/80] feat(parquet): derive `PartialEq` and `Eq` for `CdcOptions` (#9602) # Rationale for this change CdcOptions only contains primitive fields (usize, usize, i32) so deriving PartialEq and Eq is straightforward. This is needed by downstream crates such as DataFusion that embed CdcOptions in their own configuration structs and need to compare them. # What changes are included in this PR? Implemented PartialEq and Eq for CdcOptions. # Are these changes tested? Added an equality test. # Are there any user-facing changes? No. 
--- parquet/src/file/properties.rs | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/parquet/src/file/properties.rs b/parquet/src/file/properties.rs index ae15cc6b8263..640a7a075d2f 100644 --- a/parquet/src/file/properties.rs +++ b/parquet/src/file/properties.rs @@ -80,7 +80,7 @@ pub const DEFAULT_CDC_NORM_LEVEL: i32 = 0; /// following options control the chunks' size and the chunking process. Note /// that the chunk size is calculated based on the logical value of the data, /// before any encoding or compression is applied. -#[derive(Debug, Clone, Copy)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct CdcOptions { /// Minimum chunk size in bytes, default is 256 KiB. /// The rolling hash will not be updated until this size is reached for each chunk. @@ -1864,4 +1864,18 @@ mod tests { } } } + + #[test] + fn test_cdc_options_equality() { + let opts = CdcOptions::default(); + assert_eq!(opts, CdcOptions::default()); + + let custom = CdcOptions { + min_chunk_size: 1024, + max_chunk_size: 8192, + norm_level: 1, + }; + assert_eq!(custom, custom); + assert_ne!(opts, custom); + } } From 51bf8a40f72e37528cf36419f8f453ccd0e45868 Mon Sep 17 00:00:00 2001 From: Konstantin Tarasov <33369833+sdf-jkl@users.noreply.github.com> Date: Tue, 31 Mar 2026 15:44:32 -0400 Subject: [PATCH 78/80] [Variant] extend shredded null handling for arrays (#9599) # Which issue does this PR close? - Closes #8400. # Rationale for this change See the linked issue (#8400). # What changes are included in this PR? - Added `AppendNullMode` enum supporting all semantics. - Replaced the bool logic with the new enum - Fixed test outputs for List Array cases # Are these changes tested? - Added unit tests # Are there any user-facing changes?
--- parquet-variant-compute/src/shred_variant.rs | 242 ++++++++++++++---- .../src/type_conversion.rs | 24 +- parquet-variant-compute/src/variant_get.rs | 53 ++++ .../src/variant_to_arrow.rs | 238 ++++++++++++----- 4 files changed, 436 insertions(+), 121 deletions(-) diff --git a/parquet-variant-compute/src/shred_variant.rs b/parquet-variant-compute/src/shred_variant.rs index 6520ea700b0c..d80d2f9863f6 100644 --- a/parquet-variant-compute/src/shred_variant.rs +++ b/parquet-variant-compute/src/shred_variant.rs @@ -84,7 +84,7 @@ pub fn shred_variant(array: &VariantArray, as_type: &DataType) -> Result Result nulls.append_null(), + Self::ObjectField | Self::ArrayElement => nulls.append_non_null(), + } + match self { + Self::TopLevelVariant | Self::ObjectField => value_builder.append_null(), + Self::ArrayElement => value_builder.append_value(Variant::Null), + } + } +} + pub(crate) fn make_variant_to_shredded_variant_arrow_row_builder<'a>( data_type: &'a DataType, cast_options: &'a CastOptions, capacity: usize, - top_level: bool, + null_value: NullValue, ) -> Result> { let builder = match data_type { DataType::Struct(fields) => { @@ -114,7 +145,7 @@ pub(crate) fn make_variant_to_shredded_variant_arrow_row_builder<'a>( fields, cast_options, capacity, - top_level, + null_value, )?; VariantToShreddedVariantRowBuilder::Object(typed_value_builder) } @@ -127,6 +158,7 @@ pub(crate) fn make_variant_to_shredded_variant_arrow_row_builder<'a>( data_type, cast_options, capacity, + null_value, )?; VariantToShreddedVariantRowBuilder::Array(typed_value_builder) } @@ -156,7 +188,7 @@ pub(crate) fn make_variant_to_shredded_variant_arrow_row_builder<'a>( let builder = make_primitive_variant_to_arrow_row_builder(data_type, cast_options, capacity)?; let typed_value_builder = - VariantToShreddedPrimitiveVariantRowBuilder::new(builder, capacity, top_level); + VariantToShreddedPrimitiveVariantRowBuilder::new(builder, capacity, null_value); 
VariantToShreddedVariantRowBuilder::Primitive(typed_value_builder) } DataType::FixedSizeBinary(_) => { @@ -204,33 +236,31 @@ impl<'a> VariantToShreddedVariantRowBuilder<'a> { } } -/// A top-level variant shredder -- appending NULL produces typed_value=NULL and value=Variant::Null +/// A shredded primitive field builder. pub(crate) struct VariantToShreddedPrimitiveVariantRowBuilder<'a> { value_builder: VariantValueArrayBuilder, typed_value_builder: PrimitiveVariantToArrowRowBuilder<'a>, nulls: NullBufferBuilder, - top_level: bool, + null_value: NullValue, } impl<'a> VariantToShreddedPrimitiveVariantRowBuilder<'a> { pub(crate) fn new( typed_value_builder: PrimitiveVariantToArrowRowBuilder<'a>, capacity: usize, - top_level: bool, + null_value: NullValue, ) -> Self { Self { value_builder: VariantValueArrayBuilder::new(capacity), typed_value_builder, nulls: NullBufferBuilder::new(capacity), - top_level, + null_value, } } fn append_null(&mut self) -> Result<()> { - // Only the top-level struct that represents the variant can be nullable; object fields and - // array elements are non-nullable. 
- self.nulls.append(!self.top_level); - self.value_builder.append_null(); + self.null_value + .append_to(&mut self.nulls, &mut self.value_builder); self.typed_value_builder.append_null() } @@ -256,6 +286,8 @@ impl<'a> VariantToShreddedPrimitiveVariantRowBuilder<'a> { pub(crate) struct VariantToShreddedArrayVariantRowBuilder<'a> { value_builder: VariantValueArrayBuilder, typed_value_builder: ArrayVariantToArrowRowBuilder<'a>, + nulls: NullBufferBuilder, + null_value: NullValue, } impl<'a> VariantToShreddedArrayVariantRowBuilder<'a> { @@ -263,6 +295,7 @@ impl<'a> VariantToShreddedArrayVariantRowBuilder<'a> { data_type: &'a DataType, cast_options: &'a CastOptions, capacity: usize, + null_value: NullValue, ) -> Result { Ok(Self { value_builder: VariantValueArrayBuilder::new(capacity), @@ -271,11 +304,14 @@ impl<'a> VariantToShreddedArrayVariantRowBuilder<'a> { cast_options, capacity, )?, + nulls: NullBufferBuilder::new(capacity), + null_value, }) } fn append_null(&mut self) -> Result<()> { - self.value_builder.append_value(Variant::Null); + self.null_value + .append_to(&mut self.nulls, &mut self.value_builder); self.typed_value_builder.append_null()?; Ok(()) } @@ -285,12 +321,14 @@ impl<'a> VariantToShreddedArrayVariantRowBuilder<'a> { // If the variant is an array, value must be null. 
match variant { Variant::List(list) => { + self.nulls.append_non_null(); self.value_builder.append_null(); self.typed_value_builder .append_value(&Variant::List(list))?; Ok(true) } other => { + self.nulls.append_non_null(); self.value_builder.append_value(other); self.typed_value_builder.append_null()?; Ok(false) @@ -298,13 +336,11 @@ impl<'a> VariantToShreddedArrayVariantRowBuilder<'a> { } } - fn finish(self) -> Result<(BinaryViewArray, ArrayRef, Option)> { + fn finish(mut self) -> Result<(BinaryViewArray, ArrayRef, Option)> { Ok(( self.value_builder.build()?, self.typed_value_builder.finish()?, - // All elements of an array must be present (not missing) because - // the array Variant encoding does not allow missing elements - None, + self.nulls.finish(), )) } } @@ -314,7 +350,7 @@ pub(crate) struct VariantToShreddedObjectVariantRowBuilder<'a> { typed_value_builders: IndexMap<&'a str, VariantToShreddedVariantRowBuilder<'a>>, typed_value_nulls: NullBufferBuilder, nulls: NullBufferBuilder, - top_level: bool, + null_value: NullValue, } impl<'a> VariantToShreddedObjectVariantRowBuilder<'a> { @@ -322,14 +358,14 @@ impl<'a> VariantToShreddedObjectVariantRowBuilder<'a> { fields: &'a Fields, cast_options: &'a CastOptions, capacity: usize, - top_level: bool, + null_value: NullValue, ) -> Result { let typed_value_builders = fields.iter().map(|field| { let builder = make_variant_to_shredded_variant_arrow_row_builder( field.data_type(), cast_options, capacity, - false, + NullValue::ObjectField, )?; Ok((field.name().as_str(), builder)) }); @@ -338,15 +374,13 @@ impl<'a> VariantToShreddedObjectVariantRowBuilder<'a> { typed_value_builders: typed_value_builders.collect::>()?, typed_value_nulls: NullBufferBuilder::new(capacity), nulls: NullBufferBuilder::new(capacity), - top_level, + null_value, }) } fn append_null(&mut self) -> Result<()> { - // Only the top-level struct that represents the variant can be nullable; object fields and - // array elements are non-nullable. 
- self.nulls.append(!self.top_level); - self.value_builder.append_null(); + self.null_value + .append_to(&mut self.nulls, &mut self.value_builder); self.typed_value_nulls.append_null(); for (_, typed_value_builder) in &mut self.typed_value_builders { typed_value_builder.append_null()?; @@ -669,6 +703,12 @@ mod tests { use std::sync::Arc; use uuid::Uuid; + const NULL_VALUES: [NullValue; 3] = [ + NullValue::TopLevelVariant, + NullValue::ObjectField, + NullValue::ArrayElement, + ]; + #[derive(Clone)] enum VariantValue<'a> { Value(Variant<'a, 'a>), @@ -881,7 +921,9 @@ mod tests { expected_variant.clone() ); } - None => unreachable!(), + None => { + assert!(fallbacks.0.is_null(idx)); + } } } } @@ -949,6 +991,121 @@ mod tests { } } + fn assert_append_null_mode_value_and_struct_nulls( + mode: NullValue, + value: &BinaryViewArray, + nulls: Option<&arrow::buffer::NullBuffer>, + ) { + if mode == NullValue::TopLevelVariant { + assert!(nulls.is_some_and(|n| n.is_null(0))); + } else { + assert!(nulls.is_none()); + } + + if mode == NullValue::ArrayElement { + assert!(value.is_valid(0)); + assert_eq!( + Variant::new(EMPTY_VARIANT_METADATA_BYTES, value.value(0)), + Variant::Null + ); + } else { + assert!(value.is_null(0)); + } + } + + #[test] + fn test_append_null_mode_semantics_primitive_builder() { + let cast_options = arrow::compute::CastOptions::default(); + + for mode in NULL_VALUES { + let mut primitive_builder = make_variant_to_shredded_variant_arrow_row_builder( + &DataType::Int64, + &cast_options, + 1, + mode, + ) + .unwrap(); + primitive_builder.append_null().unwrap(); + let (primitive_value, primitive_typed_value, primitive_nulls) = + primitive_builder.finish().unwrap(); + let primitive_typed_value = primitive_typed_value + .as_any() + .downcast_ref::() + .unwrap(); + + assert!(primitive_typed_value.is_null(0)); + assert_append_null_mode_value_and_struct_nulls( + mode, + &primitive_value, + primitive_nulls.as_ref(), + ); + } + } + + #[test] + fn 
test_append_null_mode_semantics_array_builder() { + let cast_options = arrow::compute::CastOptions::default(); + let list_type = DataType::List(Arc::new(Field::new("item", DataType::Int64, true))); + + for mode in NULL_VALUES { + let mut array_builder = make_variant_to_shredded_variant_arrow_row_builder( + &list_type, + &cast_options, + 1, + mode, + ) + .unwrap(); + array_builder.append_null().unwrap(); + let (value, typed_value, nulls) = array_builder.finish().unwrap(); + + assert_append_null_mode_value_and_struct_nulls(mode, &value, nulls.as_ref()); + + let typed_value = typed_value.as_any().downcast_ref::().unwrap(); + assert_eq!(typed_value.len(), 1); + assert!(typed_value.is_null(0)); + assert_eq!(typed_value.values().len(), 0); + } + } + + #[test] + fn test_append_null_mode_semantics_object_builder() { + let cast_options = arrow::compute::CastOptions::default(); + let object_type = DataType::Struct(Fields::from(vec![ + Field::new("id", DataType::Int64, true), + Field::new("name", DataType::Utf8, true), + ])); + + for mode in NULL_VALUES { + let mut object_builder = make_variant_to_shredded_variant_arrow_row_builder( + &object_type, + &cast_options, + 1, + mode, + ) + .unwrap(); + object_builder.append_null().unwrap(); + let (value, typed_value, nulls) = object_builder.finish().unwrap(); + + assert_append_null_mode_value_and_struct_nulls(mode, &value, nulls.as_ref()); + + let typed_struct = typed_value + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(typed_struct.len(), 1); + assert!(typed_struct.is_null(0)); + + for field_name in ["id", "name"] { + let field = ShreddedVariantFieldArray::try_new( + typed_struct.column_by_name(field_name).unwrap(), + ) + .unwrap(); + assert!(field.value_field().unwrap().is_null(0)); + assert!(field.typed_value_field().unwrap().is_null(0)); + } + } + } + #[test] fn test_already_shredded_input_error() { // Create a VariantArray that already has typed_value_field @@ -1338,13 +1495,7 @@ mod tests { 5, &[0, 3, 6, 6, 6, 6], 
&[Some(3), Some(3), None, None, Some(0)], - &[ - None, - None, - Some(Variant::from("not a list")), - Some(Variant::Null), - None, - ], + &[None, None, Some(Variant::from("not a list")), None, None], ( &[Some(1), Some(2), Some(3), Some(1), None, None], &[ @@ -1414,13 +1565,7 @@ mod tests { 5, &[0, 3, 6, 6, 6], &[Some(3), Some(3), None, None, Some(0)], - &[ - None, - None, - Some(Variant::from("not a list")), - Some(Variant::Null), - None, - ], + &[None, None, Some(Variant::from("not a list")), None, None], ( &[Some(1), Some(2), Some(3), Some(1), None, None], &[ @@ -1522,12 +1667,7 @@ mod tests { 4, &[0, 3, 6, 6, 6], &[Some(3), Some(3), None, None], - &[ - None, - None, - Some(Variant::from("not a list")), - Some(Variant::Null), - ], + &[None, None, Some(Variant::from("not a list")), None], ); let outer_elements = @@ -1615,7 +1755,7 @@ mod tests { 3, &[0, 2, 2, 2], &[Some(2), None, None], - &[None, Some(Variant::from("not a list")), Some(Variant::Null)], + &[None, Some(Variant::from("not a list")), None], ); // Validate nested struct fields for each element @@ -2101,13 +2241,7 @@ mod tests { scores_field.len(), &[0i32, 2, 4, 4, 4, 4], &[Some(2), Some(2), None, None, None], - &[ - None, - None, - Some(Variant::Null), - Some(Variant::Null), - Some(Variant::Null), - ], + &[None, None, None, None, None], ( &[Some(10), Some(20), None, None], &[None, None, Some(Variant::from("oops")), Some(Variant::Null)], diff --git a/parquet-variant-compute/src/type_conversion.rs b/parquet-variant-compute/src/type_conversion.rs index 4086a2410792..7b9eb67d1a95 100644 --- a/parquet-variant-compute/src/type_conversion.rs +++ b/parquet-variant-compute/src/type_conversion.rs @@ -17,11 +17,12 @@ //! Module for transforming a typed arrow `Array` to `VariantArray`. 
-use arrow::compute::{DecimalCast, rescale_decimal}; +use arrow::compute::{CastOptions, DecimalCast, rescale_decimal}; use arrow::datatypes::{ self, ArrowPrimitiveType, ArrowTimestampType, Decimal32Type, Decimal64Type, Decimal128Type, DecimalType, }; +use arrow::error::{ArrowError, Result}; use chrono::Timelike; use parquet_variant::{Variant, VariantDecimal4, VariantDecimal8, VariantDecimal16}; @@ -37,6 +38,27 @@ pub(crate) trait TimestampFromVariant: ArrowTimestampType { fn from_variant(variant: &Variant<'_, '_>) -> Option; } +/// Cast a single `Variant` value with safe/strict semantics. +/// +/// Returns `Ok(Some(_))` on successful conversion. +/// Returns `Ok(None)` when conversion fails in safe mode or the source value is `Variant::Null`. +/// Returns `Err(_)` when conversion fails in strict mode. +pub(crate) fn variant_cast_with_options<'a, 'm, 'v, T>( + variant: &'a Variant<'m, 'v>, + cast_options: &CastOptions<'_>, + cast: impl FnOnce(&'a Variant<'m, 'v>) -> Option, +) -> Result> { + if let Some(value) = cast(variant) { + Ok(Some(value)) + } else if matches!(variant, Variant::Null) || cast_options.safe { + Ok(None) + } else { + Err(ArrowError::CastError(format!( + "Failed to cast variant value {variant:?}" + ))) + } +} + /// Macro to generate PrimitiveFromVariant implementations for Arrow primitive types macro_rules! impl_primitive_from_variant { ($arrow_type:ty, $variant_method:ident $(, $cast_fn:expr)?) 
=> { diff --git a/parquet-variant-compute/src/variant_get.rs b/parquet-variant-compute/src/variant_get.rs index 3e9892cacf70..73906f70eb77 100644 --- a/parquet-variant-compute/src/variant_get.rs +++ b/parquet-variant-compute/src/variant_get.rs @@ -4270,6 +4270,59 @@ mod test { } } + #[test] + fn test_variant_get_list_like_unsafe_cast_preserves_null_elements() { + let string_array: ArrayRef = Arc::new(StringArray::from(vec![r#"[1, null, 3]"#])); + let variant_array = ArrayRef::from(json_to_variant(&string_array).unwrap()); + let cast_options = CastOptions { + safe: false, + ..Default::default() + }; + let options = GetOptions::new() + .with_as_type(Some(FieldRef::from(Field::new( + "result", + DataType::List(Arc::new(Field::new("item", DataType::Int64, true))), + true, + )))) + .with_cast_options(cast_options); + + let result = variant_get(&variant_array, options).unwrap(); + let element_struct = result + .as_any() + .downcast_ref::() + .unwrap() + .values() + .as_any() + .downcast_ref::() + .unwrap(); + + let value = element_struct + .column_by_name("value") + .unwrap() + .as_any() + .downcast_ref::() + .unwrap(); + let typed_value = element_struct + .column_by_name("typed_value") + .unwrap() + .as_any() + .downcast_ref::() + .unwrap(); + + assert_eq!(typed_value.len(), 3); + assert_eq!(typed_value.value(0), 1); + assert!(typed_value.is_null(1)); + assert_eq!(typed_value.value(2), 3); + + assert!(value.is_null(0)); + assert!(value.is_valid(1)); + assert_eq!( + Variant::new(EMPTY_VARIANT_METADATA_BYTES, value.value(1)), + Variant::Null + ); + assert!(value.is_null(2)); + } + #[test] fn test_variant_get_list_like_unsafe_cast_errors_on_non_list() { let string_array: ArrayRef = Arc::new(StringArray::from(vec!["[1, 2]", "\"not a list\""])); diff --git a/parquet-variant-compute/src/variant_to_arrow.rs b/parquet-variant-compute/src/variant_to_arrow.rs index dc8fbcd223d2..dd396117d22d 100644 --- a/parquet-variant-compute/src/variant_to_arrow.rs +++ 
b/parquet-variant-compute/src/variant_to_arrow.rs @@ -16,10 +16,12 @@ // under the License. use crate::shred_variant::{ - VariantToShreddedVariantRowBuilder, make_variant_to_shredded_variant_arrow_row_builder, + NullValue, VariantToShreddedVariantRowBuilder, + make_variant_to_shredded_variant_arrow_row_builder, }; use crate::type_conversion::{ - PrimitiveFromVariant, TimestampFromVariant, variant_to_unscaled_decimal, + PrimitiveFromVariant, TimestampFromVariant, variant_cast_with_options, + variant_to_unscaled_decimal, }; use crate::variant_array::ShreddedVariantFieldArray; use crate::{VariantArray, VariantValueArrayBuilder}; @@ -545,30 +547,30 @@ impl<'a> StructVariantToArrowRowBuilder<'a> { } fn append_value(&mut self, value: &Variant<'_, '_>) -> Result { - let Variant::Object(obj) = value else { - if self.cast_options.safe { - self.append_null()?; - return Ok(false); - } - return Err(ArrowError::CastError(format!( - "Failed to extract struct from variant {:?}", - value - ))); - }; - - for (index, field) in self.fields.iter().enumerate() { - match obj.get(field.name()) { - Some(field_value) => { - self.field_builders[index].append_value(field_value)?; - } - None => { - self.field_builders[index].append_null()?; + match variant_cast_with_options(value, self.cast_options, Variant::as_object) { + Ok(Some(obj)) => { + for (index, field) in self.fields.iter().enumerate() { + match obj.get(field.name()) { + Some(field_value) => { + self.field_builders[index].append_value(field_value)?; + } + None => { + self.field_builders[index].append_null()?; + } + } } + + self.nulls.append_non_null(); + Ok(true) + } + Ok(None) => { + self.append_null()?; + Ok(false) } + Err(_) => Err(ArrowError::CastError(format!( + "Failed to extract struct from variant {value:?}" + ))), } - - self.nulls.append_non_null(); - Ok(true) } fn finish(mut self) -> Result { @@ -707,21 +709,24 @@ macro_rules! 
define_variant_to_primitive_builder { } fn append_value(&mut self, $value: &Variant<'_, '_>) -> Result { - if let Some(v) = $value_transform { - self.builder.append_value(v); - Ok(true) - } else { - if !self.cast_options.safe { - // Unsafe casting: return error on conversion failure - return Err(ArrowError::CastError(format!( - "Failed to extract primitive of type {} from variant {:?} at path VariantPath([])", - $type_name, - $value - ))); + match variant_cast_with_options( + $value, + self.cast_options, + |$value| $value_transform, + ) { + Ok(Some(v)) => { + self.builder.append_value(v); + Ok(true) + } + Ok(None) => { + self.builder.append_null(); + Ok(false) } - // Safe casting: append null on conversion failure - self.builder.append_null(); - Ok(false) + Err(_) => Err(ArrowError::CastError(format!( + "Failed to extract primitive of type {type_name} from variant {value:?} at path VariantPath([])", + type_name = $type_name, + value = $value + ))), } } @@ -748,7 +753,7 @@ define_variant_to_primitive_builder!( define_variant_to_primitive_builder!( struct VariantToBooleanArrowRowBuilder<'a> |capacity| -> BooleanBuilder { BooleanBuilder::with_capacity(capacity) }, - |value| value.as_boolean(), + |value| value.as_boolean(), type_name: datatypes::BooleanType::DATA_TYPE ); @@ -821,20 +826,23 @@ where } fn append_value(&mut self, value: &Variant<'_, '_>) -> Result { - if let Some(scaled) = variant_to_unscaled_decimal::(value, self.precision, self.scale) { - self.builder.append_value(scaled); - Ok(true) - } else if self.cast_options.safe { - self.builder.append_null(); - Ok(false) - } else { - Err(ArrowError::CastError(format!( - "Failed to cast to {}(precision={}, scale={}) from variant {:?}", - T::PREFIX, - self.precision, - self.scale, - value - ))) + match variant_cast_with_options(value, self.cast_options, |value| { + variant_to_unscaled_decimal::(value, self.precision, self.scale) + }) { + Ok(Some(scaled)) => { + self.builder.append_value(scaled); + Ok(true) + } + 
Ok(None) => { + self.builder.append_null(); + Ok(false) + } + Err(_) => Err(ArrowError::CastError(format!( + "Failed to cast to {prefix}(precision={precision}, scale={scale}) from variant {value:?}", + prefix = T::PREFIX, + precision = self.precision, + scale = self.scale + ))), } } @@ -863,20 +871,19 @@ impl<'a> VariantToUuidArrowRowBuilder<'a> { } fn append_value(&mut self, value: &Variant<'_, '_>) -> Result { - match value.as_uuid() { - Some(uuid) => { + match variant_cast_with_options(value, self.cast_options, Variant::as_uuid) { + Ok(Some(uuid)) => { self.builder .append_value(uuid.as_bytes()) .map_err(|e| ArrowError::ExternalError(Box::new(e)))?; - Ok(true) } - None if self.cast_options.safe => { + Ok(None) => { self.builder.append_null(); Ok(false) } - None => Err(ArrowError::CastError(format!( - "Failed to extract UUID from variant {value:?}", + Err(_) => Err(ArrowError::CastError(format!( + "Failed to extract UUID from variant {value:?}" ))), } } @@ -919,7 +926,7 @@ where element_data_type, cast_options, capacity, - false, + NullValue::ArrayElement, )?; Ok(Self { field, @@ -938,8 +945,8 @@ where } fn append_value(&mut self, value: &Variant<'_, '_>) -> Result { - match value { - Variant::List(list) => { + match variant_cast_with_options(value, self.cast_options, Variant::as_list) { + Ok(Some(list)) => { for element in list.iter() { self.element_builder.append_value(element)?; self.current_offset = self.current_offset.add_checked(O::ONE)?; @@ -948,13 +955,12 @@ where self.nulls.append_non_null(); Ok(true) } - _ if self.cast_options.safe => { + Ok(None) => { self.append_null()?; Ok(false) } - _ => Err(ArrowError::CastError(format!( - "Failed to extract list from variant {:?}", - value + Err(_) => Err(ArrowError::CastError(format!( + "Failed to extract list from variant {value:?}" ))), } } @@ -1067,11 +1073,18 @@ define_variant_to_primitive_builder!( #[cfg(test)] mod tests { - use super::make_primitive_variant_to_arrow_row_builder; + use super::{ + 
make_primitive_variant_to_arrow_row_builder, make_typed_variant_to_arrow_row_builder, + }; + use arrow::array::{ + Array, Decimal32Array, FixedSizeBinaryArray, Int32Array, ListArray, StructArray, + }; use arrow::compute::CastOptions; use arrow::datatypes::{DataType, Field, Fields, UnionFields, UnionMode}; use arrow::error::ArrowError; + use parquet_variant::{Variant, VariantDecimal4}; use std::sync::Arc; + use uuid::Uuid; #[test] fn make_primitive_builder_rejects_non_primitive_types() { @@ -1120,4 +1133,97 @@ mod tests { } } } + + #[test] + fn strict_cast_allows_variant_null_for_primitive_builder() { + let cast_options = CastOptions { + safe: false, + ..Default::default() + }; + let mut builder = + make_primitive_variant_to_arrow_row_builder(&DataType::Int32, &cast_options, 2) + .unwrap(); + + assert!(!builder.append_value(&Variant::Null).unwrap()); + assert!(builder.append_value(&Variant::Int32(42)).unwrap()); + + let array = builder.finish().unwrap(); + let int_array = array.as_any().downcast_ref::<Int32Array>().unwrap(); + assert!(int_array.is_null(0)); + assert_eq!(int_array.value(1), 42); + } + + #[test] + fn strict_cast_allows_variant_null_for_decimal_builder() { + let cast_options = CastOptions { + safe: false, + ..Default::default() + }; + let mut builder = make_primitive_variant_to_arrow_row_builder( + &DataType::Decimal32(9, 2), + &cast_options, + 2, + ) + .unwrap(); + let decimal_variant: Variant<'_, '_> = VariantDecimal4::try_new(1234, 2).unwrap().into(); + + assert!(!builder.append_value(&Variant::Null).unwrap()); + assert!(builder.append_value(&decimal_variant).unwrap()); + + let array = builder.finish().unwrap(); + let decimal_array = array.as_any().downcast_ref::<Decimal32Array>().unwrap(); + assert!(decimal_array.is_null(0)); + assert_eq!(decimal_array.value(1), 1234); + } + + #[test] + fn strict_cast_allows_variant_null_for_uuid_builder() { + let cast_options = CastOptions { + safe: false, + ..Default::default() + }; + let mut builder = 
make_primitive_variant_to_arrow_row_builder( + &DataType::FixedSizeBinary(16), + &cast_options, + 2, + ) + .unwrap(); + let uuid = Uuid::nil(); + + assert!(!builder.append_value(&Variant::Null).unwrap()); + assert!(builder.append_value(&Variant::Uuid(uuid)).unwrap()); + + let array = builder.finish().unwrap(); + let uuid_array = array + .as_any() + .downcast_ref::<FixedSizeBinaryArray>() + .unwrap(); + assert!(uuid_array.is_null(0)); + assert_eq!(uuid_array.value(1), uuid.as_bytes()); + } + + #[test] + fn strict_cast_allows_variant_null_for_list_and_struct_builders() { + let cast_options = CastOptions { + safe: false, + ..Default::default() + }; + + let list_type = DataType::List(Arc::new(Field::new("item", DataType::Int64, true))); + let mut list_builder = + make_typed_variant_to_arrow_row_builder(&list_type, &cast_options, 1).unwrap(); + assert!(!list_builder.append_value(Variant::Null).unwrap()); + let list_array = list_builder.finish().unwrap(); + let list_array = list_array.as_any().downcast_ref::<ListArray>().unwrap(); + assert!(list_array.is_null(0)); + + let struct_type = + DataType::Struct(Fields::from(vec![Field::new("a", DataType::Int32, true)])); + let mut struct_builder = + make_typed_variant_to_arrow_row_builder(&struct_type, &cast_options, 1).unwrap(); + assert!(!struct_builder.append_value(Variant::Null).unwrap()); + let struct_array = struct_builder.finish().unwrap(); + let struct_array = struct_array.as_any().downcast_ref::<StructArray>().unwrap(); + assert!(struct_array.is_null(0)); + } } From 61b5763a368db4bfa76e8fbafdbf26718f39a031 Mon Sep 17 00:00:00 2001 From: Thomas Tanon Date: Tue, 31 Mar 2026 22:40:44 +0200 Subject: [PATCH 79/80] pyarrow: Small code simplifications (#9594) # Rationale for this change Makes the code simpler and more readable by relying on new PyO3 and Rust features. No behavior should have changed outside of an error message if `__arrow_c_array__` does not return a tuple # What changes are included in this PR? 
- use `.call_method0(M)?` instead of `.getattr(M)?.call0()` - Use `.extract()` that allows more advanced features like directly extracting tuple elements - remove temporary variables just before returning - use &raw const and &raw mut pointers instead of casting and addr_of! --- arrow-pyarrow/src/lib.rs | 155 ++++++++++++--------------------------- 1 file changed, 48 insertions(+), 107 deletions(-) diff --git a/arrow-pyarrow/src/lib.rs b/arrow-pyarrow/src/lib.rs index e396711f873d..95f1d38fddf3 100644 --- a/arrow-pyarrow/src/lib.rs +++ b/arrow-pyarrow/src/lib.rs @@ -61,7 +61,6 @@ use std::convert::{From, TryFrom}; use std::ffi::CStr; -use std::ptr::{addr_of, addr_of_mut}; use std::sync::Arc; use arrow_array::ffi; @@ -156,36 +155,27 @@ impl FromPyArrow for DataType { // method, so prefer it over _export_to_c. // See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html if value.hasattr("__arrow_c_schema__")? { - let capsule = value.getattr("__arrow_c_schema__")?.call0()?; - let capsule = capsule.cast::()?; - validate_pycapsule(capsule, "arrow_schema")?; + let capsule = value.call_method0("__arrow_c_schema__")?.extract()?; + validate_pycapsule(&capsule, "arrow_schema")?; let schema_ptr = capsule .pointer_checked(Some(ARROW_SCHEMA_CAPSULE_NAME))? 
.cast::(); - unsafe { - let dtype = DataType::try_from(schema_ptr.as_ref()).map_err(to_py_err)?; - return Ok(dtype); - } + return unsafe { DataType::try_from(schema_ptr.as_ref()) }.map_err(to_py_err); } validate_class(data_type_class(value.py())?, value)?; - let c_schema = FFI_ArrowSchema::empty(); - let c_schema_ptr = &c_schema as *const FFI_ArrowSchema; - value.call_method1("_export_to_c", (c_schema_ptr as Py_uintptr_t,))?; - let dtype = DataType::try_from(&c_schema).map_err(to_py_err)?; - Ok(dtype) + let mut c_schema = FFI_ArrowSchema::empty(); + value.call_method1("_export_to_c", (&raw mut c_schema as Py_uintptr_t,))?; + DataType::try_from(&c_schema).map_err(to_py_err) } } impl ToPyArrow for DataType { fn to_pyarrow<'py>(&self, py: Python<'py>) -> PyResult> { let c_schema = FFI_ArrowSchema::try_from(self).map_err(to_py_err)?; - let c_schema_ptr = &c_schema as *const FFI_ArrowSchema; - let dtype = - data_type_class(py)?.call_method1("_import_from_c", (c_schema_ptr as Py_uintptr_t,))?; - Ok(dtype) + data_type_class(py)?.call_method1("_import_from_c", (&raw const c_schema as Py_uintptr_t,)) } } @@ -195,36 +185,27 @@ impl FromPyArrow for Field { // method, so prefer it over _export_to_c. // See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html if value.hasattr("__arrow_c_schema__")? { - let capsule = value.getattr("__arrow_c_schema__")?.call0()?; - let capsule = capsule.cast::()?; - validate_pycapsule(capsule, "arrow_schema")?; + let capsule = value.call_method0("__arrow_c_schema__")?.extract()?; + validate_pycapsule(&capsule, "arrow_schema")?; let schema_ptr = capsule .pointer_checked(Some(ARROW_SCHEMA_CAPSULE_NAME))? 
.cast::(); - unsafe { - let field = Field::try_from(schema_ptr.as_ref()).map_err(to_py_err)?; - return Ok(field); - } + return unsafe { Field::try_from(schema_ptr.as_ref()) }.map_err(to_py_err); } validate_class(field_class(value.py())?, value)?; - let c_schema = FFI_ArrowSchema::empty(); - let c_schema_ptr = &c_schema as *const FFI_ArrowSchema; - value.call_method1("_export_to_c", (c_schema_ptr as Py_uintptr_t,))?; - let field = Field::try_from(&c_schema).map_err(to_py_err)?; - Ok(field) + let mut c_schema = FFI_ArrowSchema::empty(); + value.call_method1("_export_to_c", (&raw mut c_schema as Py_uintptr_t,))?; + Field::try_from(&c_schema).map_err(to_py_err) } } impl ToPyArrow for Field { fn to_pyarrow<'py>(&self, py: Python<'py>) -> PyResult> { let c_schema = FFI_ArrowSchema::try_from(self).map_err(to_py_err)?; - let c_schema_ptr = &c_schema as *const FFI_ArrowSchema; - let dtype = - field_class(py)?.call_method1("_import_from_c", (c_schema_ptr as Py_uintptr_t,))?; - Ok(dtype) + field_class(py)?.call_method1("_import_from_c", (&raw const c_schema as Py_uintptr_t,)) } } @@ -234,36 +215,27 @@ impl FromPyArrow for Schema { // method, so prefer it over _export_to_c. // See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html if value.hasattr("__arrow_c_schema__")? { - let capsule = value.getattr("__arrow_c_schema__")?.call0()?; - let capsule = capsule.cast::()?; - validate_pycapsule(capsule, "arrow_schema")?; + let capsule = value.call_method0("__arrow_c_schema__")?.extract()?; + validate_pycapsule(&capsule, "arrow_schema")?; let schema_ptr = capsule .pointer_checked(Some(ARROW_SCHEMA_CAPSULE_NAME))? 
.cast::(); - unsafe { - let schema = Schema::try_from(schema_ptr.as_ref()).map_err(to_py_err)?; - return Ok(schema); - } + return unsafe { Schema::try_from(schema_ptr.as_ref()) }.map_err(to_py_err); } validate_class(schema_class(value.py())?, value)?; - let c_schema = FFI_ArrowSchema::empty(); - let c_schema_ptr = &c_schema as *const FFI_ArrowSchema; - value.call_method1("_export_to_c", (c_schema_ptr as Py_uintptr_t,))?; - let schema = Schema::try_from(&c_schema).map_err(to_py_err)?; - Ok(schema) + let mut c_schema = FFI_ArrowSchema::empty(); + value.call_method1("_export_to_c", (&raw mut c_schema as Py_uintptr_t,))?; + Schema::try_from(&c_schema).map_err(to_py_err) } } impl ToPyArrow for Schema { fn to_pyarrow<'py>(&self, py: Python<'py>) -> PyResult> { let c_schema = FFI_ArrowSchema::try_from(self).map_err(to_py_err)?; - let c_schema_ptr = &c_schema as *const FFI_ArrowSchema; - let schema = - schema_class(py)?.call_method1("_import_from_c", (c_schema_ptr as Py_uintptr_t,))?; - Ok(schema) + schema_class(py)?.call_method1("_import_from_c", (&raw const c_schema as Py_uintptr_t,)) } } @@ -273,21 +245,11 @@ impl FromPyArrow for ArrayData { // method, so prefer it over _export_to_c. // See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html if value.hasattr("__arrow_c_array__")? 
{ - let tuple = value.getattr("__arrow_c_array__")?.call0()?; - - if !tuple.is_instance_of::() { - return Err(PyTypeError::new_err( - "Expected __arrow_c_array__ to return a tuple.", - )); - } - - let schema_capsule = tuple.get_item(0)?; - let schema_capsule = schema_capsule.cast::()?; - let array_capsule = tuple.get_item(1)?; - let array_capsule = array_capsule.cast::()?; + let (schema_capsule, array_capsule) = + value.call_method0("__arrow_c_array__")?.extract()?; - validate_pycapsule(schema_capsule, "arrow_schema")?; - validate_pycapsule(array_capsule, "arrow_array")?; + validate_pycapsule(&schema_capsule, "arrow_schema")?; + validate_pycapsule(&array_capsule, "arrow_array")?; let schema_ptr = schema_capsule .pointer_checked(Some(ARROW_SCHEMA_CAPSULE_NAME))? @@ -315,8 +277,8 @@ impl FromPyArrow for ArrayData { value.call_method1( "_export_to_c", ( - addr_of_mut!(array) as Py_uintptr_t, - addr_of_mut!(schema) as Py_uintptr_t, + &raw mut array as Py_uintptr_t, + &raw mut schema as Py_uintptr_t, ), )?; @@ -328,15 +290,13 @@ impl ToPyArrow for ArrayData { fn to_pyarrow<'py>(&self, py: Python<'py>) -> PyResult> { let array = FFI_ArrowArray::new(self); let schema = FFI_ArrowSchema::try_from(self.data_type()).map_err(to_py_err)?; - - let array = array_class(py)?.call_method1( + array_class(py)?.call_method1( "_import_from_c", ( - addr_of!(array) as Py_uintptr_t, - addr_of!(schema) as Py_uintptr_t, + &raw const array as Py_uintptr_t, + &raw const schema as Py_uintptr_t, ), - )?; - Ok(array) + ) } } @@ -364,21 +324,11 @@ impl FromPyArrow for RecordBatch { // See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html if value.hasattr("__arrow_c_array__")? 
{ - let tuple = value.getattr("__arrow_c_array__")?.call0()?; + let (schema_capsule, array_capsule) = + value.call_method0("__arrow_c_array__")?.extract()?; - if !tuple.is_instance_of::() { - return Err(PyTypeError::new_err( - "Expected __arrow_c_array__ to return a tuple.", - )); - } - - let schema_capsule = tuple.get_item(0)?; - let schema_capsule = schema_capsule.cast::()?; - let array_capsule = tuple.get_item(1)?; - let array_capsule = array_capsule.cast::()?; - - validate_pycapsule(schema_capsule, "arrow_schema")?; - validate_pycapsule(array_capsule, "arrow_array")?; + validate_pycapsule(&schema_capsule, "arrow_schema")?; + validate_pycapsule(&array_capsule, "arrow_array")?; let schema_ptr = schema_capsule .pointer_checked(Some(ARROW_SCHEMA_CAPSULE_NAME))? @@ -455,9 +405,9 @@ impl FromPyArrow for ArrowArrayStreamReader { // method, so prefer it over _export_to_c. // See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html if value.hasattr("__arrow_c_stream__")? { - let capsule = value.getattr("__arrow_c_stream__")?.call0()?; - let capsule = capsule.cast::()?; - validate_pycapsule(capsule, "arrow_array_stream")?; + let capsule = value.call_method0("__arrow_c_stream__")?.extract()?; + + validate_pycapsule(&capsule, "arrow_array_stream")?; let stream = unsafe { FFI_ArrowArrayStream::from_raw( @@ -476,20 +426,17 @@ impl FromPyArrow for ArrowArrayStreamReader { validate_class(record_batch_reader_class(value.py())?, value)?; - // prepare a pointer to receive the stream struct + // prepare the stream struct to receive the content let mut stream = FFI_ArrowArrayStream::empty(); - let stream_ptr = &mut stream as *mut FFI_ArrowArrayStream; // make the conversion through PyArrow's private API // this changes the pointer's memory and is thus unsafe. 
// In particular, `_export_to_c` can go out of bounds - let args = PyTuple::new(value.py(), [stream_ptr as Py_uintptr_t])?; + let args = PyTuple::new(value.py(), [&raw mut stream as Py_uintptr_t])?; value.call_method1("_export_to_c", args)?; - let stream_reader = ArrowArrayStreamReader::try_new(stream) - .map_err(|err| PyValueError::new_err(err.to_string()))?; - - Ok(stream_reader) + ArrowArrayStreamReader::try_new(stream) + .map_err(|err| PyValueError::new_err(err.to_string())) } } @@ -498,13 +445,9 @@ impl IntoPyArrow for Box { // We can't implement `ToPyArrow` for `T: RecordBatchReader + Send` because // there is already a blanket implementation for `T: ToPyArrow`. fn into_pyarrow<'py>(self, py: Python<'py>) -> PyResult> { - let mut stream = FFI_ArrowArrayStream::new(self); - - let stream_ptr = (&mut stream) as *mut FFI_ArrowArrayStream; - let reader = record_batch_reader_class(py)? - .call_method1("_import_from_c", (stream_ptr as Py_uintptr_t,))?; - - Ok(reader) + let stream = FFI_ArrowArrayStream::new(self); + record_batch_reader_class(py)? 
+ .call_method1("_import_from_c", (&raw const stream as Py_uintptr_t,)) } } @@ -588,7 +531,7 @@ impl FromPyArrow for Table { fn from_pyarrow_bound(ob: &Bound) -> PyResult { let reader: Box = Box::new(ArrowArrayStreamReader::from_pyarrow_bound(ob)?); - Self::try_from(reader).map_err(|err| PyErr::new::(err.to_string())) + Self::try_from(reader).map_err(|err| PyValueError::new_err(err.to_string())) } } @@ -601,9 +544,7 @@ impl IntoPyArrow for Table { let kwargs = PyDict::new(py); kwargs.set_item("schema", py_schema)?; - let reader = table_class(py)?.call_method("from_batches", (py_batches,), Some(&kwargs))?; - - Ok(reader) + table_class(py)?.call_method("from_batches", (py_batches,), Some(&kwargs)) } } @@ -664,7 +605,7 @@ impl<'py, T: IntoPyArrow> IntoPyObject<'py> for PyArrowType { type Error = PyErr; - fn into_pyobject(self, py: Python<'py>) -> Result { + fn into_pyobject(self, py: Python<'py>) -> PyResult { self.0.into_pyarrow(py) } } From bc2a922d0db9fed86d91baa902166d5ace64791c Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Wed, 1 Apr 2026 14:42:59 +0200 Subject: [PATCH 80/80] fix(parquet): fix CDC panic on nested ListArrays with null entries (#9637) The CDC chunker's value_offset diverged from actual leaf array positions when null list entries had non-empty child offset ranges (valid per the Arrow columnar format spec). This caused slice_for_chunk to produce incorrect non_null_indices, leading to an out-of-bounds panic in write_mini_batch. Track non-null value counts (nni) separately from leaf slot counts in the chunker, and use them in slice_for_chunk to correctly index into non_null_indices regardless of gaps in the leaf array. 
--- parquet/src/arrow/arrow_writer/levels.rs | 196 +++++++++----------- parquet/src/column/chunker/cdc.rs | 219 ++++++++++++++++++++--- parquet/src/column/chunker/mod.rs | 6 +- 3 files changed, 287 insertions(+), 134 deletions(-) diff --git a/parquet/src/arrow/arrow_writer/levels.rs b/parquet/src/arrow/arrow_writer/levels.rs index d1da24872c49..2ebe1319160f 100644 --- a/parquet/src/arrow/arrow_writer/levels.rs +++ b/parquet/src/arrow/arrow_writer/levels.rs @@ -805,37 +805,26 @@ impl ArrayLevels { /// Create a sliced view of this `ArrayLevels` for a CDC chunk. /// - /// Note: `def_levels`, `rep_levels`, and `non_null_indices` are copied (not zero-copy), - /// while `array` is sliced without copying. + /// The chunk's `value_offset`/`num_values` select the relevant slice of + /// `non_null_indices`. The array is sliced to the range covered by + /// those indices, and they are shifted to be relative to the slice. pub(crate) fn slice_for_chunk(&self, chunk: &CdcChunk) -> Self { - let level_offset = chunk.level_offset; - let num_levels = chunk.num_levels; - let value_offset = chunk.value_offset; - let num_values = chunk.num_values; - let def_levels = self - .def_levels - .as_ref() - .map(|levels| levels[level_offset..level_offset + num_levels].to_vec()); - let rep_levels = self - .rep_levels - .as_ref() - .map(|levels| levels[level_offset..level_offset + num_levels].to_vec()); - - // Filter non_null_indices to [value_offset, value_offset + num_values) - // and shift by -value_offset. Use binary search since the slice is sorted. 
- let value_end = value_offset + num_values; - let start = self - .non_null_indices - .partition_point(|&idx| idx < value_offset); - let end = self - .non_null_indices - .partition_point(|&idx| idx < value_end); - let non_null_indices: Vec = self.non_null_indices[start..end] - .iter() - .map(|&idx| idx - value_offset) - .collect(); + let def_levels = self.def_levels.as_ref().map(|levels| { + levels[chunk.level_offset..chunk.level_offset + chunk.num_levels].to_vec() + }); + let rep_levels = self.rep_levels.as_ref().map(|levels| { + levels[chunk.level_offset..chunk.level_offset + chunk.num_levels].to_vec() + }); - let array = self.array.slice(value_offset, num_values); + // Select the non-null indices for this chunk. + let nni = &self.non_null_indices[chunk.value_offset..chunk.value_offset + chunk.num_values]; + // Compute the array range spanned by the non-null indices + let start = nni.first().copied().unwrap_or(0); + let end = nni.last().map_or(0, |&i| i + 1); + // Shift indices to be relative to the sliced array. + let non_null_indices = nni.iter().map(|&idx| idx - start).collect(); + // Slice the array to the computed range. + let array = self.array.slice(start, end - start); let logical_nulls = array.logical_nulls(); Self { @@ -2149,9 +2138,8 @@ mod tests { fn test_slice_for_chunk_flat() { // Case 1: required field (max_def_level=0, no def/rep levels stored). // Array has 6 values; all are non-null so non_null_indices covers every position. - // The chunk selects value_offset=2, num_values=3 → the sub-array [3, 4, 5]. - // Since there are no levels, num_levels=0 and level_offset are irrelevant. - // non_null_indices [0,1,2,3,4,5] filtered to [2,4) and shifted by -2 → [0,1,2]. + // value_offset=2, num_values=3 → non_null_indices[2..5] = [2,3,4]. + // Array is sliced (no def_levels → write_batch_internal uses values.len()). 
let array: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5, 6])); let logical_nulls = array.logical_nulls(); let levels = ArrayLevels { @@ -2176,14 +2164,9 @@ mod tests { // Case 2: optional field (max_def_level=1, def levels present, no rep levels). // Array: [Some(1), None, Some(3), None, Some(5), Some(6)] - // def_levels: [1, 0, 1, 0, 1, 1] (1=non-null, 0=null) - // non_null_indices: [0, 2, 4, 5] (array positions of the four non-null values) - // - // The chunk selects level_offset=1, num_levels=3, value_offset=1, num_values=3: - // - def_levels[1..4] = [0, 1, 0] → null, non-null, null - // - sub-array slice(1, 3) = [None, Some(3), None] - // - non_null_indices filtered to [value_offset=1, value_end=4): only index 2 qualifies, - // shifted by -1 → [1] (position of Some(3) within the sliced sub-array) + // non_null_indices: [0, 2, 4, 5] + // value_offset=1, num_values=1 → non_null_indices[1..2] = [2]. + // The array is sliced to [2..3], the range spanned by those indices. 
let array: ArrayRef = Arc::new(Int32Array::from(vec![ Some(1), None, @@ -2206,90 +2189,85 @@ mod tests { level_offset: 1, num_levels: 3, value_offset: 1, - num_values: 3, + num_values: 1, }); assert_eq!(sliced.def_levels, Some(vec![0, 1, 0])); assert!(sliced.rep_levels.is_none()); - assert_eq!(sliced.non_null_indices, vec![1]); - assert_eq!(sliced.array.len(), 3); + assert_eq!(sliced.non_null_indices, vec![0]); // [2] shifted by -2 (nni[0]) + assert_eq!(sliced.array.len(), 1); } #[test] - fn test_slice_for_chunk_nested() { - // [[1,2],[3],[4,5]]: def=[2,2,2,2,2], rep=[0,1,0,0,1] - // Slice levels 2..5 (def=[2,2,2], rep=[0,0,1]), values 2..5 - let array: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])); + fn test_slice_for_chunk_nested_with_nulls() { + // Regression test for https://github.com/apache/arrow-rs/issues/9637 + // + // Simulates a List where null list entries have non-zero child + // ranges (valid per Arrow spec: "a null value may correspond to a + // non-empty segment in the child array"). This creates gaps in the + // leaf array that don't correspond to any levels. 
+ // + // 5 rows with 2 null list entries owning non-empty child ranges: + // row 0: [1] → leaf[0] + // row 1: null list → owns leaf[1..3] (gap of 2) + // row 2: [2, null] → leaf[3], leaf[4]=null element + // row 3: null list → owns leaf[5..8] (gap of 3) + // row 4: [4, 5] → leaf[8], leaf[9] + // + // def_levels: [3, 0, 3, 2, 0, 3, 3] + // rep_levels: [0, 0, 0, 1, 0, 0, 1] + // non_null_indices: [0, 3, 8, 9] + // gaps in array: 0→3 (skip 1,2), 3→8 (skip 5,6,7) + let array: ArrayRef = Arc::new(Int32Array::from(vec![ + Some(1), // 0: row 0 + None, // 1: gap (null list row 1) + None, // 2: gap (null list row 1) + Some(2), // 3: row 2 + None, // 4: row 2, null element + None, // 5: gap (null list row 3) + None, // 6: gap (null list row 3) + None, // 7: gap (null list row 3) + Some(4), // 8: row 4 + Some(5), // 9: row 4 + ])); let logical_nulls = array.logical_nulls(); let levels = ArrayLevels { - def_levels: Some(vec![2, 2, 2, 2, 2]), - rep_levels: Some(vec![0, 1, 0, 0, 1]), - non_null_indices: vec![0, 1, 2, 3, 4], - max_def_level: 2, + def_levels: Some(vec![3, 0, 3, 2, 0, 3, 3]), + rep_levels: Some(vec![0, 0, 0, 1, 0, 0, 1]), + non_null_indices: vec![0, 3, 8, 9], + max_def_level: 3, max_rep_level: 1, array, logical_nulls, }; - let sliced = levels.slice_for_chunk(&CdcChunk { + + // Chunk 0: rows 0-1, nni=[0] → array sliced to [0..1] + let chunk0 = levels.slice_for_chunk(&CdcChunk { + level_offset: 0, + num_levels: 2, + value_offset: 0, + num_values: 1, + }); + assert_eq!(chunk0.non_null_indices, vec![0]); + assert_eq!(chunk0.array.len(), 1); + + // Chunk 1: rows 2-3, nni=[3] → array sliced to [3..4] + let chunk1 = levels.slice_for_chunk(&CdcChunk { level_offset: 2, num_levels: 3, - value_offset: 2, - num_values: 3, + value_offset: 1, + num_values: 1, }); - assert_eq!(sliced.def_levels, Some(vec![2, 2, 2])); - assert_eq!(sliced.rep_levels, Some(vec![0, 0, 1])); - // [0,1,2,3,4] filtered to [2,5) → [2,3,4] → shifted -2 → [0,1,2] - assert_eq!(sliced.non_null_indices, 
vec![0, 1, 2]); - assert_eq!(sliced.array.len(), 3); - } + assert_eq!(chunk1.non_null_indices, vec![0]); + assert_eq!(chunk1.array.len(), 1); - #[test] - fn test_slice_for_chunk_non_null_indices_boundary() { - // [1, null, 3]: non_null_indices=[0, 2]; test inclusive lower / exclusive upper bounds - let array: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), None, Some(3)])); - let logical_nulls = array.logical_nulls(); - let levels = ArrayLevels { - def_levels: Some(vec![1, 0, 1]), - rep_levels: None, - non_null_indices: vec![0, 2], - max_def_level: 1, - max_rep_level: 0, - array, - logical_nulls, - }; - assert_eq!( - levels - .slice_for_chunk(&CdcChunk { - level_offset: 0, - num_levels: 1, - value_offset: 0, - num_values: 1 - }) - .non_null_indices, - vec![0] - ); - // idx 2 in range [1,3), shifted -1 → 1 - assert_eq!( - levels - .slice_for_chunk(&CdcChunk { - level_offset: 1, - num_levels: 2, - value_offset: 1, - num_values: 2 - }) - .non_null_indices, - vec![1] - ); - // idx 2 excluded from [1,2) - assert_eq!( - levels - .slice_for_chunk(&CdcChunk { - level_offset: 1, - num_levels: 1, - value_offset: 1, - num_values: 1 - }) - .non_null_indices, - Vec::::new() - ); + // Chunk 2: row 4, nni=[8, 9] → array sliced to [8..10] + let chunk2 = levels.slice_for_chunk(&CdcChunk { + level_offset: 5, + num_levels: 2, + value_offset: 2, + num_values: 2, + }); + assert_eq!(chunk2.non_null_indices, vec![0, 1]); + assert_eq!(chunk2.array.len(), 2); } } diff --git a/parquet/src/column/chunker/cdc.rs b/parquet/src/column/chunker/cdc.rs index f21f58780a6a..750735730874 100644 --- a/parquet/src/column/chunker/cdc.rs +++ b/parquet/src/column/chunker/cdc.rs @@ -289,27 +289,39 @@ impl ContentDefinedChunker { let mut chunks = Vec::new(); let mut prev_offset: usize = 0; let mut prev_value_offset: usize = 0; - // Total number of values seen; for non-nested data this equals num_levels. 
- let mut total_values: usize = num_levels; + let mut value_offset: usize = 0; if !has_rep_levels && !has_def_levels { // Fastest path: non-nested, non-null data. + // Every level corresponds to exactly one non-null value, so + // value_offset == level_offset and num_values == num_levels. + // + // Example: required Int32, array = [10, 20, 30] + // level: 0 1 2 + // value_offset: 0 1 2 for offset in 0..num_levels { roll_value(self, offset); if self.need_new_chunk() { chunks.push(CdcChunk { level_offset: prev_offset, - value_offset: prev_offset, num_levels: offset - prev_offset, + value_offset: prev_offset, num_values: offset - prev_offset, }); prev_offset = offset; } } - // Set the previous value offset to add the last chunk. prev_value_offset = prev_offset; + value_offset = num_levels; } else if !has_rep_levels { - // Non-nested data with nulls. + // Non-nested data with nulls. value_offset only increments for + // non-null values (def == max_def), so it diverges from the + // level offset when nulls are present. + // + // Example: optional Int32, array = [1, null, 2, null, 3] + // def_levels: [1, 0, 1, 0, 1] + // level: 0 1 2 3 4 + // value_offset: 0 1 2 (only increments on def==1) let def_levels = def_levels.expect("def_levels required when max_def_level > 0"); #[allow(clippy::needless_range_loop)] for offset in 0..num_levels { @@ -318,23 +330,56 @@ impl ContentDefinedChunker { if def_level == self.max_def_level { roll_value(self, offset); } + // Check boundary before incrementing value_offset so that + // num_values reflects only entries in the completed chunk. 
if self.need_new_chunk() { chunks.push(CdcChunk { level_offset: prev_offset, - value_offset: prev_offset, num_levels: offset - prev_offset, - num_values: offset - prev_offset, + value_offset: prev_value_offset, + num_values: value_offset - prev_value_offset, }); prev_offset = offset; + prev_value_offset = value_offset; + } + if def_level == self.max_def_level { + value_offset += 1; } } - // Set the previous value offset to add the last chunk. - prev_value_offset = prev_offset; } else { - // Nested data with nulls. + // Nested data with nulls. Two counters are needed: + // + // leaf_offset: index into the leaf values array for hashing, + // incremented for all leaf slots (def >= repeated_ancestor_def_level), + // including null elements. + // + // value_offset: index into non_null_indices for chunk boundaries, + // incremented only for non-null leaf values (def == max_def_level). + // + // These diverge when nullable elements exist inside lists. + // + // Example: List with repeated_ancestor_def_level=2, max_def=3 + // row 0: [1, null, 2] (3 leaf slots, 2 non-null) + // row 1: [3] (1 leaf slot, 1 non-null) + // + // leaf array: [1, null, 2, 3] + // def_levels: [3, 2, 3, 3] + // rep_levels: [0, 1, 1, 0] + // + // level def leaf_offset value_offset action + // ───── ─── ─────────── ──────────── ────────────────────────── + // 0 3 0 0 roll_value(0), value++, leaf++ + // 1 2 1 1 leaf++ only (null element) + // 2 3 2 1 roll_value(2), value++, leaf++ + // 3 3 3 2 roll_value(3), value++, leaf++ + // + // roll_value(2) correctly indexes leaf array position 2 (value "2"). + // Using value_offset=1 would index position 1 (the null slot). + // + // Using value_offset for roll_value would hash the wrong array slot. 
let def_levels = def_levels.expect("def_levels required for nested data"); let rep_levels = rep_levels.expect("rep_levels required for nested data"); - let mut value_offset: usize = 0; + let mut leaf_offset: usize = 0; for offset in 0..num_levels { let def_level = def_levels[offset]; @@ -343,43 +388,45 @@ impl ContentDefinedChunker { self.roll_level(def_level); self.roll_level(rep_level); if def_level == self.max_def_level { - roll_value(self, value_offset); + roll_value(self, leaf_offset); } + // Check boundary before incrementing value_offset so that + // num_values reflects only entries in the completed chunk. if rep_level == 0 && self.need_new_chunk() { - // If we are at a record boundary and need a new chunk, create one. let levels_to_write = offset - prev_offset; if levels_to_write > 0 { chunks.push(CdcChunk { level_offset: prev_offset, - value_offset: prev_value_offset, num_levels: levels_to_write, + value_offset: prev_value_offset, num_values: value_offset - prev_value_offset, }); prev_offset = offset; prev_value_offset = value_offset; } } - if def_level >= self.repeated_ancestor_def_level { - // We only increment the value offset if we have a leaf value. + if def_level == self.max_def_level { value_offset += 1; } + if def_level >= self.repeated_ancestor_def_level { + leaf_offset += 1; + } } - total_values = value_offset; } // Add the last chunk if we have any levels left. 
if prev_offset < num_levels { chunks.push(CdcChunk { level_offset: prev_offset, - value_offset: prev_value_offset, num_levels: num_levels - prev_offset, - num_values: total_values - prev_value_offset, + value_offset: prev_value_offset, + num_values: value_offset - prev_value_offset, }); } #[cfg(debug_assertions)] - self.validate_chunks(&chunks, num_levels, total_values); + self.validate_chunks(&chunks, num_levels, value_offset); chunks } @@ -626,8 +673,9 @@ mod tests { assert_eq!(chunks1.len(), chunks2.len()); for (a, b) in chunks1.iter().zip(chunks2.iter()) { assert_eq!(a.level_offset, b.level_offset); - assert_eq!(a.value_offset, b.value_offset); assert_eq!(a.num_levels, b.num_levels); + assert_eq!(a.value_offset, b.value_offset); + assert_eq!(a.num_values, b.num_values); } } @@ -663,9 +711,12 @@ mod arrow_tests { use std::borrow::Borrow; use std::sync::Arc; + use arrow::util::data_gen::create_random_batch; use arrow_array::cast::AsArray; use arrow_array::{Array, ArrayRef, BooleanArray, Int32Array, RecordBatch}; - use arrow_schema::{DataType, Field, Schema}; + use arrow_buffer::Buffer; + use arrow_data::ArrayData; + use arrow_schema::{DataType, Field, Fields, Schema}; use crate::arrow::arrow_reader::ParquetRecordBatchReaderBuilder; use crate::arrow::arrow_writer::ArrowWriter; @@ -2153,4 +2204,128 @@ mod arrow_tests { "all chunks after the first must be identical" ); } + + /// Helper to write a batch with CDC and read it back. 
+ fn cdc_roundtrip(batch: &RecordBatch) -> RecordBatch { + let props = WriterProperties::builder() + .set_content_defined_chunking(Some(CdcOptions::default())) + .build(); + let mut buffer = Vec::new(); + let mut writer = ArrowWriter::try_new(&mut buffer, batch.schema(), Some(props)).unwrap(); + writer.write(batch).unwrap(); + writer.close().unwrap(); + + let reader = ParquetRecordBatchReaderBuilder::try_new(bytes::Bytes::from(buffer)) + .unwrap() + .build() + .unwrap(); + reader.into_iter().next().unwrap().unwrap() + } + + /// Regression test for + /// + /// Writing nested list data with CDC enabled panicked with an out-of-bounds + /// slice access when null list entries had non-zero child ranges. + #[test] + fn test_cdc_list_roundtrip() { + let schema = Arc::new(Schema::new(vec![ + Field::new( + "_1", + DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))), + true, + ), + Field::new( + "_2", + DataType::List(Arc::new(Field::new_list_field(DataType::Boolean, true))), + true, + ), + Field::new( + "_3", + DataType::LargeList(Arc::new(Field::new_list_field(DataType::Utf8, true))), + true, + ), + ])); + let batch = create_random_batch(schema, 2, 0.25, 0.75).unwrap(); + assert_eq!(cdc_roundtrip(&batch), batch); + } + + /// Test CDC with deeply nested types: List>, List>> + #[test] + fn test_cdc_deeply_nested_roundtrip() { + let inner_field = Field::new_list_field(DataType::Int32, true); + let inner_type = DataType::List(Arc::new(inner_field)); + let outer_field = Field::new_list_field(inner_type.clone(), true); + let list_list_type = DataType::List(Arc::new(outer_field)); + + let struct_inner_field = Field::new_list_field(DataType::Int32, true); + let struct_inner_type = DataType::List(Arc::new(struct_inner_field)); + let struct_fields = Fields::from(vec![Field::new("a", struct_inner_type, true)]); + let struct_type = DataType::Struct(struct_fields); + let struct_list_field = Field::new_list_field(struct_type, true); + let list_struct_type = 
DataType::List(Arc::new(struct_list_field)); + + let schema = Arc::new(Schema::new(vec![ + Field::new("list_list", list_list_type, true), + Field::new("list_struct_list", list_struct_type, true), + ])); + let batch = create_random_batch(schema, 200, 0.25, 0.75).unwrap(); + assert_eq!(cdc_roundtrip(&batch), batch); + } + + /// Test CDC with list arrays that have non-empty null segments. + /// + /// Per the Arrow columnar format spec: "a null value may correspond to a + /// non-empty segment in the child array". This test constructs such arrays + /// manually and verifies the CDC writer handles them correctly. + #[test] + fn test_cdc_list_non_empty_null_segments() { + // Build List where null entries own non-zero child ranges: + // row 0: [1, 2] offsets[0..2] valid + // row 1: null offsets[2..5] null, but owns 3 child values + // row 2: [6, 7] offsets[5..7] valid + // row 3: null offsets[7..9] null, but owns 2 child values + // row 4: [10] offsets[9..10] valid + let values = Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]); + let offsets = Buffer::from_iter([0_i32, 2, 5, 7, 9, 10]); + let null_bitmap = Buffer::from([0b00010101]); // rows 0, 2, 4 valid + + let list_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false))); + let list_data = unsafe { + ArrayData::new_unchecked( + list_type.clone(), + 5, + None, + Some(null_bitmap), + 0, + vec![offsets], + vec![values.to_data()], + ) + }; + let list_array = arrow_array::make_array(list_data); + + let schema = Arc::new(Schema::new(vec![Field::new("col", list_type, true)])); + let batch = RecordBatch::try_new(schema, vec![list_array]).unwrap(); + + let read = cdc_roundtrip(&batch); + let read_list = read.column(0).as_list::(); + assert_eq!(read_list.len(), 5); + assert!(read_list.is_valid(0)); + assert!(read_list.is_null(1)); + assert!(read_list.is_valid(2)); + assert!(read_list.is_null(3)); + assert!(read_list.is_valid(4)); + + let get_vals = |i: usize| -> Vec { + read_list + .value(i) + 
.as_primitive::() + .values() + .iter() + .copied() + .collect() + }; + assert_eq!(get_vals(0), vec![1, 2]); + assert_eq!(get_vals(2), vec![6, 7]); + assert_eq!(get_vals(4), vec![10]); + } } diff --git a/parquet/src/column/chunker/mod.rs b/parquet/src/column/chunker/mod.rs index c4caf18af66b..42631e026db4 100644 --- a/parquet/src/column/chunker/mod.rs +++ b/parquet/src/column/chunker/mod.rs @@ -31,10 +31,10 @@ pub(crate) use cdc::ContentDefinedChunker; pub(crate) struct CdcChunk { /// The start offset of this chunk inside the given levels. pub level_offset: usize, - /// The start offset of this chunk inside the given values array. - pub value_offset: usize, /// The number of levels in this chunk. pub num_levels: usize, - /// The number of values (Arrow array elements) in this chunk. + /// The start index into `non_null_indices` for this chunk. + pub value_offset: usize, + /// The number of `non_null_indices` entries in this chunk. pub num_values: usize, }