Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
146 changes: 135 additions & 11 deletions parquet/src/encodings/decoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -770,15 +770,48 @@ where

// At this point we have read the deltas to `buffer` we now need to offset
// these to get back to the original values that were encoded
for v in &mut buffer[read..read + batch_read] {
//
// Optimization: if the bit_width for the miniblock is 0, then we can employ
// a faster decoding method than setting `value[i] = value[i-1] + value[i] + min_delta`.
// Where min_delta is 0 (all values in the miniblock are the same), we can simply
// set all values to `self.last_value`. In the case of non-zero min_delta (values
// in the mini-block form an arithmetic progression) each value can be computed via
// `value[i] = (i + 1) * min_delta + last_value`. In both cases we remove the
// dependence on the preceding value.
// Kudos to @pitrou for the idea https://github.com/apache/arrow/pull/49296
let min_delta = self.min_delta.as_i64()?;
if bit_width == 0 {
if min_delta == 0 {
Copy link
Copy Markdown
Contributor

@Dandandan Dandandan Feb 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we repeat the min_delta==0 optimization below as well for bit_width > 0 ?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can try. The theory, however, is that the speed up is from not having to use the previous value to calculate the current. I'll see if there's any speedup from avoiding the additional wrapping_add.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done. It's good for 5-10% on my laptop.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, not bad...

buffer[read..read + batch_read].fill(self.last_value);
} else {
// the c++ version multiplies min_delta by the iter index, but doing
// wrapping_mul through T::T was a bit slower. this is still
// faster than before.
let mut delta = self.min_delta;
for v in &mut buffer[read..read + batch_read] {
*v = self.last_value.wrapping_add(&delta);
delta = delta.wrapping_add(&self.min_delta);
}

self.last_value = buffer[read + batch_read - 1];
}
} else {
// It is OK for deltas to contain "overflowed" values after encoding,
// e.g. i64::MAX - i64::MIN, so we use `wrapping_add` to "overflow" again and
// restore original value.
*v = v
.wrapping_add(&self.min_delta)
.wrapping_add(&self.last_value);

self.last_value = *v;
if min_delta == 0 {
for v in &mut buffer[read..read + batch_read] {
*v = v.wrapping_add(&self.last_value);
self.last_value = *v;
}
} else {
for v in &mut buffer[read..read + batch_read] {
*v = v
.wrapping_add(&self.min_delta)
.wrapping_add(&self.last_value);
self.last_value = *v;
}
}
}

read += batch_read;
Expand Down Expand Up @@ -840,12 +873,33 @@ where
));
}

for v in &mut skip_buffer[0..skip_count] {
*v = v
.wrapping_add(&self.min_delta)
.wrapping_add(&self.last_value);
// see commentary in self.get() above regarding optimizations
let min_delta = self.min_delta.as_i64()?;
if bit_width == 0 {
// if min_delta == 0, there's nothing to do. self.last_value is unchanged
if min_delta != 0 {
let mut delta = self.min_delta;
for v in &mut skip_buffer[0..skip_count] {
*v = self.last_value.wrapping_add(&delta);
delta = delta.wrapping_add(&self.min_delta);
}

self.last_value = skip_buffer[skip_count - 1];
}
} else if min_delta == 0 {
for v in &mut skip_buffer[0..skip_count] {
*v = v.wrapping_add(&self.last_value);

self.last_value = *v;
}
} else {
for v in &mut skip_buffer[0..skip_count] {
*v = v
.wrapping_add(&self.min_delta)
.wrapping_add(&self.last_value);

self.last_value = *v;
self.last_value = *v;
}
}

skip += mini_block_should_skip;
Expand Down Expand Up @@ -1802,6 +1856,76 @@ mod tests {
);
}

#[test]
fn test_delta_bit_packed_int32_single_value_large() {
let block_data = vec![3; 10240];
test_delta_bit_packed_decode::<Int32Type>(vec![block_data]);
}

#[test]
fn test_delta_bit_packed_int32_single_value_skip_large() {
let block_data = vec![3; 10240];
test_skip::<Int32Type>(block_data.clone(), Encoding::DELTA_BINARY_PACKED, 50);
test_skip::<Int32Type>(block_data, Encoding::DELTA_BINARY_PACKED, 5000);
}

#[test]
fn test_delta_bit_packed_int32_increasing_value_large() {
let block_data = (0i32..10240).collect();
test_delta_bit_packed_decode::<Int32Type>(vec![block_data]);
}

#[test]
fn test_delta_bit_packed_int32_increasing_value_skip_large() {
let block_data = (0i32..10240).collect::<Vec<i32>>();
test_skip::<Int32Type>(block_data.clone(), Encoding::DELTA_BINARY_PACKED, 50);
test_skip::<Int32Type>(block_data, Encoding::DELTA_BINARY_PACKED, 5000);
}

#[test]
fn test_delta_bit_packed_int32_stepped_value_large() {
let block_data = (0i32..10240).map(|i| i / 2).collect();
test_delta_bit_packed_decode::<Int32Type>(vec![block_data]);
}

#[test]
fn test_delta_bit_packed_int32_stepped_value_skip_large() {
let block_data = (0i32..10240).map(|i| i / 2).collect::<Vec<i32>>();
test_skip::<Int32Type>(block_data.clone(), Encoding::DELTA_BINARY_PACKED, 50);
test_skip::<Int32Type>(block_data, Encoding::DELTA_BINARY_PACKED, 5000);
}

#[test]
fn test_delta_bit_packed_int32_mixed_large() {
// should be enough for 4 mini-blocks plus a little so we get some
// mixed mini-blocks
const BLOCK_SIZE: i32 = 133;
let block1_data = (0..BLOCK_SIZE).map(|i| (i * 7) % 11).collect();
let block2_data = vec![3; BLOCK_SIZE as usize];
let block3_data = (0..BLOCK_SIZE).map(|i| (i * 5) % 13).collect();
let block4_data = (0..BLOCK_SIZE).collect();
let block5_data = (0..BLOCK_SIZE).map(|i| (i * 3) % 17).collect();
test_delta_bit_packed_decode::<Int32Type>(vec![
block1_data,
block2_data,
block3_data,
block4_data,
block5_data,
]);
}

#[test]
fn test_delta_bit_packed_int64_single_value_large() {
let block_data = vec![5; 10240];
test_delta_bit_packed_decode::<Int64Type>(vec![block_data]);
}

#[test]
fn test_delta_bit_packed_int64_increasing_value_large() {
let block_data = (0i64..10240).collect();
test_delta_bit_packed_decode::<Int64Type>(vec![block_data]);
}

#[test]
fn test_delta_byte_array_same_arrays() {
let data = vec![
Expand Down
Loading