diff --git a/parquet/src/encodings/decoding.rs b/parquet/src/encodings/decoding.rs index 58430820a9b6..7da21e6dd091 100644 --- a/parquet/src/encodings/decoding.rs +++ b/parquet/src/encodings/decoding.rs @@ -770,15 +770,48 @@ where // At this point we have read the deltas to `buffer` we now need to offset // these to get back to the original values that were encoded - for v in &mut buffer[read..read + batch_read] { + // + // Optimization: if the bit_width for the miniblock is 0, then we can employ + // a faster decoding method than setting `value[i] = value[i-1] + value[i] + min_delta`. + // Where min_delta is 0 (all values in the miniblock are the same), we can simply + // set all values to `self.last_value`. In the case of non-zero min_delta (values + // in the mini-block form an arithmetic progression) each value can be computed via + // `value[i] = (i + 1) * min_delta + last_value`. In both cases we remove the + // dependence on the preceding value. + // Kudos to @pitrou for the idea https://github.com/apache/arrow/pull/49296 + let min_delta = self.min_delta.as_i64()?; + if bit_width == 0 { + if min_delta == 0 { + buffer[read..read + batch_read].fill(self.last_value); + } else { + // the c++ version multiplies min_delta by the iter index, but doing + // wrapping_mul through T::T was a bit slower. this is still + // faster than before. + let mut delta = self.min_delta; + for v in &mut buffer[read..read + batch_read] { + *v = self.last_value.wrapping_add(&delta); + delta = delta.wrapping_add(&self.min_delta); + } + + self.last_value = buffer[read + batch_read - 1]; + } + } else { // It is OK for deltas to contain "overflowed" values after encoding, // e.g. i64::MAX - i64::MIN, so we use `wrapping_add` to "overflow" again and // restore original value. - *v = v - .wrapping_add(&self.min_delta) - .wrapping_add(&self.last_value); - - self.last_value = *v; + if min_delta == 0 { + for v in &mut buffer[read..read + batch_read] { + *v = v.wrapping_add(&self.last_value); + self.last_value = *v; + } + } else { + for v in &mut buffer[read..read + batch_read] { + *v = v + .wrapping_add(&self.min_delta) + .wrapping_add(&self.last_value); + self.last_value = *v; + } + } } read += batch_read; @@ -840,12 +873,33 @@ where )); } - for v in &mut skip_buffer[0..skip_count] { - *v = v - .wrapping_add(&self.min_delta) - .wrapping_add(&self.last_value); + // see commentary in self.get() above regarding optimizations + let min_delta = self.min_delta.as_i64()?; + if bit_width == 0 { + // if min_delta == 0, there's nothing to do. self.last_value is unchanged + if min_delta != 0 { + let mut delta = self.min_delta; + for v in &mut skip_buffer[0..skip_count] { + *v = self.last_value.wrapping_add(&delta); + delta = delta.wrapping_add(&self.min_delta); + } + + self.last_value = skip_buffer[skip_count - 1]; + } + } else if min_delta == 0 { + for v in &mut skip_buffer[0..skip_count] { + *v = v.wrapping_add(&self.last_value); + + self.last_value = *v; + } + } else { + for v in &mut skip_buffer[0..skip_count] { + *v = v + .wrapping_add(&self.min_delta) + .wrapping_add(&self.last_value); - self.last_value = *v; + self.last_value = *v; + } } skip += mini_block_should_skip; @@ -1802,6 +1856,76 @@ mod tests { ); } + #[test] + fn test_delta_bit_packed_int32_single_value_large() { + let block_data = vec![3; 10240]; + test_delta_bit_packed_decode::(vec![block_data]); + } + + #[test] + fn test_delta_bit_packed_int32_single_value_skip_large() { + let block_data = vec![3; 10240]; + test_skip::(block_data.clone(), Encoding::DELTA_BINARY_PACKED, 50); + test_skip::(block_data, Encoding::DELTA_BINARY_PACKED, 5000); + } + + #[test] + fn test_delta_bit_packed_int32_increasing_value_large() { + let block_data = (0i32..10240).collect(); + test_delta_bit_packed_decode::(vec![block_data]); + } + + #[test] + fn test_delta_bit_packed_int32_increasing_value_skip_large() { + let block_data = (0i32..10240).collect::>(); + test_skip::(block_data.clone(), Encoding::DELTA_BINARY_PACKED, 50); + test_skip::(block_data, Encoding::DELTA_BINARY_PACKED, 5000); + } + + #[test] + fn test_delta_bit_packed_int32_stepped_value_large() { + let block_data = (0i32..10240).map(|i| i / 2).collect(); + test_delta_bit_packed_decode::(vec![block_data]); + } + + #[test] + fn test_delta_bit_packed_int32_stepped_value_skip_large() { + let block_data = (0i32..10240).map(|i| i / 2).collect::>(); + test_skip::(block_data.clone(), Encoding::DELTA_BINARY_PACKED, 50); + test_skip::(block_data, Encoding::DELTA_BINARY_PACKED, 5000); + } + + #[test] + fn test_delta_bit_packed_int32_mixed_large() { + // should be enough for 4 mini-blocks plus a little so we get some + // mixed mini-blocks + const BLOCK_SIZE: i32 = 133; + let block1_data = (0..BLOCK_SIZE).map(|i| (i * 7) % 11).collect(); + let block2_data = vec![3; BLOCK_SIZE as usize]; + let block3_data = (0..BLOCK_SIZE).map(|i| (i * 5) % 13).collect(); + let block4_data = (0..BLOCK_SIZE).collect(); + let block5_data = (0..BLOCK_SIZE).map(|i| (i * 3) % 17).collect(); + test_delta_bit_packed_decode::(vec![ + block1_data, + block2_data, + block3_data, + block4_data, + block5_data, + ]); + } + + #[test] + fn test_delta_bit_packed_int64_single_value_large() { + let block_data = vec![5; 10240]; + test_delta_bit_packed_decode::(vec![block_data]); + } + + #[test] + fn test_delta_bit_packed_int64_increasing_value_large() { + let block_data = (0i64..10240).collect(); + test_delta_bit_packed_decode::(vec![block_data]); + } + #[test] fn test_delta_byte_array_same_arrays() { let data = vec![