apache · alamb · Mar 18, 2026 · Feb 24, 2026 · Mar 2, 2026 · Mar 2, 2026
diff --git a/parquet/src/encodings/decoding.rs b/parquet/src/encodings/decoding.rs
@@ -770,15 +770,48 @@ where
 
             // At this point we have read the deltas to `buffer` we now need to offset
             // these to get back to the original values that were encoded
-            for v in &mut buffer[read..read + batch_read] {
+            //
+            // Optimization: if the bit_width for the miniblock is 0, then we can employ
+            // a faster decoding method than setting `value[i] = value[i-1] + value[i] + min_delta`.
+            // Where min_delta is 0 (all values in the miniblock are the same), we can simply
+            // set all values to `self.last_value`. In the case of non-zero min_delta (values
+            // in the mini-block form an arithmetic progression) each value can be computed via
+            // `value[i] = (i + 1) * min_delta + last_value`. In both cases we remove the
+            // dependence on the preceding value.
+            // Kudos to @pitrou for the idea https://github.com/apache/arrow/pull/49296
+            let min_delta = self.min_delta.as_i64()?;
+            if bit_width == 0 {
+                if min_delta == 0 {
+                    buffer[read..read + batch_read].fill(self.last_value);
+                } else {
+                    // the c++ version multiplies min_delta by the iter index, but doing
+                    // wrapping_mul through T::T was a bit slower. this is still
+                    // faster than before.
+                    let mut delta = self.min_delta;
+                    for v in &mut buffer[read..read + batch_read] {
+                        *v = self.last_value.wrapping_add(&delta);
+                        delta = delta.wrapping_add(&self.min_delta);
+                    }
+
+                    self.last_value = buffer[read + batch_read - 1];
+                }
+            } else {
                 // It is OK for deltas to contain "overflowed" values after encoding,
                 // e.g. i64::MAX - i64::MIN, so we use `wrapping_add` to "overflow" again and
                 // restore original value.
-                *v = v
-                    .wrapping_add(&self.min_delta)
-                    .wrapping_add(&self.last_value);
-
-                self.last_value = *v;
+                if min_delta == 0 {
+                    for v in &mut buffer[read..read + batch_read] {
+                        *v = v.wrapping_add(&self.last_value);
+                        self.last_value = *v;
+                    }
+                } else {
+                    for v in &mut buffer[read..read + batch_read] {
+                        *v = v
+                            .wrapping_add(&self.min_delta)
+                            .wrapping_add(&self.last_value);
+                        self.last_value = *v;
+                    }
+                }
             }
 
             read += batch_read;
@@ -840,12 +873,33 @@ where
                 ));
             }
 
-            for v in &mut skip_buffer[0..skip_count] {
-                *v = v
-                    .wrapping_add(&self.min_delta)
-                    .wrapping_add(&self.last_value);
+            // see commentary in self.get() above regarding optimizations
+            let min_delta = self.min_delta.as_i64()?;
+            if bit_width == 0 {
+                // if min_delta == 0, there's nothing to do. self.last_value is unchanged
+                if min_delta != 0 {
+                    let mut delta = self.min_delta;
+                    for v in &mut skip_buffer[0..skip_count] {
+                        *v = self.last_value.wrapping_add(&delta);
+                        delta = delta.wrapping_add(&self.min_delta);
+                    }
+
+                    self.last_value = skip_buffer[skip_count - 1];
+                }
+            } else if min_delta == 0 {
+                for v in &mut skip_buffer[0..skip_count] {
+                    *v = v.wrapping_add(&self.last_value);
+
+                    self.last_value = *v;
+                }
+            } else {
+                for v in &mut skip_buffer[0..skip_count] {
+                    *v = v
+                        .wrapping_add(&self.min_delta)
+                        .wrapping_add(&self.last_value);
 
-                self.last_value = *v;
+                    self.last_value = *v;
+                }
             }
 
             skip += mini_block_should_skip;
@@ -1802,6 +1856,76 @@ mod tests {
         );
     }
 
+    #[test]
+    fn test_delta_bit_packed_int32_single_value_large() {
+        let block_data = vec![3; 10240];
+        test_delta_bit_packed_decode::<Int32Type>(vec![block_data]);
+    }
+
+    #[test]
+    fn test_delta_bit_packed_int32_single_value_skip_large() {
+        let block_data = vec![3; 10240];
+        test_skip::<Int32Type>(block_data.clone(), Encoding::DELTA_BINARY_PACKED, 50);
+        test_skip::<Int32Type>(block_data, Encoding::DELTA_BINARY_PACKED, 5000);
+    }
+
+    #[test]
+    fn test_delta_bit_packed_int32_increasing_value_large() {
+        let block_data = (0i32..10240).collect();
+        test_delta_bit_packed_decode::<Int32Type>(vec![block_data]);
+    }
+
+    #[test]
+    fn test_delta_bit_packed_int32_increasing_value_skip_large() {
+        let block_data = (0i32..10240).collect::<Vec<i32>>();
+        test_skip::<Int32Type>(block_data.clone(), Encoding::DELTA_BINARY_PACKED, 50);
+        test_skip::<Int32Type>(block_data, Encoding::DELTA_BINARY_PACKED, 5000);
+    }
+
+    #[test]
+    fn test_delta_bit_packed_int32_stepped_value_large() {
+        let block_data = (0i32..10240).map(|i| i / 2).collect();
+        test_delta_bit_packed_decode::<Int32Type>(vec![block_data]);
+    }
+
+    #[test]
+    fn test_delta_bit_packed_int32_stepped_value_skip_large() {
+        let block_data = (0i32..10240).map(|i| i / 2).collect::<Vec<i32>>();
+        test_skip::<Int32Type>(block_data.clone(), Encoding::DELTA_BINARY_PACKED, 50);
+        test_skip::<Int32Type>(block_data, Encoding::DELTA_BINARY_PACKED, 5000);
+    }
+
+    #[test]
+    fn test_delta_bit_packed_int32_mixed_large() {
+        // should be enough for 4 mini-blocks plus a little so we get some
+        // mixed mini-blocks
+        const BLOCK_SIZE: i32 = 133;
+        let block1_data = (0..BLOCK_SIZE).map(|i| (i * 7) % 11).collect();
+        let block2_data = vec![3; BLOCK_SIZE as usize];
+        let block3_data = (0..BLOCK_SIZE).map(|i| (i * 5) % 13).collect();
+        let block4_data = (0..BLOCK_SIZE).collect();
+        let block5_data = (0..BLOCK_SIZE).map(|i| (i * 3) % 17).collect();
+        test_delta_bit_packed_decode::<Int32Type>(vec![
+            block1_data,
+            block2_data,
+            block3_data,
+            block4_data,
+            block5_data,
+        ]);
+    }
+
+    #[test]
+    fn test_delta_bit_packed_int64_single_value_large() {
+        let block_data = vec![5; 10240];
+        test_delta_bit_packed_decode::<Int64Type>(vec![block_data]);
+    }
+
+    #[test]
+    fn test_delta_bit_packed_int64_increasing_value_large() {
+        let block_data = (0i64..10240).collect();
+        test_delta_bit_packed_decode::<Int64Type>(vec![block_data]);
+    }
+
     #[test]
     fn test_delta_byte_array_same_arrays() {
         let data = vec![