Skip to content

Commit 11e76a3

Browse files
committed
Cache logical nulls of the array to avoid clones or repeated recomputation
1 parent 968f319 commit 11e76a3

File tree

1 file changed

+55
-2
lines changed

1 file changed

+55
-2
lines changed

parquet/src/arrow/arrow_writer/levels.rs

Lines changed: 55 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -498,10 +498,10 @@ impl LevelInfoBuilder {
498498
def_levels.reserve(len);
499499
info.non_null_indices.reserve(len);
500500

501-
match info.array.logical_nulls() {
501+
match &info.logical_nulls {
502502
Some(nulls) => {
503503
assert!(range.end <= nulls.len());
504-
let nulls = nulls.into_inner();
504+
let nulls = nulls.inner();
505505
def_levels.extend(range.clone().map(|i| {
506506
// Safety: range.end was asserted to be in bounds earlier
507507
let valid = unsafe { nulls.value_unchecked(i) };
@@ -568,6 +568,9 @@ pub(crate) struct ArrayLevels {
568568

569569
/// The arrow array
570570
array: ArrayRef,
571+
572+
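/// Logical nulls of `array`, computed once at construction and cached so that
/// writing levels does not have to recompute or clone them.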
573+
logical_nulls: Option<NullBuffer>,
571574
}
572575

573576
impl PartialEq for ArrayLevels {
@@ -578,6 +581,7 @@ impl PartialEq for ArrayLevels {
578581
&& self.max_def_level == other.max_def_level
579582
&& self.max_rep_level == other.max_rep_level
580583
&& self.array.as_ref() == other.array.as_ref()
584+
&& self.logical_nulls.as_ref() == other.logical_nulls.as_ref()
581585
}
582586
}
583587
// `Eq` is a marker trait with no methods: this impl only asserts that the
// equality defined by the `PartialEq` impl for `ArrayLevels` is a full
// equivalence relation.
impl Eq for ArrayLevels {}
@@ -590,13 +594,16 @@ impl ArrayLevels {
590594
false => ctx.def_level,
591595
};
592596

597+
let logical_nulls = array.logical_nulls();
598+
593599
Self {
594600
def_levels: (max_def_level != 0).then(Vec::new),
595601
rep_levels: (max_rep_level != 0).then(Vec::new),
596602
non_null_indices: vec![],
597603
max_def_level,
598604
max_rep_level,
599605
array,
606+
logical_nulls,
600607
}
601608
}
602609

@@ -670,6 +677,7 @@ mod tests {
670677
max_def_level: 2,
671678
max_rep_level: 2,
672679
array: Arc::new(primitives),
680+
logical_nulls: None,
673681
};
674682
assert_eq!(&levels[0], &expected);
675683
}
@@ -690,6 +698,7 @@ mod tests {
690698
max_def_level: 0,
691699
max_rep_level: 0,
692700
array,
701+
logical_nulls: None,
693702
};
694703
assert_eq!(&levels[0], &expected_levels);
695704
}
@@ -709,13 +718,15 @@ mod tests {
709718
let levels = calculate_array_levels(&array, &field).unwrap();
710719
assert_eq!(levels.len(), 1);
711720

721+
let logical_nulls = array.logical_nulls();
712722
let expected_levels = ArrayLevels {
713723
def_levels: Some(vec![1, 0, 1, 1, 0]),
714724
rep_levels: None,
715725
non_null_indices: vec![0, 2, 3],
716726
max_def_level: 1,
717727
max_rep_level: 0,
718728
array,
729+
logical_nulls,
719730
};
720731
assert_eq!(&levels[0], &expected_levels);
721732
}
@@ -750,6 +761,7 @@ mod tests {
750761
max_def_level: 1,
751762
max_rep_level: 1,
752763
array: Arc::new(leaf_array),
764+
logical_nulls: None,
753765
};
754766
assert_eq!(&levels[0], &expected_levels);
755767

@@ -783,6 +795,7 @@ mod tests {
783795
max_def_level: 2,
784796
max_rep_level: 1,
785797
array: Arc::new(leaf_array),
798+
logical_nulls: None,
786799
};
787800
assert_eq!(&levels[0], &expected_levels);
788801
}
@@ -832,6 +845,7 @@ mod tests {
832845
max_def_level: 3,
833846
max_rep_level: 1,
834847
array: Arc::new(leaf),
848+
logical_nulls: None,
835849
};
836850

837851
assert_eq!(&levels[0], &expected_levels);
@@ -882,6 +896,7 @@ mod tests {
882896
max_def_level: 5,
883897
max_rep_level: 2,
884898
array: Arc::new(leaf),
899+
logical_nulls: None,
885900
};
886901

887902
assert_eq!(&levels[0], &expected_levels);
@@ -919,6 +934,7 @@ mod tests {
919934
max_def_level: 1,
920935
max_rep_level: 1,
921936
array: Arc::new(leaf),
937+
logical_nulls: None,
922938
};
923939
assert_eq!(&levels[0], &expected_levels);
924940

@@ -951,6 +967,7 @@ mod tests {
951967
max_def_level: 3,
952968
max_rep_level: 1,
953969
array: Arc::new(leaf),
970+
logical_nulls: None,
954971
};
955972
assert_eq!(&levels[0], &expected_levels);
956973

@@ -999,6 +1016,7 @@ mod tests {
9991016
max_def_level: 5,
10001017
max_rep_level: 2,
10011018
array: Arc::new(leaf),
1019+
logical_nulls: None,
10021020
};
10031021
assert_eq!(&levels[0], &expected_levels);
10041022
}
@@ -1031,13 +1049,15 @@ mod tests {
10311049
let levels = calculate_array_levels(&a_array, &a_field).unwrap();
10321050
assert_eq!(levels.len(), 1);
10331051

1052+
let logical_nulls = leaf.logical_nulls();
10341053
let expected_levels = ArrayLevels {
10351054
def_levels: Some(vec![3, 2, 3, 1, 0, 3]),
10361055
rep_levels: None,
10371056
non_null_indices: vec![0, 2, 5],
10381057
max_def_level: 3,
10391058
max_rep_level: 0,
10401059
array: leaf,
1060+
logical_nulls,
10411061
};
10421062
assert_eq!(&levels[0], &expected_levels);
10431063
}
@@ -1077,6 +1097,7 @@ mod tests {
10771097
max_def_level: 3,
10781098
max_rep_level: 1,
10791099
array: Arc::new(a_values),
1100+
logical_nulls: None,
10801101
};
10811102
assert_eq!(list_level, &expected_level);
10821103
}
@@ -1169,45 +1190,52 @@ mod tests {
11691190
max_def_level: 0,
11701191
max_rep_level: 0,
11711192
array: Arc::new(a),
1193+
logical_nulls: None,
11721194
};
11731195
assert_eq!(list_level, &expected_level);
11741196

11751197
// test "b" levels
11761198
let list_level = levels.get(1).unwrap();
11771199

1200+
let b_logical_nulls = b.logical_nulls();
11781201
let expected_level = ArrayLevels {
11791202
def_levels: Some(vec![1, 0, 0, 1, 1]),
11801203
rep_levels: None,
11811204
non_null_indices: vec![0, 3, 4],
11821205
max_def_level: 1,
11831206
max_rep_level: 0,
11841207
array: Arc::new(b),
1208+
logical_nulls: b_logical_nulls,
11851209
};
11861210
assert_eq!(list_level, &expected_level);
11871211

11881212
// test "d" levels
11891213
let list_level = levels.get(2).unwrap();
11901214

1215+
let d_logical_nulls = d.logical_nulls();
11911216
let expected_level = ArrayLevels {
11921217
def_levels: Some(vec![1, 1, 1, 2, 1]),
11931218
rep_levels: None,
11941219
non_null_indices: vec![3],
11951220
max_def_level: 2,
11961221
max_rep_level: 0,
11971222
array: Arc::new(d),
1223+
logical_nulls: d_logical_nulls,
11981224
};
11991225
assert_eq!(list_level, &expected_level);
12001226

12011227
// test "f" levels
12021228
let list_level = levels.get(3).unwrap();
12031229

1230+
let f_logical_nulls = f.logical_nulls();
12041231
let expected_level = ArrayLevels {
12051232
def_levels: Some(vec![3, 2, 3, 2, 3]),
12061233
rep_levels: None,
12071234
non_null_indices: vec![0, 2, 4],
12081235
max_def_level: 3,
12091236
max_rep_level: 0,
12101237
array: Arc::new(f),
1238+
logical_nulls: f_logical_nulls,
12111239
};
12121240
assert_eq!(list_level, &expected_level);
12131241
}
@@ -1303,6 +1331,7 @@ mod tests {
13031331
assert_eq!(levels.len(), 2);
13041332

13051333
let map = batch.column(0).as_map();
1334+
let map_keys_logical_nulls = map.keys().logical_nulls();
13061335

13071336
// test key levels
13081337
let list_level = &levels[0];
@@ -1314,11 +1343,13 @@ mod tests {
13141343
max_def_level: 1,
13151344
max_rep_level: 1,
13161345
array: map.keys().clone(),
1346+
logical_nulls: map_keys_logical_nulls,
13171347
};
13181348
assert_eq!(list_level, &expected_level);
13191349

13201350
// test values levels
13211351
let list_level = levels.get(1).unwrap();
1352+
let map_values_logical_nulls = map.values().logical_nulls();
13221353

13231354
let expected_level = ArrayLevels {
13241355
def_levels: Some(vec![2, 2, 2, 1, 2, 1, 2]),
@@ -1327,6 +1358,7 @@ mod tests {
13271358
max_def_level: 2,
13281359
max_rep_level: 1,
13291360
array: map.values().clone(),
1361+
logical_nulls: map_values_logical_nulls,
13301362
};
13311363
assert_eq!(list_level, &expected_level);
13321364
}
@@ -1405,13 +1437,15 @@ mod tests {
14051437
let levels = calculate_array_levels(rb.column(0), rb.schema().field(0)).unwrap();
14061438
let list_level = &levels[0];
14071439

1440+
let logical_nulls = values.logical_nulls();
14081441
let expected_level = ArrayLevels {
14091442
def_levels: Some(vec![4, 1, 0, 2, 2, 3, 4]),
14101443
rep_levels: Some(vec![0, 0, 0, 0, 1, 0, 0]),
14111444
non_null_indices: vec![0, 4],
14121445
max_def_level: 4,
14131446
max_rep_level: 1,
14141447
array: values,
1448+
logical_nulls,
14151449
};
14161450

14171451
assert_eq!(list_level, &expected_level);
@@ -1445,13 +1479,15 @@ mod tests {
14451479

14461480
assert_eq!(levels.len(), 1);
14471481

1482+
let logical_nulls = values.logical_nulls();
14481483
let expected_level = ArrayLevels {
14491484
def_levels: Some(vec![4, 4, 3, 2, 0, 4, 4, 0, 1]),
14501485
rep_levels: Some(vec![0, 1, 0, 0, 0, 0, 1, 0, 0]),
14511486
non_null_indices: vec![0, 1, 5, 6],
14521487
max_def_level: 4,
14531488
max_rep_level: 1,
14541489
array: values,
1490+
logical_nulls,
14551491
};
14561492

14571493
assert_eq!(&levels[0], &expected_level);
@@ -1530,24 +1566,28 @@ mod tests {
15301566

15311567
assert_eq!(levels.len(), 2);
15321568

1569+
let a1_logical_nulls = a1_values.logical_nulls();
15331570
let expected_level = ArrayLevels {
15341571
def_levels: Some(vec![0, 0, 1, 6, 5, 2, 3, 1]),
15351572
rep_levels: Some(vec![0, 0, 0, 0, 2, 0, 1, 0]),
15361573
non_null_indices: vec![1],
15371574
max_def_level: 6,
15381575
max_rep_level: 2,
15391576
array: a1_values,
1577+
logical_nulls: a1_logical_nulls,
15401578
};
15411579

15421580
assert_eq!(&levels[0], &expected_level);
15431581

1582+
let a2_logical_nulls = a2_values.logical_nulls();
15441583
let expected_level = ArrayLevels {
15451584
def_levels: Some(vec![0, 0, 1, 3, 2, 4, 1]),
15461585
rep_levels: Some(vec![0, 0, 0, 0, 0, 1, 0]),
15471586
non_null_indices: vec![4],
15481587
max_def_level: 4,
15491588
max_rep_level: 1,
15501589
array: a2_values,
1590+
logical_nulls: a2_logical_nulls,
15511591
};
15521592

15531593
assert_eq!(&levels[1], &expected_level);
@@ -1579,13 +1619,15 @@ mod tests {
15791619

15801620
let list_level = &levels[0];
15811621

1622+
let logical_nulls = values.logical_nulls();
15821623
let expected_level = ArrayLevels {
15831624
def_levels: Some(vec![0, 0, 3, 3]),
15841625
rep_levels: Some(vec![0, 0, 0, 1]),
15851626
non_null_indices: vec![6, 7],
15861627
max_def_level: 3,
15871628
max_rep_level: 1,
15881629
array: values,
1630+
logical_nulls,
15891631
};
15901632
assert_eq!(list_level, &expected_level);
15911633
}
@@ -1729,22 +1771,26 @@ mod tests {
17291771
let b_levels = &levels[1];
17301772

17311773
// [[{a: 1}, null], null, [null, null], [{a: null}, {a: 2}]]
1774+
let values_a_logical_nulls = values_a.logical_nulls();
17321775
let expected_a = ArrayLevels {
17331776
def_levels: Some(vec![4, 2, 0, 2, 2, 3, 4]),
17341777
rep_levels: Some(vec![0, 1, 0, 0, 1, 0, 1]),
17351778
non_null_indices: vec![0, 7],
17361779
max_def_level: 4,
17371780
max_rep_level: 1,
17381781
array: values_a,
1782+
logical_nulls: values_a_logical_nulls,
17391783
};
17401784
// [[{b: 2}, null], null, [null, null], [{b: 3}, {b: 4}]]
1785+
let values_b_logical_nulls = values_b.logical_nulls();
17411786
let expected_b = ArrayLevels {
17421787
def_levels: Some(vec![3, 2, 0, 2, 2, 3, 3]),
17431788
rep_levels: Some(vec![0, 1, 0, 0, 1, 0, 1]),
17441789
non_null_indices: vec![0, 6, 7],
17451790
max_def_level: 3,
17461791
max_rep_level: 1,
17471792
array: values_b,
1793+
logical_nulls: values_b_logical_nulls,
17481794
};
17491795

17501796
assert_eq!(a_levels, &expected_a);
@@ -1769,13 +1815,15 @@ mod tests {
17691815

17701816
let list_level = &levels[0];
17711817

1818+
let logical_nulls = values.logical_nulls();
17721819
let expected_level = ArrayLevels {
17731820
def_levels: Some(vec![1, 0, 1]),
17741821
rep_levels: Some(vec![0, 0, 0]),
17751822
non_null_indices: vec![],
17761823
max_def_level: 3,
17771824
max_rep_level: 1,
17781825
array: values,
1826+
logical_nulls,
17791827
};
17801828
assert_eq!(list_level, &expected_level);
17811829
}
@@ -1804,13 +1852,15 @@ mod tests {
18041852
builder.write(0..4);
18051853
let levels = builder.finish();
18061854

1855+
let logical_nulls = values.logical_nulls();
18071856
let expected_level = ArrayLevels {
18081857
def_levels: Some(vec![5, 4, 5, 2, 5, 3, 5, 5, 4, 4, 0]),
18091858
rep_levels: Some(vec![0, 2, 2, 1, 0, 1, 0, 2, 1, 2, 0]),
18101859
non_null_indices: vec![0, 2, 3, 4, 5],
18111860
max_def_level: 5,
18121861
max_rep_level: 2,
18131862
array: values,
1863+
logical_nulls,
18141864
};
18151865

18161866
assert_eq!(levels[0], expected_level);
@@ -1834,13 +1884,16 @@ mod tests {
18341884
let mut builder = levels(&item_field, dict.clone());
18351885
builder.write(0..4);
18361886
let levels = builder.finish();
1887+
1888+
let logical_nulls = dict.logical_nulls();
18371889
let expected_level = ArrayLevels {
18381890
def_levels: Some(vec![0, 0, 1, 1]),
18391891
rep_levels: None,
18401892
non_null_indices: vec![2, 3],
18411893
max_def_level: 1,
18421894
max_rep_level: 0,
18431895
array: Arc::new(dict),
1896+
logical_nulls,
18441897
};
18451898
assert_eq!(levels[0], expected_level);
18461899
}

0 commit comments

Comments
 (0)