Skip to content

Commit 1b3f313

Browse files
authored
GH-49888: [C++][Compute] Fix count for run-end encoded arrays with nulls (#49908)
### Rationale for this change

The `count` kernel used `GetNullCount()`, which reports the physical null count. For run-end encoded arrays, this ignored nulls in the encoded values child.

### What changes are included in this PR?

Use `ComputeLogicalNullCount()` in the `count` kernel so run-end encoded arrays are counted correctly. Add C++ and Python tests for this case.

### Are these changes tested?

Yes.

### Are there any user-facing changes?

No.

* GitHub Issue: #49888

Authored-by: fenfeng9 <fenfeng9@qq.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
1 parent 61c96ca commit 1b3f313

3 files changed

Lines changed: 23 additions & 1 deletion

File tree

cpp/src/arrow/compute/kernels/aggregate_basic.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ struct CountImpl : public ScalarAggregator {
103103
this->non_nulls += batch.length;
104104
} else if (batch[0].is_array()) {
105105
const ArraySpan& input = batch[0].array;
106-
const int64_t nulls = input.GetNullCount();
106+
const int64_t nulls = input.ComputeLogicalNullCount();
107107
this->nulls += nulls;
108108
this->non_nulls += input.length - nulls;
109109
} else {

cpp/src/arrow/compute/kernels/aggregate_test.cc

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -941,6 +941,16 @@ TYPED_TEST(TestCountKernel, SimpleCount) {
941941
EXPECT_THAT(Count(*MakeScalar(ty, 1), all), ResultWith(Datum(int64_t(1))));
942942
}
943943

944+
TEST(TestCountKernel, RunEndEncodedNulls) {
945+
auto input = ArrayFromJSON(int32(), "[1, 1, null, null, null, 2, 2, 2, null, 3]");
946+
ASSERT_OK_AND_ASSIGN(auto encoded, RunEndEncode(input));
947+
948+
auto array = encoded.make_array();
949+
ValidateCount(*array, {6, 4});
950+
// Logical slice: [null, null, 2, 2, 2, null].
951+
ValidateCount(*array->Slice(3, 6), {3, 3});
952+
}
953+
944954
template <typename ArrowType>
945955
class TestRandomNumericCountKernel : public ::testing::Test {};
946956

python/pyarrow/tests/test_compute.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2849,6 +2849,18 @@ def test_count():
28492849
pc.count(arr, 'something else')
28502850

28512851

2852+
def test_count_run_end_encoded_nulls():
2853+
arr = pc.run_end_encode(
2854+
pa.array([1, 1, None, None, None, 2, 2, 2, None, 3]))
2855+
2856+
assert pc.count(arr, mode="only_valid").as_py() == 6
2857+
assert pc.count(arr, mode="only_null").as_py() == 4
2858+
assert pc.count(arr, mode="all").as_py() == 10
2859+
# Slice crosses run boundaries: logical [None, None, 2, 2, 2, None].
2860+
assert pc.count(arr.slice(3, 6), mode="only_valid").as_py() == 3
2861+
assert pc.count(arr.slice(3, 6), mode="only_null").as_py() == 3
2862+
2863+
28522864
def test_index():
28532865
arr = pa.array([0, 1, None, 3, 4], type=pa.int64())
28542866
assert pc.index(arr, pa.scalar(0)).as_py() == 0

0 commit comments

Comments
 (0)