@@ -484,27 +484,64 @@ Comparator<CType>* GetComparator(CompareOperator op) {
484484}
485485
486486template <typename T, typename Fn, typename CType = typename TypeTraits<T>::CType>
487- std::shared_ptr<Array> CompareAndFilter (const CType* data, int64_t length, Fn&& fn) {
487+ std::shared_ptr<Array> CompareAndFilter (const std::shared_ptr<Array>& array, Fn&& fn) {
488+ using ArrayType = typename TypeTraits<T>::ArrayType;
489+ auto typed_array = checked_pointer_cast<ArrayType>(array);
490+
488491 std::vector<CType> filtered;
489- filtered.reserve (length);
490- std::copy_if (data, data + length, std::back_inserter (filtered), std::forward<Fn>(fn));
492+ filtered.reserve (array->length ());
493+
494+ for (int64_t i = 0 ; i < array->length (); ++i) {
495+ if (array->IsNull (i)) {
496+ // Nulls are filtered out (comparison with null is false)
497+ continue ;
498+ }
499+ CType value = typed_array->Value (i);
500+ if (fn (value)) {
501+ filtered.push_back (value);
502+ }
503+ }
504+
491505 std::shared_ptr<Array> filtered_array;
492506 ArrayFromVector<T, CType>(filtered, &filtered_array);
493507 return filtered_array;
494508}
495509
496510template <typename T, typename CType = typename TypeTraits<T>::CType>
497- std::shared_ptr<Array> CompareAndFilter (const CType* data, int64_t length , CType val,
511+ std::shared_ptr<Array> CompareAndFilter (const std::shared_ptr<Array>& array , CType val,
498512 CompareOperator op) {
499513 auto cmp = GetComparator<CType>(op);
500- return CompareAndFilter<T>(data, length , [&](CType e) { return cmp (e, val); });
514+ return CompareAndFilter<T>(array , [&](CType e) { return cmp (e, val); });
501515}
502516
503- template <typename T, typename CType = typename TypeTraits<T>::CType>
504- std::shared_ptr<Array> CompareAndFilter (const CType* data, int64_t length,
505- const CType* other, CompareOperator op) {
517+ template <typename T>
518+ std::shared_ptr<Array> CompareAndFilter (const std::shared_ptr<Array>& lhs,
519+ const std::shared_ptr<Array>& rhs,
520+ CompareOperator op) {
521+ using ArrayType = typename TypeTraits<T>::ArrayType;
522+ using CType = typename TypeTraits<T>::CType;
523+ auto lhs_typed = checked_pointer_cast<ArrayType>(lhs);
524+ auto rhs_typed = checked_pointer_cast<ArrayType>(rhs);
506525 auto cmp = GetComparator<CType>(op);
507- return CompareAndFilter<T>(data, length, [&](CType e) { return cmp (e, *other++); });
526+
527+ std::vector<CType> filtered;
528+ filtered.reserve (lhs->length ());
529+
530+ for (int64_t i = 0 ; i < lhs->length (); ++i) {
531+ // Skip if either element is null
532+ if (lhs->IsNull (i) || rhs->IsNull (i)) {
533+ continue ;
534+ }
535+ CType lhs_value = lhs_typed->Value (i);
536+ CType rhs_value = rhs_typed->Value (i);
537+ if (cmp (lhs_value, rhs_value)) {
538+ filtered.push_back (lhs_value);
539+ }
540+ }
541+
542+ std::shared_ptr<Array> filtered_array;
543+ ArrayFromVector<T, CType>(filtered, &filtered_array);
544+ return filtered_array;
508545}
509546
510547TYPED_TEST (TestFilterKernelWithNumeric, CompareScalarAndFilterRandomNumeric) {
@@ -513,11 +550,13 @@ TYPED_TEST(TestFilterKernelWithNumeric, CompareScalarAndFilterRandomNumeric) {
513550 using CType = typename TypeTraits<TypeParam>::CType;
514551
515552 auto rand = random::RandomArrayGenerator (kRandomSeed );
553+ std::default_random_engine gen (kRandomSeed );
554+ ::arrow::random::uniform_real_distribution<double > null_dist (0.0 , 1.0 );
516555 for (size_t i = 3 ; i < 10 ; i++) {
517556 const int64_t length = static_cast <int64_t >(1ULL << i);
518- // TODO(bkietz) rewrite with some nulls
519- auto array =
520- checked_pointer_cast<ArrayType>( rand.Numeric <TypeParam>(length, 0 , 100 , 0 ));
557+ double null_probability = null_dist (gen);
558+ auto array = checked_pointer_cast<ArrayType>(
559+ rand.Numeric <TypeParam>(length, 0 , 100 , null_probability ));
521560 CType c_fifty = 50 ;
522561 auto fifty = std::make_shared<ScalarType>(c_fifty);
523562 for (auto op : {EQUAL, NOT_EQUAL, GREATER, LESS_EQUAL}) {
@@ -527,8 +566,7 @@ TYPED_TEST(TestFilterKernelWithNumeric, CompareScalarAndFilterRandomNumeric) {
527566 ASSERT_OK_AND_ASSIGN (Datum filtered, Filter (array, selection));
528567 auto filtered_array = filtered.make_array ();
529568 ValidateOutput (*filtered_array);
530- auto expected =
531- CompareAndFilter<TypeParam>(array->raw_values (), array->length (), c_fifty, op);
569+ auto expected = CompareAndFilter<TypeParam>(array, c_fifty, op);
532570 ASSERT_ARRAYS_EQUAL (*filtered_array, *expected);
533571 }
534572 }
@@ -538,20 +576,23 @@ TYPED_TEST(TestFilterKernelWithNumeric, CompareArrayAndFilterRandomNumeric) {
538576 using ArrayType = typename TypeTraits<TypeParam>::ArrayType;
539577
540578 auto rand = random::RandomArrayGenerator (kRandomSeed );
579+ std::default_random_engine gen (kRandomSeed );
580+ ::arrow::random::uniform_real_distribution<double > null_dist (0.0 , 1.0 );
541581 for (size_t i = 3 ; i < 10 ; i++) {
542582 const int64_t length = static_cast <int64_t >(1ULL << i);
583+ double null_probability_lhs = null_dist (gen);
584+ double null_probability_rhs = null_dist (gen);
543585 auto lhs = checked_pointer_cast<ArrayType>(
544- rand.Numeric <TypeParam>(length, 0 , 100 , /* null_probability= */ 0.0 ));
586+ rand.Numeric <TypeParam>(length, 0 , 100 , null_probability_lhs ));
545587 auto rhs = checked_pointer_cast<ArrayType>(
546- rand.Numeric <TypeParam>(length, 0 , 100 , /* null_probability= */ 0.0 ));
588+ rand.Numeric <TypeParam>(length, 0 , 100 , null_probability_rhs ));
547589 for (auto op : {EQUAL, NOT_EQUAL, GREATER, LESS_EQUAL}) {
548590 ASSERT_OK_AND_ASSIGN (Datum selection,
549591 CallFunction (CompareOperatorToFunctionName (op), {lhs, rhs}));
550592 ASSERT_OK_AND_ASSIGN (Datum filtered, Filter (lhs, selection));
551593 auto filtered_array = filtered.make_array ();
552594 ValidateOutput (*filtered_array);
553- auto expected = CompareAndFilter<TypeParam>(lhs->raw_values (), lhs->length (),
554- rhs->raw_values (), op);
595+ auto expected = CompareAndFilter<TypeParam>(lhs, rhs, op);
555596 ASSERT_ARRAYS_EQUAL (*filtered_array, *expected);
556597 }
557598 }
@@ -563,10 +604,13 @@ TYPED_TEST(TestFilterKernelWithNumeric, ScalarInRangeAndFilterRandomNumeric) {
563604 using CType = typename TypeTraits<TypeParam>::CType;
564605
565606 auto rand = random::RandomArrayGenerator (kRandomSeed );
607+ std::default_random_engine gen (kRandomSeed );
608+ ::arrow::random::uniform_real_distribution<double > null_dist (0.0 , 1.0 );
566609 for (size_t i = 3 ; i < 10 ; i++) {
567610 const int64_t length = static_cast <int64_t >(1ULL << i);
611+ double null_probability = null_dist (gen);
568612 auto array = checked_pointer_cast<ArrayType>(
569- rand.Numeric <TypeParam>(length, 0 , 100 , /* null_probability= */ 0.0 ));
613+ rand.Numeric <TypeParam>(length, 0 , 100 , null_probability));
570614 CType c_fifty = 50 , c_hundred = 100 ;
571615 auto fifty = std::make_shared<ScalarType>(c_fifty);
572616 auto hundred = std::make_shared<ScalarType>(c_hundred);
@@ -579,8 +623,7 @@ TYPED_TEST(TestFilterKernelWithNumeric, ScalarInRangeAndFilterRandomNumeric) {
579623 auto filtered_array = filtered.make_array ();
580624 ValidateOutput (*filtered_array);
581625 auto expected = CompareAndFilter<TypeParam>(
582- array->raw_values (), array->length (),
583- [&](CType e) { return (e > c_fifty) && (e < c_hundred); });
626+ array, [&](CType e) { return (e > c_fifty) && (e < c_hundred); });
584627 ASSERT_ARRAYS_EQUAL (*filtered_array, *expected);
585628 }
586629}
0 commit comments