diff --git a/.github/actions/check/action.yml b/.github/actions/check/action.yml index 0348c8d1a5876..b1386a28a381f 100644 --- a/.github/actions/check/action.yml +++ b/.github/actions/check/action.yml @@ -42,8 +42,6 @@ runs: taplo check taplo fmt --check - - # TODO: enable it later # - name: Audit dependencies # shell: bash # if: "!contains(github.event.head_commit.message, 'skip audit')" diff --git a/src/common/column/src/bitmap/immutable.rs b/src/common/column/src/bitmap/immutable.rs index 87b1d00c52e79..c7f24a1434b28 100644 --- a/src/common/column/src/bitmap/immutable.rs +++ b/src/common/column/src/bitmap/immutable.rs @@ -190,6 +190,10 @@ impl Bitmap { self.unset_bits } + pub fn true_count(&self) -> usize { + self.len() - self.unset_bits + } + /// Slices `self`, offsetting by `offset` and truncating up to `length` bits. /// # Panic /// Panics iff `offset + length > self.length`, i.e. if the offset and `length` @@ -460,25 +464,13 @@ impl Bitmap { MutableBitmap::from_trusted_len_iter(iterator).into() } - /// Creates a new [`Bitmap`] from a fallible iterator of booleans. - #[inline] - pub fn try_from_trusted_len_iter>>( - iterator: I, - ) -> std::result::Result { - Ok(MutableBitmap::try_from_trusted_len_iter(iterator)?.into()) - } - - /// Creates a new [`Bitmap`] from a fallible iterator of booleans. - /// # Safety - /// The iterator must report an accurate length. + /// Invokes `f` with values `0..len` collecting the boolean results into a new `MutableBuffer` + /// + /// This is similar to `from_trusted_len_iter`, however, can be significantly faster + /// as it eliminates the conditional `Iterator::next` #[inline] - pub unsafe fn try_from_trusted_len_iter_unchecked< - E, - I: Iterator>, - >( - iterator: I, - ) -> std::result::Result { - Ok(MutableBitmap::try_from_trusted_len_iter_unchecked(iterator)?.into()) + pub fn collect_bool bool>(len: usize, f: F) -> Self { + MutableBitmap::collect_bool(len, f).into() } /// Create a new [`Bitmap`] from an arrow [`NullBuffer`] @@ -499,6 +491,14 @@ impl Bitmap { pub fn into_array_data(&self) -> ArrayData { ArrayData::from(self) } + + pub fn map_all_sets_to_none(v: Option) -> Option { + match v { + Some(v) if v.unset_bits == 0 => None, + None => None, + other => other, + } + } } impl<'a> IntoIterator for &'a Bitmap { diff --git a/src/common/column/src/bitmap/mutable.rs b/src/common/column/src/bitmap/mutable.rs index 1d19016cf4944..53da95b83aab8 100644 --- a/src/common/column/src/bitmap/mutable.rs +++ b/src/common/column/src/bitmap/mutable.rs @@ -19,6 +19,8 @@ use std::iter::TrustedLen; use std::ops::Range; use std::sync::Arc; +use databend_common_base::vec_ext::VecExt; + use super::utils::count_zeros; use super::utils::fmt; use super::utils::get_bit; @@ -266,7 +268,7 @@ impl MutableBitmap { #[inline] pub unsafe fn push_unchecked(&mut self, value: bool) { if self.length % 8 == 0 { - self.buffer.push(0); + self.buffer.push_unchecked(0); } let byte = self.buffer.as_mut_slice().last_mut().unwrap(); *byte = set(*byte, self.length % 8, value); @@ -467,24 +469,11 @@ impl FromIterator for MutableBitmap { /// The iterator must be trustedLen and its len must be least `len`. #[inline] unsafe fn get_chunk_unchecked(iterator: &mut impl Iterator) -> u64 { - let mut byte = 0u64; - let mut mask; - for i in 0..8 { - mask = 1u64 << (8 * i); - for _ in 0..8 { - let value = match iterator.next() { - Some(value) => value, - None => unsafe { unreachable_unchecked() }, - }; - - byte |= match value { - true => mask, - false => 0, - }; - mask <<= 1; - } + let mut packed = 0; + for bit_idx in 0..64 { + packed |= (iterator.next().unwrap() as u64) << bit_idx; } - byte + packed } /// # Safety @@ -521,7 +510,7 @@ unsafe fn extend_aligned_trusted_iter_unchecked( let remainder = additional_bits % 64; let additional = (additional_bits + 7) / 8; - assert_eq!( + debug_assert_eq!( additional, // a hint of how the following calculation will be done chunks * 8 + remainder / 8 + (remainder % 8 > 0) as usize @@ -531,20 +520,20 @@ unsafe fn extend_aligned_trusted_iter_unchecked( // chunks of 64 bits for _ in 0..chunks { let chunk = get_chunk_unchecked(&mut iterator); - buffer.extend_from_slice(&chunk.to_le_bytes()); + buffer.extend_from_slice_unchecked(&chunk.to_le_bytes()); } // remaining complete bytes for _ in 0..(remainder / 8) { let byte = unsafe { get_byte_unchecked(8, &mut iterator) }; - buffer.push(byte) + buffer.push_unchecked(byte) } // remaining bits let remainder = remainder % 8; if remainder > 0 { let byte = unsafe { get_byte_unchecked(remainder, &mut iterator) }; - buffer.push(byte) + buffer.push_unchecked(byte) } additional_bits } @@ -626,45 +615,6 @@ impl MutableBitmap { unsafe { Self::from_trusted_len_iter_unchecked(iterator) } } - /// Creates a new [`MutableBitmap`] from an iterator of booleans. - pub fn try_from_trusted_len_iter(iterator: I) -> std::result::Result - where I: TrustedLen> { - unsafe { Self::try_from_trusted_len_iter_unchecked(iterator) } - } - - /// Creates a new [`MutableBitmap`] from an falible iterator of booleans. - /// # Safety - /// The caller must guarantee that the iterator is `TrustedLen`. - pub unsafe fn try_from_trusted_len_iter_unchecked( - mut iterator: I, - ) -> std::result::Result - where I: Iterator> { - let length = iterator.size_hint().1.unwrap(); - - let mut buffer = vec![0u8; (length + 7) / 8]; - - let chunks = length / 8; - let reminder = length % 8; - - let data = buffer.as_mut_slice(); - data[..chunks].iter_mut().try_for_each(|byte| { - (0..8).try_for_each(|i| { - *byte = set(*byte, i, iterator.next().unwrap()?); - Ok(()) - }) - })?; - - if reminder != 0 { - let last = &mut data[chunks]; - iterator.enumerate().try_for_each(|(i, value)| { - *last = set(*last, i, value?); - Ok(()) - })?; - } - - Ok(Self { buffer, length }) - } - fn extend_unaligned(&mut self, slice: &[u8], offset: usize, length: usize) { // e.g. // [a, b, --101010] <- to be extended @@ -764,6 +714,45 @@ impl MutableBitmap { } } + /// Invokes `f` with values `0..len` collecting the boolean results into a new `MutableBuffer` + /// + /// This is similar to `from_trusted_len_iter`, however, can be significantly faster + /// as it eliminates the conditional `Iterator::next` + #[inline] + pub fn collect_bool bool>(len: usize, mut f: F) -> Self { + let mut buffer = Vec::with_capacity(len.div_ceil(64) * 8); + + let chunks = len / 64; + let remainder = len % 64; + for chunk in 0..chunks { + let mut packed = 0; + for bit_idx in 0..64 { + let i = bit_idx + chunk * 64; + packed |= (f(i) as u64) << bit_idx; + } + + // SAFETY: Already allocated sufficient capacity + unsafe { buffer.extend_from_slice_unchecked(&packed.to_le_bytes()) } + } + + if remainder != 0 { + let mut packed = 0; + for bit_idx in 0..remainder { + let i = bit_idx + chunks * 64; + packed |= (f(i) as u64) << bit_idx; + } + + // SAFETY: Already allocated sufficient capacity + unsafe { buffer.extend_from_slice_unchecked(&packed.to_le_bytes()) } + } + + buffer.truncate(len.div_ceil(8)); + Self { + buffer, + length: len, + } + } + /// Returns the slice of bytes of this [`MutableBitmap`]. /// Note that the last byte may not be fully used. #[inline] diff --git a/src/query/expression/benches/bench.rs b/src/query/expression/benches/bench.rs index 4bd61dfaed09d..4aa2386685c2d 100644 --- a/src/query/expression/benches/bench.rs +++ b/src/query/expression/benches/bench.rs @@ -15,8 +15,11 @@ #[macro_use] extern crate criterion; +use arrow_buffer::BooleanBuffer; use arrow_buffer::ScalarBuffer; use criterion::Criterion; +use databend_common_base::vec_ext::VecExt; +use databend_common_column::bitmap::Bitmap; use databend_common_column::buffer::Buffer; use databend_common_expression::arrow::deserialize_column; use databend_common_expression::arrow::serialize_column; @@ -200,6 +203,18 @@ fn bench(c: &mut Criterion) { }, ); + group.bench_function( + format!("function_buffer_index_unchecked_push/{length}"), + |b| { + b.iter(|| { + let mut c = Vec::with_capacity(length); + for i in 0..length { + unsafe { c.push_unchecked(left.get_unchecked(i) + right.get_unchecked(i)) }; + } + }) + }, + ); + group.bench_function( format!("function_buffer_scalar_index_unchecked_iterator/{length}"), |b| { @@ -271,6 +286,27 @@ fn bench(c: &mut Criterion) { let _c = Int32Type::column_from_iter(iter, &[]); }) }); + + group.bench_function(format!("bitmap_from_arrow1_collect_bool/{length}"), |b| { + b.iter(|| { + let buffer = collect_bool(length, false, |x| x % 2 == 0); + assert!(buffer.count_set_bits() == length / 2); + }) + }); + + group.bench_function(format!("bitmap_from_arrow2_collect_bool/{length}"), |b| { + b.iter(|| { + let nulls = Bitmap::collect_bool(length, |x| x % 2 == 0); + assert!(nulls.null_count() == length / 2); + }) + }); + + group.bench_function(format!("bitmap_from_arrow2/{length}"), |b| { + b.iter(|| { + let nulls = Bitmap::from_trusted_len_iter((0..length).map(|x| x % 2 == 0)); + assert!(nulls.null_count() == length / 2); + }) + }); } } @@ -303,3 +339,42 @@ fn generate_random_int_data(rng: &mut StdRng, length: usize) -> (Buffer, Bu let b: Buffer = (0..length).map(|_| rng.gen_range(-1000..1000)).collect(); (s, b) } + +/// Invokes `f` with values `0..len` collecting the boolean results into a new `BooleanBuffer` +/// +/// This is similar to [`MutableBuffer::collect_bool`] but with +/// the option to efficiently negate the result +fn collect_bool(len: usize, neg: bool, f: impl Fn(usize) -> bool) -> BooleanBuffer { + let mut buffer = arrow_buffer::MutableBuffer::new(arrow_buffer::bit_util::ceil(len, 64) * 8); + + let chunks = len / 64; + let remainder = len % 64; + for chunk in 0..chunks { + let mut packed = 0; + for bit_idx in 0..64 { + let i = bit_idx + chunk * 64; + packed |= (f(i) as u64) << bit_idx; + } + if neg { + packed = !packed + } + + // SAFETY: Already allocated sufficient capacity + unsafe { buffer.push_unchecked(packed) } + } + + if remainder != 0 { + let mut packed = 0; + for bit_idx in 0..remainder { + let i = bit_idx + chunks * 64; + packed |= (f(i) as u64) << bit_idx; + } + if neg { + packed = !packed + } + + // SAFETY: Already allocated sufficient capacity + unsafe { buffer.push_unchecked(packed) } + } + BooleanBuffer::new(buffer.into(), 0, len) +} diff --git a/src/query/expression/src/lib.rs b/src/query/expression/src/lib.rs index 8ecea5372657e..cec48c7e4fbba 100755 --- a/src/query/expression/src/lib.rs +++ b/src/query/expression/src/lib.rs @@ -54,6 +54,7 @@ mod input_columns; mod kernels; mod property; mod register; +mod register_comparison; pub mod row; pub mod schema; pub mod type_check; @@ -74,6 +75,7 @@ pub use crate::input_columns::*; pub use crate::kernels::*; pub use crate::property::*; pub use crate::register::*; +pub use crate::register_comparison::*; pub use crate::row::*; pub use crate::schema::*; pub use crate::utils::block_thresholds::BlockThresholds; diff --git a/src/query/expression/src/register_comparison.rs b/src/query/expression/src/register_comparison.rs new file mode 100755 index 0000000000000..e40e4ea964bae --- /dev/null +++ b/src/query/expression/src/register_comparison.rs @@ -0,0 +1,97 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This code is generated by src/query/codegen/src/writes/register.rs. DO NOT EDIT. + +#![allow(unused_parens)] +#![allow(unused_variables)] +#![allow(clippy::redundant_closure)] +use crate::types::*; +use crate::values::Value; +use crate::EvalContext; +use crate::FunctionContext; +use crate::FunctionDomain; +use crate::FunctionRegistry; + +impl FunctionRegistry { + pub fn register_comparison_2_arg( + &mut self, + name: &str, + calc_domain: F, + func: G, + ) where + F: Fn(&FunctionContext, &I1::Domain, &I2::Domain) -> FunctionDomain + + 'static + + Clone + + Copy + + Send + + Sync, + G: Fn(I1::ScalarRef<'_>, I2::ScalarRef<'_>, &mut EvalContext) -> bool + + 'static + + Clone + + Copy + + Send + + Sync, + for<'a> I1::ScalarRef<'a>: Copy, + for<'a> I2::ScalarRef<'a>: Copy, + { + self.register_passthrough_nullable_2_arg::( + name, + calc_domain, + vectorize_cmp_2_arg(func), + ) + } +} + +pub fn vectorize_cmp_2_arg( + func: impl Fn(I1::ScalarRef<'_>, I2::ScalarRef<'_>, &mut EvalContext) -> bool + Copy + Send + Sync, +) -> impl Fn(Value, Value, &mut EvalContext) -> Value + Copy + Send + Sync +where + for<'a> I1::ScalarRef<'a>: Copy, + for<'a> I2::ScalarRef<'a>: Copy, +{ + move |arg1, arg2, ctx| match (arg1, arg2) { + (Value::Scalar(arg1), Value::Scalar(arg2)) => Value::Scalar(func( + I1::to_scalar_ref(&arg1), + I2::to_scalar_ref(&arg2), + ctx, + )), + (Value::Column(arg1), Value::Scalar(arg2)) => { + let arg2 = I2::to_scalar_ref(&arg2); + let col = Bitmap::collect_bool(ctx.num_rows, |idx| { + let arg1 = unsafe { I1::index_column_unchecked(&arg1, idx) }; + func(arg1, arg2, ctx) + }); + + Value::Column(col) + } + (Value::Scalar(arg1), Value::Column(arg2)) => { + let arg1 = I1::to_scalar_ref(&arg1); + let col = Bitmap::collect_bool(ctx.num_rows, |idx| { + let arg2 = unsafe { I2::index_column_unchecked(&arg2, idx) }; + func(arg1, arg2, ctx) + }); + + Value::Column(col) + } + (Value::Column(arg1), Value::Column(arg2)) => { + let col = Bitmap::collect_bool(ctx.num_rows, |idx| { + let arg1 = unsafe { I1::index_column_unchecked(&arg1, idx) }; + let arg2 = unsafe { I2::index_column_unchecked(&arg2, idx) }; + func(arg1, arg2, ctx) + }); + Value::Column(col) + } + } +} diff --git a/src/query/functions/src/aggregates/adaptors/aggregate_null_unary_adaptor.rs b/src/query/functions/src/aggregates/adaptors/aggregate_null_unary_adaptor.rs index 7afa226ca77b7..90977e2c1f466 100644 --- a/src/query/functions/src/aggregates/adaptors/aggregate_null_unary_adaptor.rs +++ b/src/query/functions/src/aggregates/adaptors/aggregate_null_unary_adaptor.rs @@ -117,6 +117,8 @@ impl AggregateFunction for AggregateNullUnaryAdapto let validity = column_merge_validity(col, validity.cloned()); let not_null_column = &[col.remove_nullable()]; let not_null_column = not_null_column.into(); + let validity = Bitmap::map_all_sets_to_none(validity); + self.nested .accumulate(place, not_null_column, validity.as_ref(), input_rows)?; diff --git a/src/query/functions/src/aggregates/aggregate_min_max_any.rs b/src/query/functions/src/aggregates/aggregate_min_max_any.rs index 01ecef3cf8941..ca7396fc86297 100644 --- a/src/query/functions/src/aggregates/aggregate_min_max_any.rs +++ b/src/query/functions/src/aggregates/aggregate_min_max_any.rs @@ -15,6 +15,7 @@ use std::marker::PhantomData; use std::sync::Arc; +use boolean::TrueIdxIter; use borsh::BorshDeserialize; use borsh::BorshSerialize; use databend_common_exception::ErrorCode; @@ -25,6 +26,7 @@ use databend_common_expression::types::Bitmap; use databend_common_expression::types::*; use databend_common_expression::with_number_mapped_type; use databend_common_expression::Scalar; +use databend_common_expression::SELECTIVITY_THRESHOLD; use ethnum::i256; use super::aggregate_function_factory::AggregateFunctionDescription; @@ -213,19 +215,27 @@ where } let column_iter = T::iter_column(&other); - if let Some(validity) = validity { - if validity.null_count() == column_len { - return Ok(()); - } - for (data, valid) in column_iter.zip(validity.iter()) { - if valid { - let _ = self.add(data, function_data); + if let Some(v) = validity { + if v.true_count() as f64 / v.len() as f64 >= SELECTIVITY_THRESHOLD { + let value = column_iter + .zip(v.iter()) + .filter(|(_, v)| *v) + .map(|(v, _)| v) + .reduce(|l, r| if !C::change_if(&l, &r) { l } else { r }); + + if let Some(value) = value { + self.add(value, function_data)?; } - } + } else { + for idx in TrueIdxIter::new(v.len(), Some(v)) { + let v = unsafe { T::index_column_unchecked(&other, idx) }; + self.add(v, function_data)?; + } + }; } else { let v = column_iter.reduce(|l, r| if !C::change_if(&l, &r) { l } else { r }); if let Some(v) = v { - let _ = self.add(v, function_data); + self.add(v, function_data)?; } } Ok(()) diff --git a/src/query/functions/src/aggregates/aggregate_sum.rs b/src/query/functions/src/aggregates/aggregate_sum.rs index 2d3eee38d7b97..7f39a503c956e 100644 --- a/src/query/functions/src/aggregates/aggregate_sum.rs +++ b/src/query/functions/src/aggregates/aggregate_sum.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use boolean::TrueIdxIter; use borsh::BorshDeserialize; use borsh::BorshSerialize; use databend_common_exception::ErrorCode; @@ -28,6 +29,7 @@ use databend_common_expression::Column; use databend_common_expression::ColumnBuilder; use databend_common_expression::Scalar; use databend_common_expression::StateAddr; +use databend_common_expression::SELECTIVITY_THRESHOLD; use num_traits::AsPrimitive; use super::assert_unary_arguments; @@ -89,14 +91,19 @@ where TSum: Number + std::ops::AddAssign, { match validity { - Some(v) if v.null_count() > 0 => { + Some(v) => { let mut sum = TSum::default(); - inner.iter().zip(v.iter()).for_each(|(t, b)| { - if b { - sum += t.as_(); - } - }); - + if v.true_count() as f64 / v.len() as f64 >= SELECTIVITY_THRESHOLD { + inner.iter().zip(v.iter()).for_each(|(t, b)| { + if b { + sum += t.as_(); + } + }); + } else { + TrueIdxIter::new(v.len(), Some(v)).for_each(|idx| { + sum += unsafe { inner.get_unchecked(idx).as_() }; + }); + } sum } _ => { diff --git a/src/query/functions/src/scalars/comparison.rs b/src/query/functions/src/scalars/comparison.rs index 8d3bc381489ab..7a5ee1fa771eb 100644 --- a/src/query/functions/src/scalars/comparison.rs +++ b/src/query/functions/src/scalars/comparison.rs @@ -22,6 +22,7 @@ use databend_common_expression::types::string::StringDomain; use databend_common_expression::types::AnyType; use databend_common_expression::types::ArgType; use databend_common_expression::types::ArrayType; +use databend_common_expression::types::Bitmap; use databend_common_expression::types::BooleanType; use databend_common_expression::types::DataType; use databend_common_expression::types::DateType; @@ -79,42 +80,42 @@ const ALL_FALSE_DOMAIN: BooleanDomain = BooleanDomain { }; fn register_variant_cmp(registry: &mut FunctionRegistry) { - registry.register_2_arg::( + registry.register_comparison_2_arg::( "eq", |_, _, _| FunctionDomain::Full, |lhs, rhs, _| { jsonb::compare(lhs, rhs).expect("unable to parse jsonb value") == Ordering::Equal }, ); - registry.register_2_arg::( + registry.register_comparison_2_arg::( "noteq", |_, _, _| FunctionDomain::Full, |lhs, rhs, _| { jsonb::compare(lhs, rhs).expect("unable to parse jsonb value") != Ordering::Equal }, ); - registry.register_2_arg::( + registry.register_comparison_2_arg::( "gt", |_, _, _| FunctionDomain::Full, |lhs, rhs, _| { jsonb::compare(lhs, rhs).expect("unable to parse jsonb value") == Ordering::Greater }, ); - registry.register_2_arg::( + registry.register_comparison_2_arg::( "gte", |_, _, _| FunctionDomain::Full, |lhs, rhs, _| { jsonb::compare(lhs, rhs).expect("unable to parse jsonb value") != Ordering::Less }, ); - registry.register_2_arg::( + registry.register_comparison_2_arg::( "lt", |_, _, _| FunctionDomain::Full, |lhs, rhs, _| { jsonb::compare(lhs, rhs).expect("unable to parse jsonb value") == Ordering::Less }, ); - registry.register_2_arg::( + registry.register_comparison_2_arg::( "lte", |_, _, _| FunctionDomain::Full, |lhs, rhs, _| { @@ -125,32 +126,32 @@ fn register_variant_cmp(registry: &mut FunctionRegistry) { macro_rules! register_simple_domain_type_cmp { ($registry:ident, $T:ty) => { - $registry.register_2_arg::<$T, $T, BooleanType, _, _>( + $registry.register_comparison_2_arg::<$T, $T, _, _>( "eq", |_, d1, d2| d1.domain_eq(d2), |lhs, rhs, _| lhs == rhs, ); - $registry.register_2_arg::<$T, $T, BooleanType, _, _>( + $registry.register_comparison_2_arg::<$T, $T, _, _>( "noteq", |_, d1, d2| d1.domain_noteq(d2), |lhs, rhs, _| lhs != rhs, ); - $registry.register_2_arg::<$T, $T, BooleanType, _, _>( + $registry.register_comparison_2_arg::<$T, $T, _, _>( "gt", |_, d1, d2| d1.domain_gt(d2), |lhs, rhs, _| lhs > rhs, ); - $registry.register_2_arg::<$T, $T, BooleanType, _, _>( + $registry.register_comparison_2_arg::<$T, $T, _, _>( "gte", |_, d1, d2| d1.domain_gte(d2), |lhs, rhs, _| lhs >= rhs, ); - $registry.register_2_arg::<$T, $T, BooleanType, _, _>( + $registry.register_comparison_2_arg::<$T, $T, _, _>( "lt", |_, d1, d2| d1.domain_lt(d2), |lhs, rhs, _| lhs < rhs, ); - $registry.register_2_arg::<$T, $T, BooleanType, _, _>( + $registry.register_comparison_2_arg::<$T, $T, _, _>( "lte", |_, d1, d2| d1.domain_lte(d2), |lhs, rhs, _| lhs <= rhs, @@ -194,28 +195,25 @@ fn register_string_cmp(registry: &mut FunctionRegistry) { fn vectorize_string_cmp( func: impl Fn(Ordering) -> bool + Copy, ) -> impl Fn(Value, Value, &mut EvalContext) -> Value + Copy { - move |arg1, arg2, _ctx| match (arg1, arg2) { + move |arg1, arg2, ctx| match (arg1, arg2) { (Value::Scalar(arg1), Value::Scalar(arg2)) => Value::Scalar(func(arg1.cmp(&arg2))), (Value::Column(arg1), Value::Scalar(arg2)) => { - let mut builder = MutableBitmap::with_capacity(arg1.len()); - for i in 0..arg1.len() { - builder.push(func(StringColumn::compare_str(&arg1, i, &arg2))); - } - Value::Column(builder.into()) + let col = Bitmap::collect_bool(ctx.num_rows, |i| { + func(StringColumn::compare_str(&arg1, i, &arg2)) + }); + Value::Column(col) } (Value::Scalar(arg1), Value::Column(arg2)) => { - let mut builder = MutableBitmap::with_capacity(arg1.len()); - for i in 0..arg2.len() { - builder.push(func(StringColumn::compare_str(&arg2, i, &arg1).reverse())); - } - Value::Column(builder.into()) + let col = Bitmap::collect_bool(ctx.num_rows, |i| { + func(StringColumn::compare_str(&arg2, i, &arg1).reverse()) + }); + Value::Column(col) } (Value::Column(arg1), Value::Column(arg2)) => { - let mut builder = MutableBitmap::with_capacity(arg1.len()); - for i in 0..arg1.len() { - builder.push(func(StringColumn::compare(&arg1, i, &arg2, i))); - } - Value::Column(builder.into()) + let col = Bitmap::collect_bool(ctx.num_rows, |i| { + func(StringColumn::compare(&arg1, i, &arg2, i)) + }); + Value::Column(col) } } } @@ -229,7 +227,7 @@ fn register_timestamp_cmp(registry: &mut FunctionRegistry) { } fn register_boolean_cmp(registry: &mut FunctionRegistry) { - registry.register_2_arg::( + registry.register_comparison_2_arg::( "eq", |_, d1, d2| match (d1.has_true, d1.has_false, d2.has_true, d2.has_false) { (true, false, true, false) => FunctionDomain::Domain(ALL_TRUE_DOMAIN), @@ -240,7 +238,7 @@ fn register_boolean_cmp(registry: &mut FunctionRegistry) { }, |lhs, rhs, _| lhs == rhs, ); - registry.register_2_arg::( + registry.register_comparison_2_arg::( "noteq", |_, d1, d2| match (d1.has_true, d1.has_false, d2.has_true, d2.has_false) { (true, false, true, false) => FunctionDomain::Domain(ALL_FALSE_DOMAIN), @@ -251,7 +249,7 @@ fn register_boolean_cmp(registry: &mut FunctionRegistry) { }, |lhs, rhs, _| lhs != rhs, ); - registry.register_2_arg::( + registry.register_comparison_2_arg::( "gt", |_, d1, d2| match (d1.has_true, d1.has_false, d2.has_true, d2.has_false) { (true, false, false, true) => FunctionDomain::Domain(ALL_TRUE_DOMAIN), @@ -260,7 +258,7 @@ fn register_boolean_cmp(registry: &mut FunctionRegistry) { }, |lhs, rhs, _| lhs & !rhs, ); - registry.register_2_arg::( + registry.register_comparison_2_arg::( "gte", |_, d1, d2| match (d1.has_true, d1.has_false, d2.has_true, d2.has_false) { (true, false, _, _) => FunctionDomain::Domain(ALL_TRUE_DOMAIN), @@ -270,7 +268,7 @@ fn register_boolean_cmp(registry: &mut FunctionRegistry) { }, |lhs, rhs, _| lhs | !rhs, ); - registry.register_2_arg::( + registry.register_comparison_2_arg::( "lt", |_, d1, d2| match (d1.has_true, d1.has_false, d2.has_true, d2.has_false) { (false, true, true, false) => FunctionDomain::Domain(ALL_TRUE_DOMAIN), @@ -279,7 +277,7 @@ fn register_boolean_cmp(registry: &mut FunctionRegistry) { }, |lhs, rhs, _| !lhs & rhs, ); - registry.register_2_arg::( + registry.register_comparison_2_arg::( "lte", |_, d1, d2| match (d1.has_true, d1.has_false, d2.has_true, d2.has_false) { (false, true, _, _) => FunctionDomain::Domain(ALL_TRUE_DOMAIN), diff --git a/src/query/functions/src/scalars/decimal/comparison.rs b/src/query/functions/src/scalars/decimal/comparison.rs index bbd1e793b0632..13f3356e29eaa 100644 --- a/src/query/functions/src/scalars/decimal/comparison.rs +++ b/src/query/functions/src/scalars/decimal/comparison.rs @@ -18,7 +18,7 @@ use std::sync::Arc; use databend_common_expression::types::decimal::*; use databend_common_expression::types::*; -use databend_common_expression::vectorize_2_arg; +use databend_common_expression::vectorize_cmp_2_arg; use databend_common_expression::Domain; use databend_common_expression::EvalContext; use databend_common_expression::Function; @@ -290,7 +290,7 @@ where { let a = a.try_downcast().unwrap(); let b = b.try_downcast().unwrap(); - let value = vectorize_2_arg::, DecimalType, BooleanType>(f)(a, b, ctx); + let value = vectorize_cmp_2_arg::, DecimalType>(f)(a, b, ctx); value.upcast() } diff --git a/tests/sqllogictests/suites/duckdb/join/inner/test_join.test b/tests/sqllogictests/suites/duckdb/join/inner/test_join.test index 078a4afeac2fa..dd67ab09b300e 100644 --- a/tests/sqllogictests/suites/duckdb/join/inner/test_join.test +++ b/tests/sqllogictests/suites/duckdb/join/inner/test_join.test @@ -50,7 +50,7 @@ SELECT a, test.b, c FROM test INNER JOIN test2 ON test2.b = test.b ORDER BY c; 11 1 20 12 2 30 -# explicit join with additional condition that is no left-right comparision +# explicit join with additional condition that is no left-right comparison query III SELECT a, test.b, c FROM test INNER JOIN test2 ON test2.b = test.b and test.b = 2; ---- @@ -64,7 +64,7 @@ SELECT a, test.b, c FROM test INNER JOIN test2 ON test2.b = test.b and 2 = 2 ORD 11 1 20 12 2 30 -# explicit join with only condition that is no left-right comparision +# explicit join with only condition that is no left-right comparison query III SELECT a, test.b, c FROM test INNER JOIN test2 ON test.b = 2 ORDER BY c; ---- diff --git a/tests/sqllogictests/suites/duckdb/join_small_block/inner/test_join.test b/tests/sqllogictests/suites/duckdb/join_small_block/inner/test_join.test index 33e883b1018d6..60dc3c09dc73a 100644 --- a/tests/sqllogictests/suites/duckdb/join_small_block/inner/test_join.test +++ b/tests/sqllogictests/suites/duckdb/join_small_block/inner/test_join.test @@ -53,7 +53,7 @@ SELECT a, test.b, c FROM test INNER JOIN test2 ON test2.b = test.b ORDER BY c; 11 1 20 12 2 30 -# explicit join with additional condition that is no left-right comparision +# explicit join with additional condition that is no left-right comparison query III SELECT a, test.b, c FROM test INNER JOIN test2 ON test2.b = test.b and test.b = 2; ---- @@ -67,7 +67,7 @@ SELECT a, test.b, c FROM test INNER JOIN test2 ON test2.b = test.b and 2 = 2 ORD 11 1 20 12 2 30 -# explicit join with only condition that is no left-right comparision +# explicit join with only condition that is no left-right comparison query III SELECT a, test.b, c FROM test INNER JOIN test2 ON test.b = 2 ORDER BY c; ----