From 1a396e9598737755c08e183750d426cc77e04d80 Mon Sep 17 00:00:00 2001 From: SadiinsoSnowfall Date: Mon, 27 Jan 2025 14:58:56 +0100 Subject: [PATCH 1/4] moved self_leq to the is_less_equal callable --- include/eve/arch/cpu/wide.hpp | 14 +- .../detail/function/simd/arm/neon/friends.hpp | 33 ----- .../detail/function/simd/arm/sve/friends.hpp | 5 - .../detail/function/simd/common/friends.hpp | 13 -- .../eve/detail/function/simd/ppc/friends.hpp | 10 -- .../detail/function/simd/riscv/friends.hpp | 31 ---- .../eve/detail/function/simd/x86/friends.hpp | 51 ------- .../eve/module/core/regular/impl/is_less.hpp | 2 +- .../core/regular/impl/is_less_equal.hpp | 36 +++++ .../regular/impl/simd/arm/neon/is_less.hpp | 2 +- .../impl/simd/arm/neon/is_less_equal.hpp | 55 +++++++ .../impl/simd/arm/sve/is_less_equal.hpp | 22 +++ .../regular/impl/simd/ppc/is_less_equal.hpp | 25 ++++ .../regular/impl/simd/riscv/is_less_equal.hpp | 32 ++++ .../regular/impl/simd/x86/is_less_equal.hpp | 140 ++++++++++++------ include/eve/module/core/regular/is_less.hpp | 1 - .../eve/module/core/regular/is_less_equal.hpp | 70 ++++----- 17 files changed, 307 insertions(+), 235 deletions(-) create mode 100644 include/eve/module/core/regular/impl/is_less_equal.hpp create mode 100644 include/eve/module/core/regular/impl/simd/arm/neon/is_less_equal.hpp create mode 100644 include/eve/module/core/regular/impl/simd/arm/sve/is_less_equal.hpp create mode 100644 include/eve/module/core/regular/impl/simd/ppc/is_less_equal.hpp create mode 100644 include/eve/module/core/regular/impl/simd/riscv/is_less_equal.hpp diff --git a/include/eve/arch/cpu/wide.hpp b/include/eve/arch/cpu/wide.hpp index 9cf0c59de7..ddd9404297 100644 --- a/include/eve/arch/cpu/wide.hpp +++ b/include/eve/arch/cpu/wide.hpp @@ -34,6 +34,8 @@ #include #include #include +#include +#include #include #include @@ -953,32 +955,32 @@ namespace eve } //! @brief Element-wise less-or-equal comparison between eve::wide - friend EVE_FORCEINLINE auto operator<=(wide v, wide w) noexcept + friend EVE_FORCEINLINE auto operator<=(wide a, wide b) noexcept #if !defined(EVE_DOXYGEN_INVOKED) requires(supports_ordering_v) #endif { - return detail::self_leq(v, w); + return is_less_equal(a, b); } //! @brief Element-wise less-or-equal comparison between a eve::wide and a scalar template - friend EVE_FORCEINLINE auto operator<=(wide v, S w) noexcept + friend EVE_FORCEINLINE auto operator<=(wide w, S s) noexcept #if !defined(EVE_DOXYGEN_INVOKED) requires(supports_ordering_v) #endif { - return v <= wide {w}; + return is_less_equal(w, s); } //! @brief Element-wise less-or-equal comparison between a scalar and a eve::wide template - friend EVE_FORCEINLINE auto operator<=(S v, wide w) noexcept + friend EVE_FORCEINLINE auto operator<=(S s, wide w) noexcept #if !defined(EVE_DOXYGEN_INVOKED) requires(supports_ordering_v) #endif { - return wide {v} <= w; + return is_less_equal(s, w); } //! Computes the logical negation of its parameter diff --git a/include/eve/detail/function/simd/arm/neon/friends.hpp b/include/eve/detail/function/simd/arm/neon/friends.hpp index f4ed7ee2a7..84e6eeecd7 100644 --- a/include/eve/detail/function/simd/arm/neon/friends.hpp +++ b/include/eve/detail/function/simd/arm/neon/friends.hpp @@ -94,37 +94,4 @@ namespace eve::detail else if constexpr( sizeof(T) == 8 ) return map([](E const& e, E const& f){ return as_logical_t(e >= f); }, v, w); } - - template - EVE_FORCEINLINE logical> self_leq(wide v,wide w) noexcept - requires arm_abi> - { - constexpr auto cat = categorize>(); - - if constexpr( cat == category::int32x4 ) return vcleq_s32(v, w); - else if constexpr( cat == category::int16x8 ) return vcleq_s16(v, w); - else if constexpr( cat == category::int8x16 ) return vcleq_s8(v, w); - else if constexpr( cat == category::uint32x4 ) return vcleq_u32(v, w); - else if constexpr( cat == category::uint16x8 ) return vcleq_u16(v, w); - else if constexpr( cat == category::uint8x16 ) return vcleq_u8(v, w); - else if constexpr( cat == category::float32x4) return vcleq_f32(v, w); - else if constexpr( cat == category::int32x2 ) return vcle_s32(v, w); - else if constexpr( cat == category::int16x4 ) return vcle_s16(v, w); - else if constexpr( cat == category::int8x8 ) return vcle_s8(v, w); - else if constexpr( cat == category::uint32x2 ) return vcle_u32(v, w); - else if constexpr( cat == category::uint16x4 ) return vcle_u16(v, w); - else if constexpr( cat == category::uint8x8 ) return vcle_u8(v, w); - else if constexpr( cat == category::float32x2) return vcle_f32(v, w); - else if constexpr( current_api >= asimd) - { - if constexpr( cat == category::float64x1) return vcle_f64(v, w); - else if constexpr( cat == category::int64x1) return vcle_s64(v, w); - else if constexpr( cat == category::uint64x1) return vcle_u64(v, w); - else if constexpr( cat == category::float64x2) return vcleq_f64(v, w); - else if constexpr( cat == category::int64x2) return vcleq_s64(v, w); - else if constexpr( cat == category::uint64x2) return vcleq_u64(v, w); - } - else if constexpr( sizeof(T) == 8 ) - return map([](E const& e, E const& f){ return as_logical_t(e <= f); }, v, w); - } } diff --git a/include/eve/detail/function/simd/arm/sve/friends.hpp b/include/eve/detail/function/simd/arm/sve/friends.hpp index 19d2c2ed28..7661a394fd 100644 --- a/include/eve/detail/function/simd/arm/sve/friends.hpp +++ b/include/eve/detail/function/simd/arm/sve/friends.hpp @@ -22,11 +22,6 @@ EVE_FORCEINLINE auto self_neq(wide v, wide w) noexcept -> as_logical_t> requires sve_abi> { return svcmpne(sve_true(), v, w); } -template -EVE_FORCEINLINE auto -self_leq(wide v, wide w) noexcept -> as_logical_t> -requires sve_abi> { return svcmple(sve_true(), v, w); } - template EVE_FORCEINLINE auto self_geq(wide v, wide w) noexcept -> as_logical_t> diff --git a/include/eve/detail/function/simd/common/friends.hpp b/include/eve/detail/function/simd/common/friends.hpp index 0396a6cbfa..fc95c6be1d 100644 --- a/include/eve/detail/function/simd/common/friends.hpp +++ b/include/eve/detail/function/simd/common/friends.hpp @@ -99,19 +99,6 @@ namespace eve::detail //================================================================================================ // Ordering operators - template - EVE_FORCEINLINE auto self_leq(Wide const& v,Wide const& w) noexcept - { - if constexpr( product_type ) - { - return convert(kumi::to_tuple(v) <= kumi::to_tuple(w), as_element>()); - } - else - { - constexpr auto ge = [](E const& e, E const& f) { return as_logical_t(e <= f); }; - return apply_over(ge, v, w); - } - } template EVE_FORCEINLINE auto self_geq(Wide const& v,Wide const& w) noexcept diff --git a/include/eve/detail/function/simd/ppc/friends.hpp b/include/eve/detail/function/simd/ppc/friends.hpp index f0f76ad0cf..ce1a0ea290 100644 --- a/include/eve/detail/function/simd/ppc/friends.hpp +++ b/include/eve/detail/function/simd/ppc/friends.hpp @@ -37,14 +37,4 @@ namespace eve::detail else return !(v < w); } - - template - EVE_FORCEINLINE auto self_leq(wide const &v, wide const &w) noexcept - requires ppc_abi> - { - if constexpr(std::is_floating_point_v) - return logical>(vec_cmple(v.storage(), w.storage())); - else - return !(v > w); - } } diff --git a/include/eve/detail/function/simd/riscv/friends.hpp b/include/eve/detail/function/simd/riscv/friends.hpp index c77dad9219..1e6c7820fb 100644 --- a/include/eve/detail/function/simd/riscv/friends.hpp +++ b/include/eve/detail/function/simd/riscv/friends.hpp @@ -44,37 +44,6 @@ requires rvv_abi> return self_geq_impl(lhs, rhs); } -template -EVE_FORCEINLINE auto -self_leq_impl(wide lhs, U rhs) noexcept -> logical> -requires rvv_abi> && (std::same_as, U> || scalar_value) -{ - if constexpr( scalar_value && !std::same_as ) return self_leq(lhs, static_cast(rhs)); - else - { - constexpr auto c = categorize>(); - if constexpr( match(c, category::int_) ) return __riscv_vmsle(lhs, rhs, N::value); - else if constexpr( match(c, category::uint_) ) return __riscv_vmsleu(lhs, rhs, N::value); - else if constexpr( match(c, category::float_) ) return __riscv_vmfle(lhs, rhs, N::value); - } -} - -template -EVE_FORCEINLINE auto -self_leq(wide lhs, wide rhs) noexcept -> logical> -requires rvv_abi> -{ - return self_leq_impl(lhs, rhs); -} - -template -EVE_FORCEINLINE auto -self_leq(wide lhs, std::convertible_to auto rhs) noexcept -> logical> -requires rvv_abi> -{ - return self_leq_impl(lhs, rhs); -} - template EVE_FORCEINLINE auto self_eq_impl(wide lhs, U rhs) noexcept -> logical> diff --git a/include/eve/detail/function/simd/x86/friends.hpp b/include/eve/detail/function/simd/x86/friends.hpp index fe18f49008..6a3de8915d 100644 --- a/include/eve/detail/function/simd/x86/friends.hpp +++ b/include/eve/detail/function/simd/x86/friends.hpp @@ -229,55 +229,4 @@ EVE_FORCEINLINE as_logical_t> else return !(v < w); } } - -//================================================================================================ -template -EVE_FORCEINLINE as_logical_t> - self_leq(wide v, wide w) noexcept requires x86_abi> -{ - constexpr auto c = categorize>(); - constexpr auto f = to_integer(cmp_flt::le_oq); - - if constexpr( current_api >= avx512 ) - { - if constexpr( c == category::float32x16 ) return mask16 {_mm512_cmp_ps_mask(v, w, f)}; - else if constexpr( c == category::float32x8 ) return mask8 {_mm256_cmp_ps_mask(v, w, f)}; - else if constexpr( c == category::float32x4 ) return mask8 {_mm_cmp_ps_mask(v, w, f)}; - else if constexpr( c == category::float64x8 ) return mask8 {_mm512_cmp_pd_mask(v, w, f)}; - else if constexpr( c == category::float64x4 ) return mask8 {_mm256_cmp_pd_mask(v, w, f)}; - else if constexpr( c == category::float64x2 ) return mask8 {_mm_cmp_pd_mask(v, w, f)}; - else if constexpr( c == category::uint64x8 ) return mask8 {_mm512_cmple_epu64_mask(v, w)}; - else if constexpr( c == category::uint64x4 ) return mask8 {_mm256_cmple_epu64_mask(v, w)}; - else if constexpr( c == category::uint64x2 ) return mask8 {_mm_cmple_epu64_mask(v, w)}; - else if constexpr( c == category::uint32x16 ) return mask16 {_mm512_cmple_epu32_mask(v, w)}; - else if constexpr( c == category::uint32x8 ) return mask8 {_mm256_cmple_epu32_mask(v, w)}; - else if constexpr( c == category::uint32x4 ) return mask8 {_mm_cmple_epu32_mask(v, w)}; - else if constexpr( c == category::uint16x32 ) return mask32 {_mm512_cmple_epu16_mask(v, w)}; - else if constexpr( c == category::uint16x16 ) return mask16 {_mm256_cmple_epu16_mask(v, w)}; - else if constexpr( c == category::uint16x8 ) return mask8 {_mm_cmple_epu16_mask(v, w)}; - else if constexpr( c == category::uint8x64 ) return mask64 {_mm512_cmple_epu8_mask(v, w)}; - else if constexpr( c == category::uint8x32 ) return mask32 {_mm256_cmple_epu8_mask(v, w)}; - else if constexpr( c == category::uint8x16 ) return mask16 {_mm_cmple_epu8_mask(v, w)}; - else if constexpr( c == category::int64x8 ) return mask8 {_mm512_cmple_epi64_mask(v, w)}; - else if constexpr( c == category::int64x4 ) return mask8 {_mm256_cmple_epi64_mask(v, w)}; - else if constexpr( c == category::int64x2 ) return mask8 {_mm_cmple_epi64_mask(v, w)}; - else if constexpr( c == category::int32x16 ) return mask16 {_mm512_cmple_epi32_mask(v, w)}; - else if constexpr( c == category::int32x8 ) return mask8 {_mm256_cmple_epi32_mask(v, w)}; - else if constexpr( c == category::int32x4 ) return mask8 {_mm_cmple_epi32_mask(v, w)}; - else if constexpr( c == category::int16x32 ) return mask32 {_mm512_cmple_epi16_mask(v, w)}; - else if constexpr( c == category::int16x16 ) return mask16 {_mm256_cmple_epi16_mask(v, w)}; - else if constexpr( c == category::int16x8 ) return mask8 {_mm_cmple_epi16_mask(v, w)}; - else if constexpr( c == category::int8x64 ) return mask64 {_mm512_cmple_epi8_mask(v, w)}; - else if constexpr( c == category::int8x32 ) return mask32 {_mm256_cmple_epi8_mask(v, w)}; - else if constexpr( c == category::int8x16 ) return mask16 {_mm_cmple_epi8_mask(v, w)}; - } - else - { - if constexpr( c == category::float32x8 ) return _mm256_cmp_ps(v, w, f); - else if constexpr( c == category::float64x4 ) return _mm256_cmp_pd(v, w, f); - else if constexpr( c == category::float32x4 ) return _mm_cmple_ps(v, w); - else if constexpr( c == category::float64x2 ) return _mm_cmple_pd(v, w); - else return !(v > w); - } -} } diff --git a/include/eve/module/core/regular/impl/is_less.hpp b/include/eve/module/core/regular/impl/is_less.hpp index cfb3c49261..6d5d77de69 100644 --- a/include/eve/module/core/regular/impl/is_less.hpp +++ b/include/eve/module/core/regular/impl/is_less.hpp @@ -7,10 +7,10 @@ //================================================================================================== #pragma once -#include #include #include #include +#include namespace eve::detail { diff --git a/include/eve/module/core/regular/impl/is_less_equal.hpp b/include/eve/module/core/regular/impl/is_less_equal.hpp new file mode 100644 index 0000000000..2b0f8acc6f --- /dev/null +++ b/include/eve/module/core/regular/impl/is_less_equal.hpp @@ -0,0 +1,36 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +#include +#include +#include +#include + +namespace eve::detail +{ + template + EVE_FORCEINLINE constexpr as_logical_t is_less_equal_(EVE_REQUIRES(cpu_), O const& o, T a, T b) noexcept + { + if constexpr (O::contains(almost)) + { + auto tol = o[almost].value(T{}); + if constexpr(integral_value) return a <= eve::next(b, tol); + else return a <= fam(b, tol, eve::max(eve::abs(a), eve::abs(b))); + } + else if constexpr (product_type) + { + return kumi::to_tuple(a) <= kumi::to_tuple(b); + } + else + { + if constexpr (scalar_value) return as_logical_t(a <= b); + else return map([](auto e, auto f) { return e <= f; }, a, b); + } + } +} diff --git a/include/eve/module/core/regular/impl/simd/arm/neon/is_less.hpp b/include/eve/module/core/regular/impl/simd/arm/neon/is_less.hpp index e374b0e600..51d8ead572 100644 --- a/include/eve/module/core/regular/impl/simd/arm/neon/is_less.hpp +++ b/include/eve/module/core/regular/impl/simd/arm/neon/is_less.hpp @@ -49,7 +49,7 @@ namespace eve::detail else if constexpr (cat == category::int64x2) return vcltq_s64(a, b); else if constexpr (cat == category::uint64x2) return vcltq_u64(a, b); } - else return map(is_less, a, b); + else return map([](auto e, auto f){ return e < f; }, a, b); } } } diff --git a/include/eve/module/core/regular/impl/simd/arm/neon/is_less_equal.hpp b/include/eve/module/core/regular/impl/simd/arm/neon/is_less_equal.hpp new file mode 100644 index 0000000000..4800e6f4fd --- /dev/null +++ b/include/eve/module/core/regular/impl/simd/arm/neon/is_less_equal.hpp @@ -0,0 +1,55 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +#include +#include +#include +#include + +namespace eve::detail +{ + template + EVE_FORCEINLINE logical> is_less_equal_(EVE_REQUIRES(neon128_), O const& opts, wide a, wide b) noexcept + requires arm_abi> + { + if constexpr (O::contains(almost)) + { + return is_less_equal.behavior(cpu_{}, opts, a, b); + } + else + { + constexpr auto cat = categorize>(); + + if constexpr (cat == category::int32x4) return vcleq_s32(a, b); + else if constexpr (cat == category::int16x8) return vcleq_s16(a, b); + else if constexpr (cat == category::int8x16) return vcleq_s8(a, b); + else if constexpr (cat == category::uint32x4) return vcleq_u32(a, b); + else if constexpr (cat == category::uint16x8) return vcleq_u16(a, b); + else if constexpr (cat == category::uint8x16) return vcleq_u8(a, b); + else if constexpr (cat == category::float32x4) return vcleq_f32(a, b); + else if constexpr (cat == category::int32x2) return vcle_s32(a, b); + else if constexpr (cat == category::int16x4) return vcle_s16(a, b); + else if constexpr (cat == category::int8x8) return vcle_s8(a, b); + else if constexpr (cat == category::uint32x2) return vcle_u32(a, b); + else if constexpr (cat == category::uint16x4) return vcle_u16(a, b); + else if constexpr (cat == category::uint8x8) return vcle_u8(a, b); + else if constexpr (cat == category::float32x2) return vcle_f32(a, b); + else if constexpr (current_api >= asimd) + { + if constexpr (cat == category::float64x1) return vcle_f64(a, b); + else if constexpr (cat == category::int64x1) return vcle_s64(a, b); + else if constexpr (cat == category::uint64x1) return vcle_u64(a, b); + else if constexpr (cat == category::float64x2) return vcleq_f64(a, b); + else if constexpr (cat == category::int64x2) return vcleq_s64(a, b); + else if constexpr (cat == category::uint64x2) return vcleq_u64(a, b); + } + else return map([](auto e, auto f){ return e <= f; }, a, b); + } + } +} diff --git a/include/eve/module/core/regular/impl/simd/arm/sve/is_less_equal.hpp b/include/eve/module/core/regular/impl/simd/arm/sve/is_less_equal.hpp new file mode 100644 index 0000000000..c89c98fe89 --- /dev/null +++ b/include/eve/module/core/regular/impl/simd/arm/sve/is_less_equal.hpp @@ -0,0 +1,22 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +#include +#include + +namespace eve::detail +{ + template + EVE_FORCEINLINE logical> is_less_equal_(EVE_REQUIRES(sve_), O const& opts, wide a, wide b) noexcept + requires sve_abi> + { + if constexpr (O::contains(almost)) return is_less_equal.behavior(cpu_{}, opts, a, b); + else return svcmple(sve_true(), a, b); + } +} diff --git a/include/eve/module/core/regular/impl/simd/ppc/is_less_equal.hpp b/include/eve/module/core/regular/impl/simd/ppc/is_less_equal.hpp new file mode 100644 index 0000000000..29cb564f34 --- /dev/null +++ b/include/eve/module/core/regular/impl/simd/ppc/is_less_equal.hpp @@ -0,0 +1,25 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +#include +#include +#include +#include + +namespace eve::detail +{ + template + EVE_FORCEINLINE logical> is_less_equal_(EVE_REQUIRES(vmx_), O const& opts, wide a, wide b) noexcept + requires ppc_abi> + { + if constexpr (O::contains(almost)) return is_less_equal.behavior(cpu_{}, opts, a, b); + else if constexpr(std::is_floating_point_v) return logical>(vec_cmple(v.storage(), w.storage())); + else return !(v > w); + } +} diff --git a/include/eve/module/core/regular/impl/simd/riscv/is_less_equal.hpp b/include/eve/module/core/regular/impl/simd/riscv/is_less_equal.hpp new file mode 100644 index 0000000000..64a710725d --- /dev/null +++ b/include/eve/module/core/regular/impl/simd/riscv/is_less_equal.hpp @@ -0,0 +1,32 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +#include +#include + +namespace eve::detail +{ + template + EVE_FORCEINLINE logical> is_less_equal_(EVE_REQUIRES(rvv_), O const& opts, wide a, U b) noexcept + requires (rvv_abi> && same_element_type) + { + if constexpr (O::contains(almost)) + { + return is_less_equal.behavior(cpu_{}, opts, a, b); + } + else + { + constexpr auto c = categorize>(); + + if constexpr (match(c, category::int_)) return __riscv_vmsle(a, b, N::value); + else if constexpr (match(c, category::uint_)) return __riscv_vmsleu(a, b, N::value); + else if constexpr (match(c, category::float_)) return __riscv_vmfle(a, b, N::value); + } + } +} diff --git a/include/eve/module/core/regular/impl/simd/x86/is_less_equal.hpp b/include/eve/module/core/regular/impl/simd/x86/is_less_equal.hpp index 73c874da67..c5c64fa844 100644 --- a/include/eve/module/core/regular/impl/simd/x86/is_less_equal.hpp +++ b/include/eve/module/core/regular/impl/simd/x86/is_less_equal.hpp @@ -19,57 +19,111 @@ namespace eve::detail { -// ----------------------------------------------------------------------------------------------- -// masked implementation - template - EVE_FORCEINLINE as_logical_t> is_less_equal_(EVE_REQUIRES(avx512_), - C const& mask, - O const& opts, - wide const& v, - wide const& w) noexcept requires x86_abi> + template + EVE_FORCEINLINE as_logical_t> is_less_equal_(EVE_REQUIRES(sse2_), C const& mask, O const& opts, wide a, wide b) noexcept + requires x86_abi> { - if constexpr( C::has_alternative || O::contains(almost) ) + if constexpr (O::contains(almost)) { - return is_less_equal.behavior(cpu_{}, opts, v, w); + return is_less_equal.behavior(cpu_{}, opts, a, b); } else { - auto const s = alternative(mask, v, as(to_logical(v))); - [[maybe_unused]] auto m = expand_mask(mask, as(v)).storage().value; + constexpr auto c = categorize>(); + constexpr auto f = to_integer(cmp_flt::le_oq); + + if constexpr (current_api >= avx512) + { + if constexpr (c == category::float32x16) return mask16 {_mm512_cmp_ps_mask(v, w, f)}; + else if constexpr (c == category::float32x8) return mask8 {_mm256_cmp_ps_mask(v, w, f)}; + else if constexpr (c == category::float32x4) return mask8 {_mm_cmp_ps_mask(v, w, f)}; + else if constexpr (c == category::float64x8) return mask8 {_mm512_cmp_pd_mask(v, w, f)}; + else if constexpr (c == category::float64x4) return mask8 {_mm256_cmp_pd_mask(v, w, f)}; + else if constexpr (c == category::float64x2) return mask8 {_mm_cmp_pd_mask(v, w, f)}; + else if constexpr (c == category::uint64x8) return mask8 {_mm512_cmple_epu64_mask(v, w)}; + else if constexpr (c == category::uint64x4) return mask8 {_mm256_cmple_epu64_mask(v, w)}; + else if constexpr (c == category::uint64x2) return mask8 {_mm_cmple_epu64_mask(v, w)}; + else if constexpr (c == category::uint32x16) return mask16 {_mm512_cmple_epu32_mask(v, w)}; + else if constexpr (c == category::uint32x8) return mask8 {_mm256_cmple_epu32_mask(v, w)}; + else if constexpr (c == category::uint32x4) return mask8 {_mm_cmple_epu32_mask(v, w)}; + else if constexpr (c == category::uint16x32) return mask32 {_mm512_cmple_epu16_mask(v, w)}; + else if constexpr (c == category::uint16x16) return mask16 {_mm256_cmple_epu16_mask(v, w)}; + else if constexpr (c == category::uint16x8) return mask8 {_mm_cmple_epu16_mask(v, w)}; + else if constexpr (c == category::uint8x64) return mask64 {_mm512_cmple_epu8_mask(v, w)}; + else if constexpr (c == category::uint8x32) return mask32 {_mm256_cmple_epu8_mask(v, w)}; + else if constexpr (c == category::uint8x16) return mask16 {_mm_cmple_epu8_mask(v, w)}; + else if constexpr (c == category::int64x8) return mask8 {_mm512_cmple_epi64_mask(v, w)}; + else if constexpr (c == category::int64x4) return mask8 {_mm256_cmple_epi64_mask(v, w)}; + else if constexpr (c == category::int64x2) return mask8 {_mm_cmple_epi64_mask(v, w)}; + else if constexpr (c == category::int32x16) return mask16 {_mm512_cmple_epi32_mask(v, w)}; + else if constexpr (c == category::int32x8) return mask8 {_mm256_cmple_epi32_mask(v, w)}; + else if constexpr (c == category::int32x4) return mask8 {_mm_cmple_epi32_mask(v, w)}; + else if constexpr (c == category::int16x32) return mask32 {_mm512_cmple_epi16_mask(v, w)}; + else if constexpr (c == category::int16x16) return mask16 {_mm256_cmple_epi16_mask(v, w)}; + else if constexpr (c == category::int16x8) return mask8 {_mm_cmple_epi16_mask(v, w)}; + else if constexpr (c == category::int8x64) return mask64 {_mm512_cmple_epi8_mask(v, w)}; + else if constexpr (c == category::int8x32) return mask32 {_mm256_cmple_epi8_mask(v, w)}; + else if constexpr (c == category::int8x16) return mask16 {_mm_cmple_epi8_mask(v, w)}; + } + else + { + if constexpr (c == category::float32x8) return _mm256_cmp_ps(v, w, f); + else if constexpr (c == category::float64x4) return _mm256_cmp_pd(v, w, f); + else if constexpr (c == category::float32x4) return _mm_cmple_ps(v, w); + else if constexpr (c == category::float64x2) return _mm_cmple_pd(v, w); + else return !(v > w); + } + } + } + + // ----------------------------------------------------------------------------------------------- + // masked implementation + template + EVE_FORCEINLINE as_logical_t> is_less_equal_(EVE_REQUIRES(avx512_), C const& mask, O const& opts, wide a, wide b) noexcept + requires x86_abi> + { + if constexpr (C::has_alternative || O::contains(almost)) + { + return is_less_equal.behavior(cpu_{}, opts, a, b); + } + else + { + auto const s = alternative(mask, a, as(to_logical(a))); + [[maybe_unused]] auto m = expand_mask(mask, as(a)).storage().value; constexpr auto c = categorize>(); constexpr auto f = to_integer(cmp_flt::le_oq); - if constexpr( C::is_complete ) return s; - else if constexpr( c == category::float32x16 ) return mask16 {_mm512_mask_cmp_ps_mask(m, v, w, f)}; - else if constexpr( c == category::float64x8 ) return mask8 {_mm512_mask_cmp_pd_mask(m, v, w, f)}; - else if constexpr( c == category::float32x8 ) return mask8 {_mm256_mask_cmp_ps_mask(m, v, w, f)}; - else if constexpr( c == category::float64x4 ) return mask8 {_mm256_mask_cmp_pd_mask(m, v, w, f)}; - else if constexpr( c == category::float32x4 ) return mask8 {_mm_mask_cmp_ps_mask(m, v, w, f)}; - else if constexpr( c == category::float64x2 ) return mask8 {_mm_mask_cmp_pd_mask(m, v, w, f)}; - else if constexpr( c == category::int64x8 ) return mask8 {_mm512_mask_cmple_epi64_mask(m, v, w)}; - else if constexpr( c == category::int64x4 ) return mask8 {_mm256_mask_cmple_epi64_mask(m, v, w)}; - else if constexpr( c == category::int64x2 ) return mask8 {_mm_mask_cmple_epi64_mask(m, v, w)}; - else if constexpr( c == category::int32x16 ) return mask16 {_mm512_mask_cmple_epi32_mask(m, v, w)}; - else if constexpr( c == category::int32x8 ) return mask8 {_mm256_mask_cmple_epi32_mask(m, v, w)}; - else if constexpr( c == category::int32x4 ) return mask8 {_mm_mask_cmple_epi32_mask(m, v, w)}; - else if constexpr( c == category::int16x32 ) return mask32 {_mm512_mask_cmple_epi16_mask(m, v, w)}; - else if constexpr( c == category::int16x16 ) return mask16 {_mm256_mask_cmple_epi16_mask(m, v, w)}; - else if constexpr( c == category::int16x8 ) return mask8 {_mm_mask_cmple_epi16_mask(m, v, w)}; - else if constexpr( c == category::int8x64 ) return mask64 {_mm512_mask_cmple_epi8_mask(m, v, w)}; - else if constexpr( c == category::int8x32 ) return mask32 {_mm256_mask_cmple_epi8_mask(m, v, w)}; - else if constexpr( c == category::int8x16 ) return mask16 {_mm_mask_cmple_epi8_mask(m, v, w)}; - else if constexpr( c == category::uint64x8 ) return mask8 {_mm512_mask_cmple_epu64_mask(m, v, w)}; - else if constexpr( c == category::uint64x4 ) return mask8 {_mm256_mask_cmple_epu64_mask(m, v, w)}; - else if constexpr( c == category::uint64x2 ) return mask8 {_mm_mask_cmple_epu64_mask(m, v, w)}; - else if constexpr( c == category::uint32x16 ) return mask16 {_mm512_mask_cmple_epu32_mask(m, v, w)}; - else if constexpr( c == category::uint32x8 ) return mask8 {_mm256_mask_cmple_epu32_mask(m, v, w)}; - else if constexpr( c == category::uint32x4 ) return mask8 {_mm_mask_cmple_epu32_mask(m, v, w)}; - else if constexpr( c == category::uint16x32 ) return mask32 {_mm512_mask_cmple_epu16_mask(m, v, w)}; - else if constexpr( c == category::uint16x16 ) return mask16 {_mm256_mask_cmple_epu16_mask(m, v, w)}; - else if constexpr( c == category::uint16x8 ) return mask8 {_mm_mask_cmple_epu16_mask(m, v, w)}; - else if constexpr( c == category::uint8x64 ) return mask64 {_mm512_mask_cmple_epu8_mask(m, v, w)}; - else if constexpr( c == category::uint8x32 ) return mask32 {_mm256_mask_cmple_epu8_mask(m, v, w)}; - else if constexpr( c == category::uint8x16 ) return mask16 {_mm_mask_cmple_epu8_mask(m, v, w)}; + if constexpr (C::is_complete) return s; + else if constexpr (c == category::float32x16) return mask16 {_mm512_mask_cmp_ps_mask(m, a, b, f)}; + else if constexpr (c == category::float64x8) return mask8 {_mm512_mask_cmp_pd_mask(m, a, b, f)}; + else if constexpr (c == category::float32x8) return mask8 {_mm256_mask_cmp_ps_mask(m, a, b, f)}; + else if constexpr (c == category::float64x4) return mask8 {_mm256_mask_cmp_pd_mask(m, a, b, f)}; + else if constexpr (c == category::float32x4) return mask8 {_mm_mask_cmp_ps_mask(m, a, b, f)}; + else if constexpr (c == category::float64x2) return mask8 {_mm_mask_cmp_pd_mask(m, a, b, f)}; + else if constexpr (c == category::int64x8) return mask8 {_mm512_mask_cmple_epi64_mask(m, a, b)}; + else if constexpr (c == category::int64x4) return mask8 {_mm256_mask_cmple_epi64_mask(m, a, b)}; + else if constexpr (c == category::int64x2) return mask8 {_mm_mask_cmple_epi64_mask(m, a, b)}; + else if constexpr (c == category::int32x16) return mask16 {_mm512_mask_cmple_epi32_mask(m, a, b)}; + else if constexpr (c == category::int32x8) return mask8 {_mm256_mask_cmple_epi32_mask(m, a, b)}; + else if constexpr (c == category::int32x4) return mask8 {_mm_mask_cmple_epi32_mask(m, a, b)}; + else if constexpr (c == category::int16x32) return mask32 {_mm512_mask_cmple_epi16_mask(m, a, b)}; + else if constexpr (c == category::int16x16) return mask16 {_mm256_mask_cmple_epi16_mask(m, a, b)}; + else if constexpr (c == category::int16x8) return mask8 {_mm_mask_cmple_epi16_mask(m, a, b)}; + else if constexpr (c == category::int8x64) return mask64 {_mm512_mask_cmple_epi8_mask(m, a, b)}; + else if constexpr (c == category::int8x32) return mask32 {_mm256_mask_cmple_epi8_mask(m, a, b)}; + else if constexpr (c == category::int8x16) return mask16 {_mm_mask_cmple_epi8_mask(m, a, b)}; + else if constexpr (c == category::uint64x8) return mask8 {_mm512_mask_cmple_epu64_mask(m, a, b)}; + else if constexpr (c == category::uint64x4) return mask8 {_mm256_mask_cmple_epu64_mask(m, a, b)}; + else if constexpr (c == category::uint64x2) return mask8 {_mm_mask_cmple_epu64_mask(m, a, b)}; + else if constexpr (c == category::uint32x16) return mask16 {_mm512_mask_cmple_epu32_mask(m, a, b)}; + else if constexpr (c == category::uint32x8) return mask8 {_mm256_mask_cmple_epu32_mask(m, a, b)}; + else if constexpr (c == category::uint32x4) return mask8 {_mm_mask_cmple_epu32_mask(m, a, b)}; + else if constexpr (c == category::uint16x32) return mask32 {_mm512_mask_cmple_epu16_mask(m, a, b)}; + else if constexpr (c == category::uint16x16) return mask16 {_mm256_mask_cmple_epu16_mask(m, a, b)}; + else if constexpr (c == category::uint16x8) return mask8 {_mm_mask_cmple_epu16_mask(m, a, b)}; + else if constexpr (c == category::uint8x64) return mask64 {_mm512_mask_cmple_epu8_mask(m, a, b)}; + else if constexpr (c == category::uint8x32) return mask32 {_mm256_mask_cmple_epu8_mask(m, a, b)}; + else if constexpr (c == category::uint8x16) return mask16 {_mm_mask_cmple_epu8_mask(m, a, b)}; } } } diff --git a/include/eve/module/core/regular/is_less.hpp b/include/eve/module/core/regular/is_less.hpp index 45df111b91..0a7b27295c 100644 --- a/include/eve/module/core/regular/is_less.hpp +++ b/include/eve/module/core/regular/is_less.hpp @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include diff --git a/include/eve/module/core/regular/is_less_equal.hpp b/include/eve/module/core/regular/is_less_equal.hpp index 5ca61df55c..cfe74ac7cb 100644 --- a/include/eve/module/core/regular/is_less_equal.hpp +++ b/include/eve/module/core/regular/is_less_equal.hpp @@ -9,21 +9,25 @@ #include #include -#include #include -#include -#include +#include + namespace eve { template - struct is_less_equal_t : strict_elementwise_callable + struct is_less_equal_t : elementwise_callable { - template - requires(eve::same_lanes_or_scalar) - constexpr EVE_FORCEINLINE common_logical_t operator()(T a, U b) const + template + constexpr EVE_FORCEINLINE common_logical_t operator()(T a, U b) const + requires (compatible_arithmetic_values) { - // static_assert( valid_tolerance, Options>::value, "[eve::is_less_equal] simd tolerance requires at least one simd parameter." ); + if constexpr (Options::contains(almost)) + { + static_assert(floating_value, "[eve::is_less_equal] The definitely option is only supported for floating types."); + } + + // static_assert( valid_tolerance, Options>::value, "[eve::is_less_equal] simd tolerance requires at least one simd parameter." ); return EVE_DISPATCH_CALL(a, b); } @@ -86,40 +90,26 @@ namespace eve //================================================================================================ //! @} //================================================================================================ - - namespace detail - { - template - EVE_FORCEINLINE constexpr common_logical_t - is_less_equal_(EVE_REQUIRES(cpu_), O const&, logical a, logical b) noexcept - { - if constexpr( scalar_value && scalar_value) return common_logical_t(a <= b); - else return a <= b; - } - - template - EVE_FORCEINLINE constexpr common_logical_t - is_less_equal_(EVE_REQUIRES(cpu_), O const & o, T const& aa, U const& bb) noexcept - { - if constexpr(O::contains(almost)) - { - using w_t = common_value_t; - auto a = w_t(aa); - auto b = w_t(bb); - - auto tol = o[almost].value(w_t{}); - if constexpr(integral_value) return a <= eve::next(b, tol); - else return a <= fam(b, tol, eve::max(eve::abs(a), eve::abs(b))); - } - else - { - if constexpr(scalar_value && scalar_value) return common_logical_t(aa <= bb); - else return aa <= bb; - } - } - } } +#include + #if defined(EVE_INCLUDE_X86_HEADER) # include #endif + +#if defined(EVE_INCLUDE_POWERPC_HEADER) +# include +#endif + +#if defined(EVE_INCLUDE_ARM_NEON_HEADER) +# include +#endif + +#if defined(EVE_INCLUDE_ARM_SVE_HEADER) +# include +#endif + +#if defined(EVE_INCLUDE_RISCV_HEADER) +# include +#endif From 19d37702d58efba5b0b6934d31188f0687b86802 Mon Sep 17 00:00:00 2001 From: SadiinsoSnowfall Date: Mon, 27 Jan 2025 15:01:17 +0100 Subject: [PATCH 2/4] fix typo --- .../regular/impl/simd/ppc/is_less_equal.hpp | 4 +- .../regular/impl/simd/x86/is_less_equal.hpp | 70 +++++++++---------- 2 files changed, 37 insertions(+), 37 deletions(-) diff --git a/include/eve/module/core/regular/impl/simd/ppc/is_less_equal.hpp b/include/eve/module/core/regular/impl/simd/ppc/is_less_equal.hpp index 29cb564f34..7660c2f104 100644 --- a/include/eve/module/core/regular/impl/simd/ppc/is_less_equal.hpp +++ b/include/eve/module/core/regular/impl/simd/ppc/is_less_equal.hpp @@ -19,7 +19,7 @@ namespace eve::detail requires ppc_abi> { if constexpr (O::contains(almost)) return is_less_equal.behavior(cpu_{}, opts, a, b); - else if constexpr(std::is_floating_point_v) return logical>(vec_cmple(v.storage(), w.storage())); - else return !(v > w); + else if constexpr(std::is_floating_point_v) return logical>(vec_cmple(a.storage(), b.storage())); + else return !(a > b); } } diff --git a/include/eve/module/core/regular/impl/simd/x86/is_less_equal.hpp b/include/eve/module/core/regular/impl/simd/x86/is_less_equal.hpp index c5c64fa844..90a4812b3f 100644 --- a/include/eve/module/core/regular/impl/simd/x86/is_less_equal.hpp +++ b/include/eve/module/core/regular/impl/simd/x86/is_less_equal.hpp @@ -34,44 +34,44 @@ namespace eve::detail if constexpr (current_api >= avx512) { - if constexpr (c == category::float32x16) return mask16 {_mm512_cmp_ps_mask(v, w, f)}; - else if constexpr (c == category::float32x8) return mask8 {_mm256_cmp_ps_mask(v, w, f)}; - else if constexpr (c == category::float32x4) return mask8 {_mm_cmp_ps_mask(v, w, f)}; - else if constexpr (c == category::float64x8) return mask8 {_mm512_cmp_pd_mask(v, w, f)}; - else if constexpr (c == category::float64x4) return mask8 {_mm256_cmp_pd_mask(v, w, f)}; - else if constexpr (c == category::float64x2) return mask8 {_mm_cmp_pd_mask(v, w, f)}; - else if constexpr (c == category::uint64x8) return mask8 {_mm512_cmple_epu64_mask(v, w)}; - else if constexpr (c == category::uint64x4) return mask8 {_mm256_cmple_epu64_mask(v, w)}; - else if constexpr (c == category::uint64x2) return mask8 {_mm_cmple_epu64_mask(v, w)}; - else if constexpr (c == category::uint32x16) return mask16 {_mm512_cmple_epu32_mask(v, w)}; - else if constexpr (c == category::uint32x8) return mask8 {_mm256_cmple_epu32_mask(v, w)}; - else if constexpr (c == category::uint32x4) return mask8 {_mm_cmple_epu32_mask(v, w)}; - else if constexpr (c == category::uint16x32) return mask32 {_mm512_cmple_epu16_mask(v, w)}; - else if constexpr (c == category::uint16x16) return mask16 {_mm256_cmple_epu16_mask(v, w)}; - else if constexpr (c == category::uint16x8) return mask8 {_mm_cmple_epu16_mask(v, w)}; - else if constexpr (c == category::uint8x64) return mask64 {_mm512_cmple_epu8_mask(v, w)}; - else if constexpr (c == category::uint8x32) return mask32 {_mm256_cmple_epu8_mask(v, w)}; - else if constexpr (c == category::uint8x16) return mask16 {_mm_cmple_epu8_mask(v, w)}; - else if constexpr (c == category::int64x8) return mask8 {_mm512_cmple_epi64_mask(v, w)}; - else if constexpr (c == category::int64x4) return mask8 {_mm256_cmple_epi64_mask(v, w)}; - else if constexpr (c == category::int64x2) return mask8 {_mm_cmple_epi64_mask(v, w)}; - else if constexpr (c == category::int32x16) return mask16 {_mm512_cmple_epi32_mask(v, w)}; - else if constexpr (c == category::int32x8) return mask8 {_mm256_cmple_epi32_mask(v, w)}; - else if constexpr (c == category::int32x4) return mask8 {_mm_cmple_epi32_mask(v, w)}; - else if constexpr (c == category::int16x32) return mask32 {_mm512_cmple_epi16_mask(v, w)}; - else if constexpr (c == category::int16x16) return mask16 {_mm256_cmple_epi16_mask(v, w)}; - else if constexpr (c == category::int16x8) return mask8 {_mm_cmple_epi16_mask(v, w)}; - else if constexpr (c == category::int8x64) return mask64 {_mm512_cmple_epi8_mask(v, w)}; - else if constexpr (c == category::int8x32) return mask32 {_mm256_cmple_epi8_mask(v, w)}; - else if constexpr (c == category::int8x16) return mask16 {_mm_cmple_epi8_mask(v, w)}; + if constexpr (c == category::float32x16) return mask16 {_mm512_cmp_ps_mask(a, b, f)}; + else if constexpr (c == category::float32x8) return mask8 {_mm256_cmp_ps_mask(a, b, f)}; + else if constexpr (c == category::float32x4) return mask8 {_mm_cmp_ps_mask(a, b, f)}; + else if constexpr (c == category::float64x8) return mask8 {_mm512_cmp_pd_mask(a, b, f)}; + else if constexpr (c == category::float64x4) return mask8 {_mm256_cmp_pd_mask(a, b, f)}; + else if constexpr (c == category::float64x2) return mask8 {_mm_cmp_pd_mask(a, b, f)}; + else if constexpr (c == category::uint64x8) return mask8 {_mm512_cmple_epu64_mask(a, b)}; + else if constexpr (c == category::uint64x4) return mask8 {_mm256_cmple_epu64_mask(a, b)}; + else if constexpr (c == category::uint64x2) return mask8 {_mm_cmple_epu64_mask(a, b)}; + else if constexpr (c == category::uint32x16) return mask16 {_mm512_cmple_epu32_mask(a, b)}; + else if constexpr (c == category::uint32x8) return mask8 {_mm256_cmple_epu32_mask(a, b)}; + else if constexpr (c == category::uint32x4) return mask8 {_mm_cmple_epu32_mask(a, b)}; + else if constexpr (c == category::uint16x32) return mask32 {_mm512_cmple_epu16_mask(a, b)}; + else if constexpr (c == category::uint16x16) return mask16 {_mm256_cmple_epu16_mask(a, b)}; + else if constexpr (c == category::uint16x8) return mask8 {_mm_cmple_epu16_mask(a, b)}; + else if constexpr (c == category::uint8x64) return mask64 {_mm512_cmple_epu8_mask(a, b)}; + else if constexpr (c == category::uint8x32) return mask32 {_mm256_cmple_epu8_mask(a, b)}; + else if constexpr (c == category::uint8x16) return mask16 {_mm_cmple_epu8_mask(a, b)}; + else if constexpr (c == category::int64x8) return mask8 {_mm512_cmple_epi64_mask(a, b)}; + else if constexpr (c == category::int64x4) return mask8 {_mm256_cmple_epi64_mask(a, b)}; + else if constexpr (c == category::int64x2) return mask8 {_mm_cmple_epi64_mask(a, b)}; + else if constexpr (c == category::int32x16) return mask16 {_mm512_cmple_epi32_mask(a, b)}; + else if constexpr (c == category::int32x8) return mask8 {_mm256_cmple_epi32_mask(a, b)}; + else if constexpr (c == category::int32x4) return mask8 {_mm_cmple_epi32_mask(a, b)}; + else if constexpr (c == category::int16x32) return mask32 {_mm512_cmple_epi16_mask(a, b)}; + else if constexpr (c == category::int16x16) return mask16 {_mm256_cmple_epi16_mask(a, b)}; + else if constexpr (c == category::int16x8) return mask8 {_mm_cmple_epi16_mask(a, b)}; + else if constexpr (c == category::int8x64) return mask64 {_mm512_cmple_epi8_mask(a, b)}; + else if constexpr (c == category::int8x32) return mask32 {_mm256_cmple_epi8_mask(a, b)}; + else if constexpr (c == category::int8x16) return mask16 {_mm_cmple_epi8_mask(a, b)}; } else { - if constexpr (c == category::float32x8) return _mm256_cmp_ps(v, w, f); - else if constexpr (c == category::float64x4) return _mm256_cmp_pd(v, w, f); - else if constexpr (c == category::float32x4) return _mm_cmple_ps(v, w); - else if constexpr (c == category::float64x2) return _mm_cmple_pd(v, w); - else return !(v > w); + if constexpr (c == category::float32x8) return _mm256_cmp_ps(a, b, f); + else if constexpr (c == category::float64x4) return _mm256_cmp_pd(a, b, f); + else if constexpr (c == category::float32x4) return _mm_cmple_ps(a, b); + else if constexpr (c == category::float64x2) return _mm_cmple_pd(a, b); + else return !(a > b); } } } From e272676fd3dccc02897550e909f8c2861231b50b Mon Sep 17 00:00:00 2001 From: SadiinsoSnowfall Date: Mon, 27 Jan 2025 15:15:30 +0100 Subject: [PATCH 3/4] fix types --- include/eve/module/core/regular/impl/is_less.hpp | 2 +- include/eve/module/core/regular/impl/is_less_equal.hpp | 2 +- .../eve/module/core/regular/impl/simd/arm/neon/is_less.hpp | 2 +- .../module/core/regular/impl/simd/arm/neon/is_less_equal.hpp | 2 +- .../eve/module/core/regular/impl/simd/x86/is_less_equal.hpp | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/eve/module/core/regular/impl/is_less.hpp b/include/eve/module/core/regular/impl/is_less.hpp index 6d5d77de69..dac7d1c57d 100644 --- a/include/eve/module/core/regular/impl/is_less.hpp +++ b/include/eve/module/core/regular/impl/is_less.hpp @@ -31,7 +31,7 @@ namespace eve::detail else { if constexpr (scalar_value) return as_logical_t(a < b); - else return map([](auto e, auto f) { return e < f; }, a, b); + else return map([](E e, E f){ return as_logical_t(e < f); }, a, b); } } } diff --git a/include/eve/module/core/regular/impl/is_less_equal.hpp b/include/eve/module/core/regular/impl/is_less_equal.hpp index 2b0f8acc6f..e1ade44039 100644 --- a/include/eve/module/core/regular/impl/is_less_equal.hpp +++ b/include/eve/module/core/regular/impl/is_less_equal.hpp @@ -30,7 +30,7 @@ namespace eve::detail else { if constexpr (scalar_value) return as_logical_t(a <= b); - else return map([](auto e, auto f) { return e <= f; }, a, b); + else return map([](E e, E f){ return as_logical_t(e <= f); }, a, b); } } } diff --git a/include/eve/module/core/regular/impl/simd/arm/neon/is_less.hpp b/include/eve/module/core/regular/impl/simd/arm/neon/is_less.hpp index 51d8ead572..1e89ebe018 100644 --- a/include/eve/module/core/regular/impl/simd/arm/neon/is_less.hpp +++ b/include/eve/module/core/regular/impl/simd/arm/neon/is_less.hpp @@ -49,7 +49,7 @@ namespace eve::detail else if constexpr (cat == category::int64x2) return vcltq_s64(a, b); else if constexpr (cat == category::uint64x2) return vcltq_u64(a, b); } - else return map([](auto e, auto f){ return e < f; }, a, b); + else return map([](E e, E f){ return as_logical_t(e < f); }, a, b); } } } diff --git a/include/eve/module/core/regular/impl/simd/arm/neon/is_less_equal.hpp b/include/eve/module/core/regular/impl/simd/arm/neon/is_less_equal.hpp index 4800e6f4fd..7b37d0b355 100644 --- a/include/eve/module/core/regular/impl/simd/arm/neon/is_less_equal.hpp +++ b/include/eve/module/core/regular/impl/simd/arm/neon/is_less_equal.hpp @@ -49,7 +49,7 @@ namespace eve::detail else if constexpr (cat == category::int64x2) return vcleq_s64(a, b); else if constexpr (cat == category::uint64x2) return vcleq_u64(a, b); } - else return map([](auto e, auto f){ return e <= f; }, a, b); + else return map([](E e, E f){ return as_logical_t(e <= f); }, a, b); } } } diff --git a/include/eve/module/core/regular/impl/simd/x86/is_less_equal.hpp b/include/eve/module/core/regular/impl/simd/x86/is_less_equal.hpp index 90a4812b3f..f2e8688cbe 100644 --- a/include/eve/module/core/regular/impl/simd/x86/is_less_equal.hpp +++ b/include/eve/module/core/regular/impl/simd/x86/is_less_equal.hpp @@ -19,8 +19,8 @@ namespace eve::detail { - template - EVE_FORCEINLINE as_logical_t> is_less_equal_(EVE_REQUIRES(sse2_), C const& mask, O const& opts, wide a, wide b) noexcept + template + EVE_FORCEINLINE as_logical_t> is_less_equal_(EVE_REQUIRES(sse2_), O const& opts, wide a, wide b) noexcept requires x86_abi> { if constexpr (O::contains(almost)) From dc0d3702a235e47c9d108cfc679e0f5d1cf988d9 Mon Sep 17 00:00:00 2001 From: SadiinsoSnowfall Date: Wed, 29 Jan 2025 14:41:25 +0100 Subject: [PATCH 4/4] improved is_less_equal codegen on x86 --- .../core/regular/impl/simd/x86/is_less.hpp | 26 ++++++++++--------- .../regular/impl/simd/x86/is_less_equal.hpp | 17 ++++++++---- 2 files changed, 26 insertions(+), 17 deletions(-) diff --git a/include/eve/module/core/regular/impl/simd/x86/is_less.hpp b/include/eve/module/core/regular/impl/simd/x86/is_less.hpp index 8ecc20658f..491371d796 100644 --- a/include/eve/module/core/regular/impl/simd/x86/is_less.hpp +++ b/include/eve/module/core/regular/impl/simd/x86/is_less.hpp @@ -74,6 +74,7 @@ namespace eve::detail else { constexpr auto use_avx2 = current_api >= avx2; + constexpr auto use_avx = current_api >= avx; constexpr auto use_sse4_1 = current_api >= sse4_1; constexpr auto use_sse4_2 = current_api >= sse4_2; constexpr auto lt = [](E ev, E fv) { return as_logical_t(ev < fv); }; @@ -85,18 +86,19 @@ namespace eve::detail return bit_cast((bit_cast(lhs, as(sm)) - sm) < (bit_cast(rhs, as(sm)) - sm), as{}); }; - if constexpr (use_avx2 && c == category::int64x4) return _mm256_cmpgt_epi64(b, a); - else if constexpr (use_avx2 && c == category::uint64x4) return unsigned_cmp(a, b); - else if constexpr (use_avx2 && c == category::int32x8) return _mm256_cmpgt_epi32(b, a); - else if constexpr (use_avx2 && c == category::uint32x8) return eve::min(a, b) != b; - else if constexpr (use_avx2 && c == category::int16x16) return _mm256_cmpgt_epi16(b, a); - else if constexpr (use_avx2 && c == category::uint16x16) return eve::min(a, b) != b; - else if constexpr (use_avx2 && c == category::int8x32) return _mm256_cmpgt_epi8(b, a); - else if constexpr (use_avx2 && c == category::uint8x32) return eve::min(a, b) != b; - else if constexpr (use_sse4_2 && c == category::int64x2) return _mm_cmpgt_epi64(b, a); - else if constexpr (c == category::int32x4) return _mm_cmplt_epi32(a, b); - else if constexpr (c == category::int16x8) return _mm_cmplt_epi16(a, b); - else if constexpr (c == category::int8x16) return _mm_cmplt_epi8(a, b); + if constexpr (use_avx2 && c == category::int64x4) return _mm256_cmpgt_epi64(b, a); + else if constexpr (use_avx2 && c == category::uint64x4) return unsigned_cmp(a, b); + else if constexpr (use_avx2 && c == category::int32x8) return _mm256_cmpgt_epi32(b, a); + else if constexpr (use_avx2 && c == category::uint32x8) return eve::min(a, b) != b; + else if constexpr (use_avx2 && c == category::int16x16) return _mm256_cmpgt_epi16(b, a); + else if constexpr (use_avx2 && c == category::uint16x16) return eve::min(a, b) != b; + else if constexpr (use_avx2 && c == category::int8x32) return _mm256_cmpgt_epi8(b, a); + else if constexpr (use_avx2 && c == category::uint8x32) return eve::min(a, b) != b; + else if constexpr (use_avx && ((sizeof(T) * N::value) == 32)) return aggregate(is_less, a, b); + else if constexpr (use_sse4_2 && c == category::int64x2) return _mm_cmpgt_epi64(b, a); + else if constexpr (c == category::int32x4) return _mm_cmplt_epi32(a, b); + else if constexpr (c == category::int16x8) return _mm_cmplt_epi16(a, b); + else if constexpr (c == category::int8x16) return _mm_cmplt_epi8(a, b); else if constexpr (c == category::uint32x4) { if constexpr (use_sse4_1) return eve::min(a, b) != b; diff --git a/include/eve/module/core/regular/impl/simd/x86/is_less_equal.hpp b/include/eve/module/core/regular/impl/simd/x86/is_less_equal.hpp index f2e8688cbe..c0bb93423b 100644 --- a/include/eve/module/core/regular/impl/simd/x86/is_less_equal.hpp +++ b/include/eve/module/core/regular/impl/simd/x86/is_less_equal.hpp @@ -67,11 +67,18 @@ namespace eve::detail } else { - if constexpr (c == category::float32x8) return _mm256_cmp_ps(a, b, f); - else if constexpr (c == category::float64x4) return _mm256_cmp_pd(a, b, f); - else if constexpr (c == category::float32x4) return _mm_cmple_ps(a, b); - else if constexpr (c == category::float64x2) return _mm_cmple_pd(a, b); - else return !(a > b); + constexpr auto use_avx2 = current_api >= avx2; + constexpr auto use_avx = current_api >= avx; + constexpr auto use_sse4_1 = current_api >= sse4_1; + + if constexpr (c == category::float32x8) return _mm256_cmp_ps(a, b, f); + else if constexpr (c == category::float64x4) return _mm256_cmp_pd(a, b, f); + else if constexpr (c == category::float32x4) return _mm_cmple_ps(a, b); + else if constexpr (c == category::float64x2) return _mm_cmple_pd(a, b); + else if constexpr (use_avx2) return eve::min(a, b) == a; + else if constexpr (use_avx && ((sizeof(T) * N::value) == 32)) return aggregate(is_less_equal, a, b); + else if constexpr (use_sse4_1) return eve::min(a, b) == a; + else return !is_less(b, a); } } }