diff --git a/.github/toolchains/gcc-powerpc64-linux-gnu.cmake b/.github/toolchains/gcc-powerpc64-linux-gnu.cmake
new file mode 100644
index 000000000..5dd97d6c6
--- /dev/null
+++ b/.github/toolchains/gcc-powerpc64-linux-gnu.cmake
@@ -0,0 +1,5 @@
+set(CMAKE_SYSTEM_PROCESSOR powerpc64)
+set(triple powerpc64-linux-gnu)
+
+include(${CMAKE_CURRENT_LIST_DIR}/gcc.cmake)
+
diff --git a/.github/toolchains/gcc-powerpc64le-linux-gnu.cmake b/.github/toolchains/gcc-powerpc64le-linux-gnu.cmake
new file mode 100644
index 000000000..eca1a2837
--- /dev/null
+++ b/.github/toolchains/gcc-powerpc64le-linux-gnu.cmake
@@ -0,0 +1,5 @@
+set(CMAKE_SYSTEM_PROCESSOR powerpc64le)
+set(triple powerpc64le-linux-gnu)
+
+include(${CMAKE_CURRENT_LIST_DIR}/gcc.cmake)
+
diff --git a/.github/workflows/cross.yml b/.github/workflows/cross-arm.yml
similarity index 100%
rename from .github/workflows/cross.yml
rename to .github/workflows/cross-arm.yml
diff --git a/.github/workflows/cross-ppc.yml b/.github/workflows/cross-ppc.yml
new file mode 100644
index 000000000..92ffae333
--- /dev/null
+++ b/.github/workflows/cross-ppc.yml
@@ -0,0 +1,44 @@
+name: PowerPC cross-compilation build
+on: [push, pull_request]
+concurrency:
+  group: ${{ github.workflow }}-${{ github.job }}-${{ github.ref }}
+  cancel-in-progress: true
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    name: '${{ matrix.target.platform }}, ${{ matrix.sys.compiler }} ${{ matrix.sys.version }}'
+    strategy:
+      matrix:
+        target:
+          - { platform: 'ppc64le', dir: 'powerpc64le-linux-gnu', flags: '-maltivec -mvsx -mcpu=power10', full: 'OFF' }
+          - { platform: 'ppc64', dir: 'powerpc64-linux-gnu', flags: '-maltivec -mvsx -mcpu=power10', full: 'OFF' }
+        sys:
+          - { compiler: 'gcc', version: '12' }
+    steps:
+      - name: Setup compiler
+        if: ${{ matrix.sys.compiler == 'gcc' }}
+        run: |
+          sudo apt-get update || exit 1
+          sudo apt-get --no-install-suggests --no-install-recommends install g++-${{ matrix.sys.version }}-${{ matrix.target.dir }} g++-${{ matrix.sys.version }}-multilib || exit 1
+          sudo update-alternatives --remove-all ${{ matrix.target.dir }}-gcc || true
+          sudo update-alternatives --remove-all ${{ matrix.target.dir }}-g++ || true
+          sudo update-alternatives --install /usr/bin/${{ matrix.target.dir }}-gcc ${{ matrix.target.dir }}-gcc /usr/bin/${{ matrix.target.dir }}-gcc-${{ matrix.sys.version }} 20
+          sudo update-alternatives --install /usr/bin/${{ matrix.target.dir }}-g++ ${{ matrix.target.dir }}-g++ /usr/bin/${{ matrix.target.dir }}-g++-${{ matrix.sys.version }} 20
+      - name: Setup QEMU
+        run: |
+          sudo apt-get --no-install-suggests --no-install-recommends install qemu-user
+      - name: Setup Ninja
+        run: |
+          sudo apt-get install ninja-build
+      - name: Checkout xsimd
+        uses: actions/checkout@v3
+      - name: Setup
+        run: |
+          mkdir _build
+          cd _build && cmake .. -DBUILD_TESTS=ON -DDOWNLOAD_DOCTEST=ON -DBUILD_BENCHMARK=${{ matrix.target.full }} -DBUILD_EXAMPLES=${{ matrix.target.full }} -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_FLAGS="${{ matrix.target.flags }}" -DCMAKE_CXX_FLAGS="${{ matrix.target.flags }}" -DCMAKE_TOOLCHAIN_FILE=${{ github.workspace }}/.github/toolchains/${{ matrix.sys.compiler }}-${{ matrix.target.dir }}.cmake
+      - name: Build
+        run: cmake --build _build --verbose -j1
+      - name: Testing xsimd
+        run: |
+          qemu-${{ matrix.target.platform }} -cpu power10 -L /usr/${{ matrix.target.dir }}/ ./test/test_xsimd
+        working-directory: ${{ github.workspace }}/_build
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 860a84bad..6dffce659 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -49,6 +49,7 @@ ${XSIMD_INCLUDE_DIR}/xsimd/arch/xsimd_sse3.hpp
 ${XSIMD_INCLUDE_DIR}/xsimd/arch/xsimd_sse4_1.hpp
 ${XSIMD_INCLUDE_DIR}/xsimd/arch/xsimd_sse4_2.hpp
 ${XSIMD_INCLUDE_DIR}/xsimd/arch/xsimd_ssse3.hpp
+${XSIMD_INCLUDE_DIR}/xsimd/arch/xsimd_vsx.hpp
 ${XSIMD_INCLUDE_DIR}/xsimd/arch/xsimd_sve.hpp
 ${XSIMD_INCLUDE_DIR}/xsimd/config/xsimd_arch.hpp
 ${XSIMD_INCLUDE_DIR}/xsimd/config/xsimd_config.hpp
@@ -70,6 +71,7 @@ ${XSIMD_INCLUDE_DIR}/xsimd/types/xsimd_fma3_sse_register.hpp
 ${XSIMD_INCLUDE_DIR}/xsimd/types/xsimd_fma4_register.hpp
 ${XSIMD_INCLUDE_DIR}/xsimd/types/xsimd_common_arch.hpp
 ${XSIMD_INCLUDE_DIR}/xsimd/types/xsimd_register.hpp
+${XSIMD_INCLUDE_DIR}/xsimd/types/xsimd_vsx_register.hpp
 ${XSIMD_INCLUDE_DIR}/xsimd/types/xsimd_rvv_register.hpp
 ${XSIMD_INCLUDE_DIR}/xsimd/types/xsimd_sse2_register.hpp
 ${XSIMD_INCLUDE_DIR}/xsimd/types/xsimd_sse3_register.hpp
diff --git a/README.md b/README.md
index 87082f488..9313453a3 100644
--- a/README.md
+++ b/README.md
@@ -54,6 +54,7 @@ x86 | AVX512BW, AVX512CD, AVX512DQ, AVX512F (gcc7 and higher)
 x86 AMD | FMA4
 ARM | NEON, NEON64, SVE128/256/512 (fixed vector size)
 WebAssembly | WASM
+powerpc64 | VSX
 RISC-V | RISC-V128/256/512 (fixed vector size)
 
 ## Installation
diff --git a/docs/Doxyfile b/docs/Doxyfile
index 390baf223..3d0137efe 100644
--- a/docs/Doxyfile
+++ b/docs/Doxyfile
@@ -9,6 +9,7 @@ INPUT = ../include/xsimd/types/xsimd_api.hpp \
         ../include/xsimd/memory/xsimd_aligned_allocator.hpp \
         ../include/xsimd/types/xsimd_common_arch.hpp \
         ../include/xsimd/types/xsimd_traits.hpp \
+        ../include/xsimd/types/xsimd_vsx_register.hpp \
         ../include/xsimd/types/xsimd_avx2_register.hpp \
         ../include/xsimd/types/xsimd_avx512bw_register.hpp \
         ../include/xsimd/types/xsimd_avx512cd_register.hpp \
diff --git a/include/xsimd/arch/common/xsimd_common_arithmetic.hpp b/include/xsimd/arch/common/xsimd_common_arithmetic.hpp
index ebef263ee..2e83d33de 100644
--- a/include/xsimd/arch/common/xsimd_common_arithmetic.hpp
+++ b/include/xsimd/arch/common/xsimd_common_arithmetic.hpp
@@ -203,10 +203,9 @@ namespace xsimd
         {
             if (std::is_signed<T>::value)
             {
-                auto mask = (other >> (8 * sizeof(T) - 1));
                 auto self_pos_branch = min(std::numeric_limits<T>::max() - other, self);
                 auto self_neg_branch = max(std::numeric_limits<T>::min() - other, self);
-                return other + select(batch_bool<T, A>(mask.data), self_neg_branch, self_pos_branch);
+                return other + select(other >= 0, self_pos_branch, self_neg_branch);
             }
             else
             {
diff --git a/include/xsimd/arch/common/xsimd_common_math.hpp b/include/xsimd/arch/common/xsimd_common_math.hpp
index 689029aae..cdd35c5dd 100644
--- a/include/xsimd/arch/common/xsimd_common_math.hpp
+++ b/include/xsimd/arch/common/xsimd_common_math.hpp
@@ -1087,7 +1087,7 @@ namespace xsimd
         template <class T, class A>
         XSIMD_INLINE batch<T, A> from_bool(batch_bool<T, A> const& self, requires_arch<common>)
noexcept { - return batch(self.data) & batch(1); + return batch((typename batch::register_type)self.data) & batch(1); } // horner diff --git a/include/xsimd/arch/xsimd_isa.hpp b/include/xsimd/arch/xsimd_isa.hpp index 398d22511..1772159a0 100644 --- a/include/xsimd/arch/xsimd_isa.hpp +++ b/include/xsimd/arch/xsimd_isa.hpp @@ -132,6 +132,10 @@ #include "./xsimd_wasm.hpp" #endif +#if XSIMD_WITH_VSX +#include "./xsimd_vsx.hpp" +#endif + // Must come last to have access to all conversion specializations. #include "./xsimd_common.hpp" diff --git a/include/xsimd/arch/xsimd_vsx.hpp b/include/xsimd/arch/xsimd_vsx.hpp new file mode 100644 index 000000000..48b56f749 --- /dev/null +++ b/include/xsimd/arch/xsimd_vsx.hpp @@ -0,0 +1,797 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. * + ****************************************************************************/ + +#ifndef XSIMD_VSX_HPP +#define XSIMD_VSX_HPP + +#include +#include +#include + +#include "../types/xsimd_vsx_register.hpp" + +#include + +namespace xsimd +{ + template + struct batch_bool_constant; + + template + XSIMD_INLINE batch bitwise_cast(batch const& x) noexcept; + + template + struct batch_constant; + + namespace kernel + { + template + XSIMD_INLINE batch avg(batch const&, batch const&, requires_arch) noexcept; + template + XSIMD_INLINE batch avgr(batch const&, batch const&, requires_arch) noexcept; + + // abs + template + XSIMD_INLINE batch abs(batch const& self, requires_arch) noexcept + { + return vec_abs(self.data); + } + + template + XSIMD_INLINE batch abs(batch const& self, requires_arch) noexcept + { + return vec_abs(self.data); + } + + // add + template ::value, void>::type> + XSIMD_INLINE batch add(batch const& self, batch const& other, requires_arch) noexcept + { + return vec_add(self.data, other.data); + } + + // all + template ::value, void>::type> + XSIMD_INLINE bool all(batch_bool const& self, requires_arch) noexcept + { + return vec_all_ne(self.data, vec_xor(self.data, self.data)); + } + + // any + template ::value, void>::type> + XSIMD_INLINE bool any(batch_bool const& self, requires_arch) noexcept + { + return vec_any_ne(self.data, vec_xor(self.data, self.data)); + } + + // avgr + template ::value && sizeof(T) < 8, void>::type> + XSIMD_INLINE batch avgr(batch const& self, batch const& other, requires_arch) noexcept + { + return vec_avg(self.data, other.data); + } + template + XSIMD_INLINE batch avgr(batch const& self, batch const& other, requires_arch) noexcept + { + return avgr(self, other, common {}); + } + template + XSIMD_INLINE batch avgr(batch const& self, batch const& other, requires_arch) noexcept + { + return avgr(self, other, common {}); + } + + // avg + template ::value, void>::type> + XSIMD_INLINE batch avg(batch const& self, batch const& other, requires_arch) noexcept + { + XSIMD_IF_CONSTEXPR(sizeof(T) < 8) + { + constexpr auto nbit = 8 * sizeof(T) - 1; + auto adj = bitwise_cast(bitwise_cast>((self ^ other) << nbit) >> nbit); + return avgr(self, other, A {}) - adj; + } + else + { + return avg(self, other, common {}); + } + } + template + XSIMD_INLINE batch avg(batch const& self, batch const& other, requires_arch) noexcept + { + return avg(self, other, common {}); + } + 
template + XSIMD_INLINE batch avg(batch const& self, batch const& other, requires_arch) noexcept + { + return avg(self, other, common {}); + } + + // batch_bool_cast + template + XSIMD_INLINE batch_bool batch_bool_cast(batch_bool const& self, batch_bool const&, requires_arch) noexcept + { + return (typename batch_bool::register_type)self.data; + } + + // bitwise_and + template ::value, void>::type> + XSIMD_INLINE batch bitwise_and(batch const& self, batch const& other, requires_arch) noexcept + { + return vec_and(self.data, other.data); + } + template ::value, void>::type> + XSIMD_INLINE batch_bool bitwise_and(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + { + return vec_and(self.data, other.data); + } + + // bitwise_andnot + template ::value, void>::type> + XSIMD_INLINE batch bitwise_andnot(batch const& self, batch const& other, requires_arch) noexcept + { + return vec_and(self.data, vec_nor(other.data, other.data)); + } + template ::value, void>::type> + XSIMD_INLINE batch_bool bitwise_andnot(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + { + return self.data & ~other.data; + } + + // bitwise_lshift + template ::value, void>::type> + XSIMD_INLINE batch bitwise_lshift(batch const& self, int32_t other, requires_arch) noexcept + { + using shift_type = as_unsigned_integer_t; + batch shift(static_cast(other)); + return vec_sl(self.data, shift.data); + } + + // bitwise_not + template ::value, void>::type> + XSIMD_INLINE batch bitwise_not(batch const& self, requires_arch) noexcept + { + return vec_nor(self.data, self.data); + } + template ::value, void>::type> + XSIMD_INLINE batch_bool bitwise_not(batch_bool const& self, requires_arch) noexcept + { + return vec_nor(self.data, self.data); + } + + // bitwise_or + template ::value, void>::type> + XSIMD_INLINE batch bitwise_or(batch const& self, batch const& other, requires_arch) noexcept + { + return vec_or(self.data, other.data); + } + template ::value, void>::type> + XSIMD_INLINE batch_bool bitwise_or(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + { + return vec_or(self.data, other.data); + } + + // bitwise_rshift + template ::value, void>::type> + XSIMD_INLINE batch bitwise_rshift(batch const& self, int32_t other, requires_arch) noexcept + { + using shift_type = as_unsigned_integer_t; + batch shift(static_cast(other)); + XSIMD_IF_CONSTEXPR(std::is_signed::value) + { + return vec_sra(self.data, shift.data); + } + else + { + return vec_sr(self.data, shift.data); + } + } + + // bitwise_xor + template ::value, void>::type> + XSIMD_INLINE batch bitwise_xor(batch const& self, batch const& other, requires_arch) noexcept + { + return vec_xor(self.data, other.data); + } + template ::value, void>::type> + XSIMD_INLINE batch_bool bitwise_xor(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + { + return vec_xor(self.data, other.data); + } + + // bitwise_cast + template + XSIMD_INLINE batch bitwise_cast(batch const& self, batch const&, requires_arch) noexcept + { + return (typename batch::register_type)(self.data); + } + + // broadcast + template ::value, void>::type> + XSIMD_INLINE batch broadcast(T val, requires_arch) noexcept + { + return vec_splats(val); + } + + // store_complex + namespace detail + { + // complex_low + template + XSIMD_INLINE batch complex_low(batch, A> const& self, requires_arch) noexcept + { + return vec_mergeh(self.real().data, self.imag().data); + } + template + XSIMD_INLINE batch complex_low(batch, A> const& self, 
requires_arch) noexcept + { + return vec_mergeh(self.real().data, self.imag().data); + } + // complex_high + template + XSIMD_INLINE batch complex_high(batch, A> const& self, requires_arch) noexcept + { + return vec_mergel(self.real().data, self.imag().data); + } + template + XSIMD_INLINE batch complex_high(batch, A> const& self, requires_arch) noexcept + { + return vec_mergel(self.real().data, self.imag().data); + } + } + + // decr_if + template ::value, void>::type> + XSIMD_INLINE batch decr_if(batch const& self, batch_bool const& mask, requires_arch) noexcept + { + return self + batch((typename batch::register_type)mask.data); + } + + // div + template + XSIMD_INLINE batch div(batch const& self, batch const& other, requires_arch) noexcept + { + return vec_div(self.data, other.data); + } + template + XSIMD_INLINE batch div(batch const& self, batch const& other, requires_arch) noexcept + { + return vec_div(self.data, other.data); + } + + // fast_cast + namespace detail + { + template + XSIMD_INLINE batch fast_cast(batch const& self, batch const&, requires_arch) noexcept + { + return vec_ctf(self.data, 0); + } + template + XSIMD_INLINE batch fast_cast(batch const& self, batch const&, requires_arch) noexcept + { + return vec_ctf(self.data, 0); + } + + template + XSIMD_INLINE batch fast_cast(batch const& self, batch const&, requires_arch) noexcept + { + return vec_cts(self.data, 0); + } + + template + XSIMD_INLINE batch fast_cast(batch const& self, batch const&, requires_arch) noexcept + { + return vec_ctu(self.data, 0); + } + } + + // eq + template ::value, void>::type> + XSIMD_INLINE batch_bool eq(batch const& self, batch const& other, requires_arch) noexcept + { + auto res = vec_cmpeq(self.data, other.data); + return *reinterpret_cast::register_type*>(&res); + } + template ::value, void>::type> + XSIMD_INLINE batch_bool eq(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + { + auto res = vec_cmpeq(self.data, other.data); + return *reinterpret_cast::register_type*>(&res); + } + + // first + template ::value, void>::type> + XSIMD_INLINE T first(batch const& self, requires_arch) noexcept + { + return vec_extract(self.data, 0); + } + + // ge + template ::value, void>::type> + XSIMD_INLINE batch_bool ge(batch const& self, batch const& other, requires_arch) noexcept + { + return vec_cmpge(self.data, other.data); + } + + // gt + template ::value, void>::type> + XSIMD_INLINE batch_bool gt(batch const& self, batch const& other, requires_arch) noexcept + { + return vec_cmpgt(self.data, other.data); + } + + // haddp + template + XSIMD_INLINE batch haddp(batch const* row, requires_arch) noexcept + { + auto tmp0 = vec_mergee(row[0].data, row[1].data); // v00 v10 v02 v12 + auto tmp1 = vec_mergeo(row[0].data, row[1].data); // v01 v11 v03 v13 + auto tmp4 = vec_add(tmp0, tmp1); // (v00 + v01, v10 + v11, v02 + v03, v12 + v13) + + auto tmp2 = vec_mergee(row[2].data, row[3].data); // v20 v30 v22 v32 + auto tmp3 = vec_mergeo(row[2].data, row[3].data); // v21 v31 v23 v33 + auto tmp5 = vec_add(tmp2, tmp3); // (v20 + v21, v30 + v31, v22 + v23, v32 + v33) + + auto tmp6 = vec_perm(tmp4, tmp5, (__vector unsigned char) { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 }); // (v00 + v01, v10 + v11, v20 + v21, v30 + v31) + auto tmp7 = vec_perm(tmp4, tmp5, (__vector unsigned char) { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 }); // (v02 + v03, v12 + v13, v22 + v23, v32 + v33) + + return vec_add(tmp6, tmp7); + } + + template + XSIMD_INLINE batch haddp(batch const* row,
requires_arch) noexcept + { + auto tmp0 = vec_mergee(row[0].data, row[1].data); // v00 v10 v02 v12 + auto tmp1 = vec_mergeo(row[0].data, row[1].data); // v01 v11 v03 v13 + return vec_add(tmp0, tmp1); + } + + // incr_if + template ::value, void>::type> + XSIMD_INLINE batch incr_if(batch const& self, batch_bool const& mask, requires_arch) noexcept + { + return self - batch((typename batch::register_type)mask.data); + } + + // insert + template ::value, void>::type> + XSIMD_INLINE batch insert(batch const& self, T val, index, requires_arch) noexcept + { + return vec_insert(val, self.data, I); + } + + // isnan + template + XSIMD_INLINE batch_bool isnan(batch const& self, requires_arch) noexcept + { + return ~vec_cmpeq(self.data, self.data); + } + template + XSIMD_INLINE batch_bool isnan(batch const& self, requires_arch) noexcept + { + return ~vec_cmpeq(self.data, self.data); + } + + // load_aligned + template ::value, void>::type> + XSIMD_INLINE batch load_aligned(T const* mem, convert, requires_arch) noexcept + { + return vec_ld(0, reinterpret_cast::register_type*>(mem)); + } + + // load_unaligned + template ::value, void>::type> + XSIMD_INLINE batch load_unaligned(T const* mem, convert, requires_arch) noexcept + { + return vec_vsx_ld(0, (typename batch::register_type const*)mem); + } + + // load_complex + namespace detail + { + template + XSIMD_INLINE batch, A> load_complex(batch const& hi, batch const& lo, requires_arch) noexcept + { + __vector unsigned char perme = { 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 }; + __vector unsigned char permo = { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 }; + return { vec_perm(hi.data, lo.data, perme), vec_perm(hi.data, lo.data, permo) }; + } + template + XSIMD_INLINE batch, A> load_complex(batch const& hi, batch const& lo, requires_arch) noexcept + { + return { vec_mergee(hi.data, lo.data), vec_mergeo(hi.data, lo.data) }; + } + } + + // le + template ::value, void>::type> + XSIMD_INLINE batch_bool le(batch const& self, batch const& other, requires_arch) noexcept + { + return vec_cmple(self.data, other.data); + } + + // lt + template ::value, void>::type> + XSIMD_INLINE batch_bool lt(batch const& self, batch const& other, requires_arch) noexcept + { + return vec_cmplt(self.data, other.data); + } + + // max + template ::value, void>::type> + XSIMD_INLINE batch max(batch const& self, batch const& other, requires_arch) noexcept + { + return vec_max(self.data, other.data); + } + + // min + template ::value, void>::type> + XSIMD_INLINE batch min(batch const& self, batch const& other, requires_arch) noexcept + { + return vec_min(self.data, other.data); + } + + // mul + template ::value, void>::type> + XSIMD_INLINE batch mul(batch const& self, batch const& other, requires_arch) noexcept + { + return self.data * other.data; + // return vec_mul(self.data, other.data); + } + + // neg + template ::value, void>::type> + XSIMD_INLINE batch neg(batch const& self, requires_arch) noexcept + { + return -(self.data); + } + + // neq + template ::value, void>::type> + XSIMD_INLINE batch_bool neq(batch const& self, batch const& other, requires_arch) noexcept + { + return ~vec_cmpeq(self.data, other.data); + } + template ::value, void>::type> + XSIMD_INLINE batch_bool neq(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + { + return ~vec_cmpeq(self.data, other.data); + } + + // reciprocal + template + XSIMD_INLINE batch reciprocal(batch const& self, + kernel::requires_arch) + { + return vec_re(self.data); + } + template + 
XSIMD_INLINE batch reciprocal(batch const& self, + kernel::requires_arch) + { + return vec_re(self.data); + } + + // reduce_add + template + XSIMD_INLINE signed reduce_add(batch const& self, requires_arch) noexcept + { + auto tmp0 = vec_reve(self.data); // v3, v2, v1, v0 + auto tmp1 = vec_add(self.data, tmp0); // v0 + v3, v1 + v2, v2 + v1, v3 + v0 + auto tmp2 = vec_mergel(tmp1, tmp1); // v2 + v1, v2 + v1, v3 + v0, v3 + v0 + auto tmp3 = vec_add(tmp1, tmp2); + return vec_extract(tmp3, 0); + } + template + XSIMD_INLINE unsigned reduce_add(batch const& self, requires_arch) noexcept + { + auto tmp0 = vec_reve(self.data); // v3, v2, v1, v0 + auto tmp1 = vec_add(self.data, tmp0); // v0 + v3, v1 + v2, v2 + v1, v3 + v0 + auto tmp2 = vec_mergel(tmp1, tmp1); // v2 + v1, v2 + v1, v3 + v0, v3 + v0 + auto tmp3 = vec_add(tmp1, tmp2); + return vec_extract(tmp3, 0); + } + template + XSIMD_INLINE float reduce_add(batch const& self, requires_arch) noexcept + { + // FIXME: find an in-order approach + auto tmp0 = vec_reve(self.data); // v3, v2, v1, v0 + auto tmp1 = vec_add(self.data, tmp0); // v0 + v3, v1 + v2, v2 + v1, v3 + v0 + auto tmp2 = vec_mergel(tmp1, tmp1); // v2 + v1, v2 + v1, v3 + v0, v3 + v0 + auto tmp3 = vec_add(tmp1, tmp2); + return vec_extract(tmp3, 0); + } + template + XSIMD_INLINE double reduce_add(batch const& self, requires_arch) noexcept + { + auto tmp0 = vec_reve(self.data); // v1, v0 + auto tmp1 = vec_add(self.data, tmp0); // v0 + v1, v1 + v0 + return vec_extract(tmp1, 0); + } + template ::value, void>::type> + XSIMD_INLINE T reduce_add(batch const& self, requires_arch) noexcept + { + return hadd(self, common {}); + } + + // rsqrt + template + XSIMD_INLINE batch rsqrt(batch const& val, requires_arch) noexcept + { + return vec_rsqrt(val.data); + } + template + XSIMD_INLINE batch rsqrt(batch const& val, requires_arch) noexcept + { + return vec_rsqrt(val.data); + } + + // select + template ::value, void>::type> + XSIMD_INLINE batch select(batch_bool const& cond, batch const& true_br, batch const& false_br, requires_arch) noexcept + { + return vec_sel(false_br.data, true_br.data, cond.data); + } + template ::value, void>::type> + XSIMD_INLINE batch select(batch_bool_constant const&, batch const& true_br, batch const& false_br, requires_arch) noexcept + { + return select(batch_bool { Values... 
}, true_br, false_br, vsx {}); + } + + // shuffle + template + XSIMD_INLINE batch shuffle(batch const& x, batch const& y, batch_constant, requires_arch) noexcept + { + return vec_perm(x.data, y.data, + (__vector unsigned char) { + 4 * I0 + 0, 4 * I0 + 1, 4 * I0 + 2, 4 * I0 + 3, + 4 * I1 + 0, 4 * I1 + 1, 4 * I1 + 2, 4 * I1 + 3, + 4 * I2 + 0, 4 * I2 + 1, 4 * I2 + 2, 4 * I2 + 3, + 4 * I3 + 0, 4 * I3 + 1, 4 * I3 + 2, 4 * I3 + 3 }); + } + + template + XSIMD_INLINE batch shuffle(batch const& x, batch const& y, batch_constant, requires_arch) noexcept + { + return vec_perm(x.data, y.data, + (__vector unsigned char) { + 8 * I0 + 0, + 8 * I0 + 1, + 8 * I0 + 2, + 8 * I0 + 3, + 8 * I0 + 4, + 8 * I0 + 5, + 8 * I0 + 6, + 8 * I0 + 7, + 8 * I1 + 0, + 8 * I1 + 1, + 8 * I1 + 2, + 8 * I1 + 3, + 8 * I1 + 4, + 8 * I1 + 5, + 8 * I1 + 6, + 8 * I1 + 7, + }); + } + + // sqrt + template + XSIMD_INLINE batch sqrt(batch const& val, requires_arch) noexcept + { + return vec_sqrt(val.data); + } + + template + XSIMD_INLINE batch sqrt(batch const& val, requires_arch) noexcept + { + return vec_sqrt(val.data); + } + + // slide_left + template + XSIMD_INLINE batch slide_left(batch const& x, requires_arch) noexcept + { + XSIMD_IF_CONSTEXPR(N == batch::size * sizeof(T)) + { + return batch(0); + } + else + { + auto slider = vec_splats((uint8_t)(8 * N)); + return (typename batch::register_type)vec_slo(x.data, slider); + } + } + + // slide_right + template + XSIMD_INLINE batch slide_right(batch const& x, requires_arch) noexcept + { + XSIMD_IF_CONSTEXPR(N == batch::size * sizeof(T)) + { + return batch(0); + } + else + { + auto slider = vec_splats((uint8_t)(8 * N)); + return (typename batch::register_type)vec_sro((__vector unsigned char)x.data, slider); + } + } + + // sadd + template ::value && sizeof(T) != 8, void>::type> + XSIMD_INLINE batch sadd(batch const& self, batch const& other, requires_arch) noexcept + { + return vec_adds(self.data, other.data); + } + + // set + template + XSIMD_INLINE batch set(batch const&, requires_arch, Values... values) noexcept + { + static_assert(sizeof...(Values) == batch::size, "consistent init"); + return typename batch::register_type { values... }; + } + + template ::value, void>::type> + XSIMD_INLINE batch_bool set(batch_bool const&, requires_arch, Values... values) noexcept + { + static_assert(sizeof...(Values) == batch_bool::size, "consistent init"); + return typename batch_bool::register_type { static_cast::register_type>()[0])>(values ? -1LL : 0LL)... 
}; + } + + // ssub + + template ::value && sizeof(T) == 1, void>::type> + XSIMD_INLINE batch ssub(batch const& self, batch const& other, requires_arch) noexcept + { + return vec_subs(self.data, other.data); + } + + // store_aligned + template ::value, void>::type> + XSIMD_INLINE void store_aligned(T* mem, batch const& self, requires_arch) noexcept + { + return vec_st(self.data, 0, reinterpret_cast::register_type*>(mem)); + } + + // store_unaligned + template ::value, void>::type> + XSIMD_INLINE void store_unaligned(T* mem, batch const& self, requires_arch) noexcept + { + return vec_vsx_st(self.data, 0, reinterpret_cast::register_type*>(mem)); + } + + // sub + template ::value, void>::type> + XSIMD_INLINE batch sub(batch const& self, batch const& other, requires_arch) noexcept + { + return vec_sub(self.data, other.data); + } + + // swizzle + + template + XSIMD_INLINE batch swizzle(batch const& self, batch_constant, requires_arch) noexcept + { + return vec_perm(self.data, self.data, + (__vector unsigned char) { + 4 * V0 + 0, 4 * V0 + 1, 4 * V0 + 2, 4 * V0 + 3, + 4 * V1 + 0, 4 * V1 + 1, 4 * V1 + 2, 4 * V1 + 3, + 4 * V2 + 0, 4 * V2 + 1, 4 * V2 + 2, 4 * V2 + 3, + 4 * V3 + 0, 4 * V3 + 1, 4 * V3 + 2, 4 * V3 + 3 }); + } + + template + XSIMD_INLINE batch swizzle(batch const& self, batch_constant, requires_arch) noexcept + { + return vec_perm(self.data, self.data, + (__vector unsigned char) { + 8 * V0 + 0, + 8 * V0 + 1, + 8 * V0 + 2, + 8 * V0 + 3, + 8 * V0 + 4, + 8 * V0 + 5, + 8 * V0 + 6, + 8 * V0 + 7, + 8 * V1 + 0, + 8 * V1 + 1, + 8 * V1 + 2, + 8 * V1 + 3, + 8 * V1 + 4, + 8 * V1 + 5, + 8 * V1 + 6, + 8 * V1 + 7, + }); + } + + template + XSIMD_INLINE batch swizzle(batch const& self, batch_constant, requires_arch) noexcept + { + return vec_perm(self.data, self.data, + (__vector unsigned char) { + 8 * V0 + 0, + 8 * V0 + 1, + 8 * V0 + 2, + 8 * V0 + 3, + 8 * V0 + 4, + 8 * V0 + 5, + 8 * V0 + 6, + 8 * V0 + 7, + 8 * V1 + 0, + 8 * V1 + 1, + 8 * V1 + 2, + 8 * V1 + 3, + 8 * V1 + 4, + 8 * V1 + 5, + 8 * V1 + 6, + 8 * V1 + 7, + }); + } + + template + XSIMD_INLINE batch swizzle(batch const& self, batch_constant mask, requires_arch) noexcept + { + return bitwise_cast(swizzle(bitwise_cast(self), mask, vsx {})); + } + + template + XSIMD_INLINE batch swizzle(batch const& self, batch_constant, requires_arch) noexcept + { + return vec_perm(self.data, self.data, + (__vector unsigned char) { + 4 * V0 + 0, 4 * V0 + 1, 4 * V0 + 2, 4 * V0 + 3, + 4 * V1 + 0, 4 * V1 + 1, 4 * V1 + 2, 4 * V1 + 3, + 4 * V2 + 0, 4 * V2 + 1, 4 * V2 + 2, 4 * V2 + 3, + 4 * V3 + 0, 4 * V3 + 1, 4 * V3 + 2, 4 * V3 + 3 }); + } + + template + XSIMD_INLINE batch swizzle(batch const& self, batch_constant mask, requires_arch) noexcept + { + return bitwise_cast(swizzle(bitwise_cast(self), mask, vsx {})); + } + + template + XSIMD_INLINE batch swizzle(batch const& self, batch_constant, requires_arch) noexcept + { + return vec_perm(self.data, self.data, + (__vector unsigned char) { + 2 * V0 + 0, 2 * V0 + 1, 2 * V1 + 0, 2 * V1 + 1, + 2 * V2 + 0, 2 * V2 + 1, 2 * V3 + 0, 2 * V3 + 1, + 2 * V4 + 0, 2 * V4 + 1, 2 * V5 + 0, 2 * V5 + 1, + 2 * V6 + 0, 2 * V6 + 1, 2 * V7 + 0, 2 * V7 + 1 }); + } + + template + XSIMD_INLINE batch swizzle(batch const& self, batch_constant mask, requires_arch) noexcept + { + return bitwise_cast(swizzle(bitwise_cast(self), mask, vsx {})); + } + + // zip_hi + template ::value, void>::type> + XSIMD_INLINE batch zip_hi(batch const& self, batch const& other, requires_arch) noexcept + { + return vec_mergel(self.data, other.data); + } + + // zip_lo 
+ template ::value, void>::type> + XSIMD_INLINE batch zip_lo(batch const& self, batch const& other, requires_arch) noexcept + { + return vec_mergeh(self.data, other.data); + } + } +} + +#endif diff --git a/include/xsimd/config/xsimd_arch.hpp b/include/xsimd/config/xsimd_arch.hpp index 89fc6783d..ddd78e13d 100644 --- a/include/xsimd/config/xsimd_arch.hpp +++ b/include/xsimd/config/xsimd_arch.hpp @@ -169,14 +169,16 @@ namespace xsimd using all_sve_architectures = arch_list, detail::sve<256>, detail::sve<128>>; using all_rvv_architectures = arch_list, detail::rvv<256>, detail::rvv<128>>; using all_arm_architectures = typename detail::join, neon64, neon>>::type; + using all_power_architectures = arch_list; using all_riscv_architectures = all_rvv_architectures; using all_wasm_architectures = arch_list; - using all_architectures = typename detail::join::type; + using all_architectures = typename detail::join::type; using supported_architectures = typename detail::supported::type; using x86_arch = typename detail::supported::type::best; using arm_arch = typename detail::supported::type::best; + using power_arch = typename detail::supported::type::best; using riscv_arch = typename detail::supported::type::best; using best_arch = typename supported_architectures::best; diff --git a/include/xsimd/config/xsimd_config.hpp b/include/xsimd/config/xsimd_config.hpp index 326f766c4..70d065292 100644 --- a/include/xsimd/config/xsimd_config.hpp +++ b/include/xsimd/config/xsimd_config.hpp @@ -408,6 +408,17 @@ #define XSIMD_WITH_WASM 0 #endif +/** + * @ingroup xsimd_config_macro + * + * Set to 1 if VMX with VSX extension is available at compile-time, to 0 otherwise. + */ +#if defined(__VEC__) && defined(__VSX__) +#define XSIMD_WITH_VSX 1 +#else +#define XSIMD_WITH_VSX 0 +#endif + // Workaround for MSVC compiler #ifdef _MSC_VER @@ -466,7 +477,7 @@ #endif -#if !XSIMD_WITH_SSE2 && !XSIMD_WITH_SSE3 && !XSIMD_WITH_SSSE3 && !XSIMD_WITH_SSE4_1 && !XSIMD_WITH_SSE4_2 && !XSIMD_WITH_AVX && !XSIMD_WITH_AVX2 && !XSIMD_WITH_AVXVNNI && !XSIMD_WITH_FMA3_SSE && !XSIMD_WITH_FMA4 && !XSIMD_WITH_FMA3_AVX && !XSIMD_WITH_FMA3_AVX2 && !XSIMD_WITH_AVX512F && !XSIMD_WITH_AVX512CD && !XSIMD_WITH_AVX512DQ && !XSIMD_WITH_AVX512BW && !XSIMD_WITH_AVX512ER && !XSIMD_WITH_AVX512PF && !XSIMD_WITH_AVX512IFMA && !XSIMD_WITH_AVX512VBMI && !XSIMD_WITH_AVX512VBMI2 && !XSIMD_WITH_NEON && !XSIMD_WITH_NEON64 && !XSIMD_WITH_SVE && !XSIMD_WITH_RVV && !XSIMD_WITH_WASM && !XSIMD_WITH_EMULATED +#if !XSIMD_WITH_SSE2 && !XSIMD_WITH_SSE3 && !XSIMD_WITH_SSSE3 && !XSIMD_WITH_SSE4_1 && !XSIMD_WITH_SSE4_2 && !XSIMD_WITH_AVX && !XSIMD_WITH_AVX2 && !XSIMD_WITH_AVXVNNI && !XSIMD_WITH_FMA3_SSE && !XSIMD_WITH_FMA4 && !XSIMD_WITH_FMA3_AVX && !XSIMD_WITH_FMA3_AVX2 && !XSIMD_WITH_AVX512F && !XSIMD_WITH_AVX512CD && !XSIMD_WITH_AVX512DQ && !XSIMD_WITH_AVX512BW && !XSIMD_WITH_AVX512ER && !XSIMD_WITH_AVX512PF && !XSIMD_WITH_AVX512IFMA && !XSIMD_WITH_AVX512VBMI && !XSIMD_WITH_AVX512VBMI2 && !XSIMD_WITH_NEON && !XSIMD_WITH_NEON64 && !XSIMD_WITH_SVE && !XSIMD_WITH_RVV && !XSIMD_WITH_WASM && !XSIMD_WITH_VSX && !XSIMD_WITH_EMULATED #define XSIMD_NO_SUPPORTED_ARCHITECTURE #endif diff --git a/include/xsimd/config/xsimd_cpuid.hpp b/include/xsimd/config/xsimd_cpuid.hpp index 7b940f655..8fdae0e51 100644 --- a/include/xsimd/config/xsimd_cpuid.hpp +++ b/include/xsimd/config/xsimd_cpuid.hpp @@ -84,6 +84,7 @@ namespace xsimd ARCH_FIELD_EX_REUSE(detail::rvv<256>, rvv) ARCH_FIELD_EX_REUSE(detail::rvv<128>, rvv) ARCH_FIELD(wasm) + ARCH_FIELD(vsx) #undef ARCH_FIELD @@ -95,6 +96,10 @@ 
namespace xsimd wasm = 1; #endif +#if XSIMD_WITH_VSX + vsx = 1; +#endif + #if defined(__aarch64__) || defined(_M_ARM64) neon = 1; neon64 = 1; diff --git a/include/xsimd/config/xsimd_inline.hpp b/include/xsimd/config/xsimd_inline.hpp index a87410b41..eaf024103 100644 --- a/include/xsimd/config/xsimd_inline.hpp +++ b/include/xsimd/config/xsimd_inline.hpp @@ -12,7 +12,9 @@ #ifndef XSIMD_INLINE_HPP #define XSIMD_INLINE_HPP -#if defined __has_attribute +#if defined(__VEC__) +#define XSIMD_INLINE inline +#elif defined __has_attribute #if __has_attribute(always_inline) #define XSIMD_INLINE inline __attribute__((always_inline)) #endif diff --git a/include/xsimd/types/xsimd_all_registers.hpp b/include/xsimd/types/xsimd_all_registers.hpp index a652061a8..33f9b465d 100644 --- a/include/xsimd/types/xsimd_all_registers.hpp +++ b/include/xsimd/types/xsimd_all_registers.hpp @@ -48,6 +48,8 @@ #include "xsimd_wasm_register.hpp" +#include "xsimd_vsx_register.hpp" + #if XSIMD_WITH_EMULATED #include "xsimd_emulated_register.hpp" #endif diff --git a/include/xsimd/types/xsimd_api.hpp b/include/xsimd/types/xsimd_api.hpp index 3b080dd95..eeee5b267 100644 --- a/include/xsimd/types/xsimd_api.hpp +++ b/include/xsimd/types/xsimd_api.hpp @@ -2206,6 +2206,9 @@ namespace xsimd * Slide the whole batch to the left by \c n bytes. This is different from * \c bitwise_lshift that shifts each batch element to the left. * + * @warning The behavior of this function is platform-dependent on big + * endian architectures. + * * @tparam N Amount of bytes to slide to the left. * @param x batch of integer values. * @return slided batch. @@ -2224,6 +2227,9 @@ namespace xsimd * Slide the whole batch to the right by \c N bytes. This is different from * \c bitwise_rshift that shifts each batch element to the right. * + * @warning The behavior of this function is platform-dependent on big + * endian architectures. + * * @tparam N Amount of bytes to slide to the right. * @param x batch of integer values. * @return slided batch. diff --git a/include/xsimd/types/xsimd_vsx_register.hpp b/include/xsimd/types/xsimd_vsx_register.hpp new file mode 100644 index 000000000..cfd450317 --- /dev/null +++ b/include/xsimd/types/xsimd_vsx_register.hpp @@ -0,0 +1,77 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. 
* + ****************************************************************************/ + +#ifndef XSIMD_VSX_REGISTER_HPP +#define XSIMD_VSX_REGISTER_HPP + +#include "./xsimd_common_arch.hpp" +#include "./xsimd_register.hpp" + +#if XSIMD_WITH_VSX +#include +#endif + +namespace xsimd +{ + /** + * @ingroup architectures + * + * VSX instructions + */ + struct vsx : common + { + static constexpr bool supported() noexcept { return XSIMD_WITH_VSX; } + static constexpr bool available() noexcept { return true; } + static constexpr bool requires_alignment() noexcept { return true; } + static constexpr std::size_t alignment() noexcept { return 16; } + static constexpr char const* name() noexcept { return "vmx+vsx"; } + }; + +#if XSIMD_WITH_VSX + namespace types + { + +#define XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(T, Tb) \ + template <> \ + struct get_bool_simd_register \ + { \ + struct type \ + { \ + using register_type = __vector __bool Tb; \ + register_type data; \ + type() = default; \ + type(register_type r) \ + : data(r) \ + { \ + } \ + operator register_type() const noexcept { return data; } \ + }; \ + }; \ + XSIMD_DECLARE_SIMD_REGISTER(T, vsx, __vector T) + + XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(signed char, char); + XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(unsigned char, char); + XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(char, char); + XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(unsigned short, short); + XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(short, short); + XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(unsigned int, int); + XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(int, int); + XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(unsigned long, long); + XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(long, long); + XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(float, int); + XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(double, long); + +#undef XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER + } +#endif +} + +#endif diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 8a4ce50d5..e6bad7999 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -107,6 +107,8 @@ if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID MATCHES "GNU" set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mcpu=${TARGET_ARCH} -mtune=${TARGET_ARCH}") elseif(${CMAKE_SYSTEM_PROCESSOR} MATCHES "riscv64") # Nothing specific + elseif(${CMAKE_SYSTEM_PROCESSOR} MATCHES "powerpc") + # Nothing specific elseif(NOT WIN32 AND NOT EMSCRIPTEN) if(NOT CMAKE_CXX_FLAGS MATCHES "-march" AND NOT CMAKE_CXX_FLAGS MATCHES "-arch" AND NOT CMAKE_OSX_ARCHITECTURES) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=${TARGET_ARCH}") diff --git a/test/test_batch_int.cpp b/test/test_batch_int.cpp index 9a992a566..3b58e0548 100644 --- a/test/test_batch_int.cpp +++ b/test/test_batch_int.cpp @@ -285,21 +285,15 @@ struct batch_int_test for (int32_t i = 0; i < s; ++i) { res = lhs << i; - value_type expected = value_type(1) << i; - for (std::size_t j = 0; j < size; ++j) - { - CHECK_EQ(res.get(j), expected); - } + batch_type expected(value_type(1) << i); + CHECK_BATCH_EQ(res, expected); } lhs = batch_type(std::numeric_limits::max()); for (int32_t i = 0; i < s; ++i) { res = lhs >> i; - value_type expected = std::numeric_limits::max() >> i; - for (std::size_t j = 0; j < size; ++j) - { - CHECK_EQ(res.get(j), expected); - } + batch_type expected(std::numeric_limits::max() >> i); + CHECK_BATCH_EQ(res, expected); } } diff --git a/test/test_load_store.cpp b/test/test_load_store.cpp index 7a4d80932..449c41e85 100644 --- a/test/test_load_store.cpp +++ b/test/test_load_store.cpp @@ -39,7 +39,9 @@ struct load_store_test using ulong_vector_type = 
std::vector>; #endif using float_vector_type = std::vector>; +#if !XSIMD_WITH_NEON || XSIMD_WITH_NEON64 using double_vector_type = std::vector>; +#endif int8_vector_type i8_vec; uint8_vector_type ui8_vec; @@ -54,7 +56,9 @@ struct load_store_test ulong_vector_type ul_vec; #endif float_vector_type f_vec; +#if !XSIMD_WITH_NEON || XSIMD_WITH_NEON64 double_vector_type d_vec; +#endif array_type expected; @@ -73,7 +77,9 @@ struct load_store_test init_test_vector(ul_vec); #endif init_test_vector(f_vec); +#if !XSIMD_WITH_NEON || XSIMD_WITH_NEON64 init_test_vector(d_vec); +#endif } void test_load() diff --git a/test/test_memory.cpp b/test/test_memory.cpp index 930ef26fd..f582d59bd 100644 --- a/test/test_memory.cpp +++ b/test/test_memory.cpp @@ -46,7 +46,7 @@ TEST_CASE("[is_aligned]") float f[100]; void* unaligned_f = static_cast(&f[0]); constexpr std::size_t alignment = xsimd::default_arch::alignment(); - std::size_t aligned_f_size; + std::size_t aligned_f_size = sizeof(f); void* aligned_f = std::align(alignment, sizeof(f), unaligned_f, aligned_f_size); CHECK_UNARY(xsimd::is_aligned(aligned_f)); diff --git a/test/test_shuffle.cpp b/test/test_shuffle.cpp index 846da5b7d..b08210974 100644 --- a/test/test_shuffle.cpp +++ b/test/test_shuffle.cpp @@ -14,6 +14,13 @@ #include "test_utils.hpp" +#ifdef __linux__ +#include "endian.h" +#if BYTE_ORDER == BIG_ENDIAN +#define XSIMD_NO_SLIDE +#endif +#endif + #include namespace @@ -197,6 +204,7 @@ struct slide_test : public init_slide_base INFO("slide_left full"); CHECK_BATCH_EQ(b_res_left_full, b_left_full); +#ifndef XSIMD_NO_SLIDE B b_res_left_half = xsimd::slide_left(b_in); INFO("slide_left half_slide"); CHECK_BATCH_EQ(b_res_left_half, b_left_half); @@ -215,6 +223,7 @@ struct slide_test : public init_slide_base INFO("slide_left below_half_slide"); CHECK_BATCH_EQ(b_res_left_below_half, b_left_below_half); } +#endif } void slide_right() @@ -235,6 +244,7 @@ struct slide_test : public init_slide_base INFO("slide_right full"); CHECK_BATCH_EQ(b_res_right_full, b_right_full); +#ifndef XSIMD_NO_SLIDE B b_res_right_half = xsimd::slide_right(b_in); INFO("slide_right half_slide"); CHECK_BATCH_EQ(b_res_right_half, b_right_half); @@ -253,6 +263,7 @@ struct slide_test : public init_slide_base INFO("slide_right below_half_slide"); CHECK_BATCH_EQ(b_res_right_below_half, b_right_below_half); } +#endif } }; @@ -344,7 +355,9 @@ struct compress_test } }; -TEST_CASE_TEMPLATE("[compress]", B, BATCH_FLOAT_TYPES, xsimd::batch, xsimd::batch, xsimd::batch, xsimd::batch, xsimd::batch, xsimd::batch, xsimd::batch, xsimd::batch) +#define XSIMD_COMPRESS_TYPES BATCH_FLOAT_TYPES, xsimd::batch, xsimd::batch, xsimd::batch, xsimd::batch, xsimd::batch, xsimd::batch, xsimd::batch, xsimd::batch + +TEST_CASE_TEMPLATE("[compress]", B, XSIMD_COMPRESS_TYPES) { compress_test Test; SUBCASE("empty") @@ -440,7 +453,9 @@ struct expand_test } }; -TEST_CASE_TEMPLATE("[expand]", B, BATCH_FLOAT_TYPES, xsimd::batch, xsimd::batch, xsimd::batch, xsimd::batch, xsimd::batch, xsimd::batch, xsimd::batch, xsimd::batch) +#define XSIMD_EXPAND_TYPES XSIMD_COMPRESS_TYPES + +TEST_CASE_TEMPLATE("[expand]", B, XSIMD_EXPAND_TYPES) { expand_test Test; SUBCASE("empty") @@ -687,7 +702,9 @@ struct shuffle_test } }; -TEST_CASE_TEMPLATE("[shuffle]", B, BATCH_FLOAT_TYPES, xsimd::batch, xsimd::batch, xsimd::batch, xsimd::batch) +#define XSIMD_SHUFFLE_TYPES BATCH_FLOAT_TYPES, xsimd::batch, xsimd::batch, xsimd::batch, xsimd::batch + +TEST_CASE_TEMPLATE("[shuffle]", B, XSIMD_SHUFFLE_TYPES) { shuffle_test Test; SUBCASE("no-op")