Skip to content

Commit e0a1058

Browse files
Fix fma4 support
Fix #1071
1 parent 0252ae7 commit e0a1058

File tree

3 files changed

+13
-10
lines changed

3 files changed

+13
-10
lines changed

Diff for: include/xsimd/arch/xsimd_fma4.hpp

+8-8
Original file line number | Diff line number | Diff line change
@@ -23,52 +23,52 @@ namespace xsimd
2323

2424
// fnma
2525
template <class A>
26-
XSIMD_INLINE batch<float, A> fnma(simd_register<float, A> const& x, simd_register<float, A> const& y, simd_register<float, A> const& z, requires_arch<fma4>) noexcept
26+
XSIMD_INLINE batch<float, A> fnma(batch<float, A> const& x, batch<float, A> const& y, batch<float, A> const& z, requires_arch<fma4>) noexcept
2727
{
2828
return _mm_nmacc_ps(x, y, z);
2929
}
3030

3131
template <class A>
32-
XSIMD_INLINE batch<double, A> fnma(simd_register<double, A> const& x, simd_register<double, A> const& y, simd_register<double, A> const& z, requires_arch<fma4>) noexcept
32+
XSIMD_INLINE batch<double, A> fnma(batch<double, A> const& x, batch<double, A> const& y, batch<double, A> const& z, requires_arch<fma4>) noexcept
3333
{
3434
return _mm_nmacc_pd(x, y, z);
3535
}
3636

3737
// fnms
3838
template <class A>
39-
XSIMD_INLINE batch<float, A> fnms(simd_register<float, A> const& x, simd_register<float, A> const& y, simd_register<float, A> const& z, requires_arch<fma4>) noexcept
39+
XSIMD_INLINE batch<float, A> fnms(batch<float, A> const& x, batch<float, A> const& y, batch<float, A> const& z, requires_arch<fma4>) noexcept
4040
{
4141
return _mm_nmsub_ps(x, y, z);
4242
}
4343

4444
template <class A>
45-
XSIMD_INLINE batch<double, A> fnms(simd_register<double, A> const& x, simd_register<double, A> const& y, simd_register<double, A> const& z, requires_arch<fma4>) noexcept
45+
XSIMD_INLINE batch<double, A> fnms(batch<double, A> const& x, batch<double, A> const& y, batch<double, A> const& z, requires_arch<fma4>) noexcept
4646
{
4747
return _mm_nmsub_pd(x, y, z);
4848
}
4949

5050
// fma
5151
template <class A>
52-
XSIMD_INLINE batch<float, A> fma(simd_register<float, A> const& x, simd_register<float, A> const& y, simd_register<float, A> const& z, requires_arch<fma4>) noexcept
52+
XSIMD_INLINE batch<float, A> fma(batch<float, A> const& x, batch<float, A> const& y, batch<float, A> const& z, requires_arch<fma4>) noexcept
5353
{
5454
return _mm_macc_ps(x, y, z);
5555
}
5656

5757
template <class A>
58-
XSIMD_INLINE batch<double, A> fma(simd_register<double, A> const& x, simd_register<double, A> const& y, simd_register<double, A> const& z, requires_arch<fma4>) noexcept
58+
XSIMD_INLINE batch<double, A> fma(batch<double, A> const& x, batch<double, A> const& y, batch<double, A> const& z, requires_arch<fma4>) noexcept
5959
{
6060
return _mm_macc_pd(x, y, z);
6161
}
6262

6363
// fms
6464
template <class A>
65-
XSIMD_INLINE batch<float, A> fms(simd_register<float, A> const& x, simd_register<float, A> const& y, simd_register<float, A> const& z, requires_arch<fma4>) noexcept
65+
XSIMD_INLINE batch<float, A> fms(batch<float, A> const& x, batch<float, A> const& y, batch<float, A> const& z, requires_arch<fma4>) noexcept
6666
{
6767
return _mm_msub_ps(x, y, z);
6868
}
6969

7070
template <class A>
71-
XSIMD_INLINE batch<double, A> fms(simd_register<double, A> const& x, simd_register<double, A> const& y, simd_register<double, A> const& z, requires_arch<fma4>) noexcept
71+
XSIMD_INLINE batch<double, A> fms(batch<double, A> const& x, batch<double, A> const& y, batch<double, A> const& z, requires_arch<fma4>) noexcept
7272
{
7373
return _mm_msub_pd(x, y, z);
7474
}

Diff for: include/xsimd/types/xsimd_fma4_register.hpp

+4
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,10 @@
1414

1515
#include "./xsimd_sse4_2_register.hpp"
1616

17+
#if XSIMD_WITH_FMA4
18+
#include <x86intrin.h>
19+
#endif
20+
1721
namespace xsimd
1822
{
1923
/**

Diff for: test/architectures/CMakeLists.txt

+1-2
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,7 @@
11
set(INTEL_PROCESSORS
2-
knl knm skylake-avx512 cannonlake icelake-client
2+
bdver1 knl knm skylake-avx512 cannonlake icelake-client
33
icelake-server cascadelake cooperlake tigerlake sapphirerapids alderlake
44
rocketlake graniterapids graniterapids-d znver4)
5-
set(CMAKE_CXX_FLAGS "-Werror=unused-command-line-argument")
65

76
foreach(INTEL_PROCESSOR ${INTEL_PROCESSORS})
87
# Adding the werror here to choke if the -march is incompatible with the

0 commit comments

Comments
 (0)