Skip to content

Commit

Permalink
Add support for Apple Silicon
Browse files Browse the repository at this point in the history
  • Loading branch information
dean0x7d committed Oct 15, 2023
1 parent 78690e4 commit 0567aea
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 12 deletions.
2 changes: 2 additions & 0 deletions changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

* Fixed compatibility issues with the latest Python versions (up to 3.12).

* Fixed compilation on Apple Silicon (M1, M2).

* Fixed deprecation warnings from the latest versions of `numpy`.

* Dropped support for Python 3.6. You must have Python 3.7 or newer to install this version.
Expand Down
12 changes: 8 additions & 4 deletions cppcore/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ download_dependency(variant 1.1.4
https://raw.githubusercontent.com/mapbox/variant/v\${VERSION}/include
mapbox/variant.hpp mapbox/recursive_wrapper.hpp mapbox/variant_visitor.hpp)
target_include_directories(cppcore SYSTEM PUBLIC ${VARIANT_INCLUDE_DIR})
download_dependency(simdpp 2.0-rc2
download_dependency(simdpp 2.1
https://github.com/p12tic/libsimdpp/archive
/v\${VERSION}.tar.gz */simdpp)
target_include_directories(cppcore SYSTEM PUBLIC ${SIMDPP_INCLUDE_DIR})
Expand All @@ -125,7 +125,11 @@ include(fmt)
target_link_libraries(cppcore PUBLIC fmt)

if(PB_NATIVE_SIMD AND NOT MSVC) # MSVC does not have anything like a /arch:native flag
target_compile_options(cppcore PUBLIC -march=native)
include(CheckCXXCompilerFlag)
check_cxx_compiler_flag(-march=native PB_HAS_ARCH_NATIVE)
if(PB_HAS_ARCH_NATIVE)
target_compile_options(cppcore PUBLIC -march=native)
endif()
endif()

if(PB_MKL)
Expand All @@ -140,7 +144,7 @@ if(PB_CUDA)
endif()

if(PB_TESTS)
set(catch_url https://raw.githubusercontent.com/philsquared/Catch/v\${VERSION}/single_include)
download_dependency(catch 1.8.1 ${catch_url} catch.hpp)
set(catch_url https://raw.githubusercontent.com/catchorg/Catch2/v\${VERSION}/single_include/catch2)
download_dependency(catch 2.13.10 ${catch_url} catch.hpp)
add_subdirectory(tests)
endif()
51 changes: 46 additions & 5 deletions cppcore/include/support/simd.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
# define SIMDPP_ARCH_X86_SSE3
#elif defined(__SSE2__) || defined(_M_X64) || _M_IX86_FP == 2
# define SIMDPP_ARCH_X86_SSE2
#elif defined(__ARM_NEON)
# define SIMDPP_ARCH_ARM_NEON
#endif

#if defined(__FMA__) || (defined(_MSC_VER) && defined(__AVX2__))
Expand Down Expand Up @@ -138,8 +140,13 @@ split_loop_t<step> split_loop(scalar_t const* p, idx_t start, idx_t end) {
RAII class which disables floating-point denormals (flush-to-zero mode)
*/
struct scope_disable_denormals {
#if SIMDPP_USE_SSE2
// SSE2 build: the constructor turns flush-to-zero mode on and the destructor turns it off.
// NOTE(review): the destructor always sets the mode to OFF instead of restoring whatever
// mode was active before construction -- confirm that callers never nest these guards or
// run with flush-to-zero already enabled.
CPB_ALWAYS_INLINE scope_disable_denormals() { _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); }
CPB_ALWAYS_INLINE ~scope_disable_denormals() { _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_OFF); }
#else // ARM NEON defaults for flush-to-zero
// Non-SSE2 build: both members are intentionally empty, so this guard changes no
// floating-point mode at all.
// NOTE(review): this relies on the target flushing denormals by default; verify that this
// actually holds on AArch64 (Apple Silicon), where flush-to-zero is presumably governed
// by a control-register bit rather than being always on.
scope_disable_denormals() {}
~scope_disable_denormals() {}
#endif
};

namespace detail {
Expand Down Expand Up @@ -179,7 +186,41 @@ namespace detail {
return _mm_castpd_ps(r);
}
};
#endif // SIMDPP_USE_SSE2
#else // generic SIMD on ARM NEON or anything other than SSE/AVX
// Gather specialization for a 2-lane double vector, written only with generic simdpp
// operations so that it does not depend on SSE/AVX intrinsics.
template<>
struct Gather<float64x2> {
// Real data: reads data[indices[0]] and data[indices[1]], broadcasting each into its own
// vector, then interleaves the two with zip2_lo.
// Presumably the result is {data[indices[0]], data[indices[1]]} -- confirm the lane order
// against the simdpp zip2_lo documentation.
CPB_ALWAYS_INLINE
static float64x2 call(double const* data, std::int32_t const* indices) {
auto const low = simdpp::load_splat<float64x2>(data + indices[0]);
auto const high = simdpp::load_splat<float64x2>(data + indices[1]);
return simdpp::zip2_lo(low, high);
}

// Complex data: a single load starting at data[indices[0]]; only indices[0] is read.
// One std::complex<double> (real, imaginary) occupies both lanes of the vector.
// NOTE(review): simdpp::load is presumably the aligned load -- confirm that every element
// of `data` satisfies the alignment it requires.
CPB_ALWAYS_INLINE
static float64x2 call(std::complex<double> const* data, std::int32_t const* indices) {
return simdpp::load<float64x2>(data + indices[0]);
}
};

// Gather specialization for a 4-lane float vector, written only with generic simdpp
// operations so that it does not depend on SSE/AVX intrinsics.
template<>
struct Gather<float32x4> {
// Real data: reads data[indices[0..3]], broadcasting each element into its own vector
// (a, b, c, d), then merges them with three zip4_lo calls: first (a, c) and (b, d),
// then those two intermediate results with each other.
// Presumably the final lanes are {a, b, c, d} in index order -- confirm the lane order
// against the simdpp zip4_lo documentation.
CPB_ALWAYS_INLINE
static float32x4 call(float const* data, std::int32_t const* indices) {
auto const a = simdpp::load_splat<float32x4>(data + indices[0]);
auto const b = simdpp::load_splat<float32x4>(data + indices[1]);
auto const c = simdpp::load_splat<float32x4>(data + indices[2]);
auto const d = simdpp::load_splat<float32x4>(data + indices[3]);
auto const ac = simdpp::zip4_lo(a, c);
auto const bd = simdpp::zip4_lo(b, d);
return simdpp::zip4_lo(ac, bd);
}

// Complex data: reinterprets the std::complex<float> array as an array of doubles, so each
// (real, imaginary) float pair is moved as one 64-bit element. It then delegates to the
// double gather using indices[0] and indices[1], and bit-casts the result back to four floats.
CPB_ALWAYS_INLINE
static float32x4 call(std::complex<float> const* data, std::int32_t const* indices) {
return simdpp::bit_cast<float32x4>(Gather<float64x2>::call(reinterpret_cast<double const*>(data), indices));
}
};
#endif

#if SIMDPP_USE_AVX && !SIMDPP_USE_AVX2
template<>
Expand Down Expand Up @@ -301,24 +342,24 @@ Vec<N, void> addsub(Vec<N, E1> const& a, Vec<N, E2> const& b) {
#if SIMDPP_USE_SSE3
template<class E1, class E2> CPB_ALWAYS_INLINE
float32x4 addsub(float32<4, E1> const& a, float32<4, E2> const& b) {
return _mm_addsub_ps(a.eval(), b.eval());
return _mm_addsub_ps(a.eval().native(), b.eval().native());
}

template<class E1, class E2> CPB_ALWAYS_INLINE
float64x2 addsub(float64<2, E1> const& a, float64<2, E2> const& b) {
return _mm_addsub_pd(a.eval(), b.eval());
return _mm_addsub_pd(a.eval().native(), b.eval().native());
}
#endif // SIMDPP_USE_SSE3

#if SIMDPP_USE_AVX
template<class E1, class E2> CPB_ALWAYS_INLINE
float32x8 addsub(float32<8, E1> const& a, float32<8, E2> const& b) {
return _mm256_addsub_ps(a.eval(), b.eval());
return _mm256_addsub_ps(a.eval().native(), b.eval().native());
}

template<class E1, class E2> CPB_ALWAYS_INLINE
float64x4 addsub(float64<4, E1> const& a, float64<4, E2> const& b) {
return _mm256_addsub_pd(a.eval(), b.eval());
return _mm256_addsub_pd(a.eval().native(), b.eval().native());
}
#endif // SIMDPP_USE_AVX

Expand Down
4 changes: 1 addition & 3 deletions pybinding/utils/cpuinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,10 @@ def summary():
return "py-cpuinfo is not installed"

info = info.copy()
hz_raw, scale = info['hz_advertised_raw']
info['ghz'] = hz_raw * 10**(scale - 9)
info['physical'] = physical_core_count()
info['virtual'] = virtual_core_count()
info['simd'] = _cpp.simd_info()
return "{brand}\n{physical}/{virtual} cores @ {ghz:.2g} GHz with {simd}".format_map(info)
return "{brand_raw}\n{physical}/{virtual} cores with {simd}".format_map(info)


if __name__ == '__main__':
Expand Down

0 comments on commit 0567aea

Please sign in to comment.