diff --git a/changelog.md b/changelog.md
index 788dd5de..00e1b544 100644
--- a/changelog.md
+++ b/changelog.md
@@ -6,6 +6,8 @@
 
 * Fixed compatibility issues with the latests Python versions (up to 3.12).
 
+* Fixed compilation on Apple Silicon (M1, M2).
+
 * Fixed deprecation warnings from the latest versions of `numpy`.
 
 * Dropped support for Python 3.6. You must have Python 3.7 or newer to install this version.
diff --git a/cppcore/CMakeLists.txt b/cppcore/CMakeLists.txt
index 05d480c7..22cb40cf 100644
--- a/cppcore/CMakeLists.txt
+++ b/cppcore/CMakeLists.txt
@@ -116,7 +116,7 @@ download_dependency(variant 1.1.4
                     https://raw.githubusercontent.com/mapbox/variant/v\${VERSION}/include
                     mapbox/variant.hpp mapbox/recursive_wrapper.hpp mapbox/variant_visitor.hpp)
 target_include_directories(cppcore SYSTEM PUBLIC ${VARIANT_INCLUDE_DIR})
 
-download_dependency(simdpp 2.0-rc2
+download_dependency(simdpp 2.1
                     https://github.com/p12tic/libsimdpp/archive /v\${VERSION}.tar.gz */simdpp)
 target_include_directories(cppcore SYSTEM PUBLIC ${SIMDPP_INCLUDE_DIR})
@@ -125,7 +125,11 @@ include(fmt)
 target_link_libraries(cppcore PUBLIC fmt)
 
 if(PB_NATIVE_SIMD AND NOT MSVC) # MSVC does not have anything like a /arch:native flag
-    target_compile_options(cppcore PUBLIC -march=native)
+    include(CheckCXXCompilerFlag)
+    check_cxx_compiler_flag(-march=native PB_HAS_ARCH_NATIVE)
+    if(PB_HAS_ARCH_NATIVE)
+        target_compile_options(cppcore PUBLIC -march=native)
+    endif()
 endif()
 
 if(PB_MKL)
@@ -140,7 +144,7 @@ if(PB_CUDA)
 endif()
 
 if(PB_TESTS)
-    set(catch_url https://raw.githubusercontent.com/philsquared/Catch/v\${VERSION}/single_include)
-    download_dependency(catch 1.8.1 ${catch_url} catch.hpp)
+    set(catch_url https://raw.githubusercontent.com/catchorg/Catch2/v\${VERSION}/single_include/catch2)
+    download_dependency(catch 2.13.10 ${catch_url} catch.hpp)
     add_subdirectory(tests)
 endif()
diff --git a/cppcore/include/support/simd.hpp b/cppcore/include/support/simd.hpp
index 63ccca12..8541078c 100644
--- a/cppcore/include/support/simd.hpp
+++ b/cppcore/include/support/simd.hpp
@@ -8,6 +8,8 @@
 # define SIMDPP_ARCH_X86_SSE3
 #elif defined(__SSE2__) || defined(_M_X64) || _M_IX86_FP == 2
 # define SIMDPP_ARCH_X86_SSE2
+#elif defined(__ARM_NEON)
+# define SIMDPP_ARCH_ARM_NEON
 #endif
 
 #if defined(__FMA__) || (defined(_MSC_VER) && defined(__AVX2__))
@@ -138,8 +140,13 @@ split_loop_t<scalar_t> split_loop(scalar_t const* p, idx_t start, idx_t end) {
 
 RAII class which disables floating-point denormals (flush-to-zero mode) */
 struct scope_disable_denormals {
+#if SIMDPP_USE_SSE2
     CPB_ALWAYS_INLINE scope_disable_denormals() { _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); }
     CPB_ALWAYS_INLINE ~scope_disable_denormals() { _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_OFF); }
+#else // ARM NEON defaults for flush-to-zero
+    scope_disable_denormals() {}
+    ~scope_disable_denormals() {}
+#endif
 };
 
 namespace detail {
@@ -179,7 +186,41 @@ namespace detail {
         return _mm_castpd_ps(r);
     }
 };
-#endif // SIMDPP_USE_SSE2
+#else // generic SIMD on ARM NEON or anything other than SSE/AVX
+template<>
+struct Gather<float64x2> {
+    CPB_ALWAYS_INLINE
+    static float64x2 call(double const* data, std::int32_t const* indices) {
+        auto const low = simdpp::load_splat<float64x2>(data + indices[0]);
+        auto const high = simdpp::load_splat<float64x2>(data + indices[1]);
+        return simdpp::zip2_lo(low, high);
+    }
+
+    CPB_ALWAYS_INLINE
+    static float64x2 call(std::complex<double> const* data, std::int32_t const* indices) {
+        return simdpp::load(data + indices[0]);
+    }
+};
+
+template<>
+struct Gather<float32x4> {
+    CPB_ALWAYS_INLINE
+    static float32x4 call(float const* data, std::int32_t const* indices) {
+        auto const a = simdpp::load_splat<float32x4>(data + indices[0]);
+        auto const b = simdpp::load_splat<float32x4>(data + indices[1]);
+        auto const c = simdpp::load_splat<float32x4>(data + indices[2]);
+        auto const d = simdpp::load_splat<float32x4>(data + indices[3]);
+        auto const ac = simdpp::zip4_lo(a, c);
+        auto const bd = simdpp::zip4_lo(b, d);
+        return simdpp::zip4_lo(ac, bd);
+    }
+
+    CPB_ALWAYS_INLINE
+    static float32x4 call(std::complex<float> const* data, std::int32_t const* indices) {
+        return simdpp::bit_cast<float32x4>(Gather<float64x2>::call(reinterpret_cast<double const*>(data), indices));
+    }
+};
+#endif
 
 #if SIMDPP_USE_AVX && !SIMDPP_USE_AVX2
 template<>
@@ -301,24 +342,24 @@ Vec addsub(Vec const& a, Vec const& b) {
 #if SIMDPP_USE_SSE3
 template<class E1, class E2> CPB_ALWAYS_INLINE
 float32x4 addsub(float32<4, E1> const& a, float32<4, E2> const& b) {
-    return _mm_addsub_ps(a.eval(), b.eval());
+    return _mm_addsub_ps(a.eval().native(), b.eval().native());
 }
 
 template<class E1, class E2> CPB_ALWAYS_INLINE
 float64x2 addsub(float64<2, E1> const& a, float64<2, E2> const& b) {
-    return _mm_addsub_pd(a.eval(), b.eval());
+    return _mm_addsub_pd(a.eval().native(), b.eval().native());
 }
 #endif // SIMDPP_USE_SSE3
 
 #if SIMDPP_USE_AVX
 template<class E1, class E2> CPB_ALWAYS_INLINE
 float32x8 addsub(float32<8, E1> const& a, float32<8, E2> const& b) {
-    return _mm256_addsub_ps(a.eval(), b.eval());
+    return _mm256_addsub_ps(a.eval().native(), b.eval().native());
 }
 
 template<class E1, class E2> CPB_ALWAYS_INLINE
 float64x4 addsub(float64<4, E1> const& a, float64<4, E2> const& b) {
-    return _mm256_addsub_pd(a.eval(), b.eval());
+    return _mm256_addsub_pd(a.eval().native(), b.eval().native());
 }
 #endif // SIMDPP_USE_AVX
 
diff --git a/pybinding/utils/cpuinfo.py b/pybinding/utils/cpuinfo.py
index 53b0c2f5..943b1387 100644
--- a/pybinding/utils/cpuinfo.py
+++ b/pybinding/utils/cpuinfo.py
@@ -52,12 +52,10 @@ def summary():
         return "py-cpuinfo is not installed"
 
     info = info.copy()
-    hz_raw, scale = info['hz_advertised_raw']
-    info['ghz'] = hz_raw * 10**(scale - 9)
     info['physical'] = physical_core_count()
     info['virtual'] = virtual_core_count()
     info['simd'] = _cpp.simd_info()
-    return "{brand}\n{physical}/{virtual} cores @ {ghz:.2g} GHz with {simd}".format_map(info)
+    return "{brand_raw}\n{physical}/{virtual} cores with {simd}".format_map(info)
 
 
 if __name__ == '__main__':