From 40c07af0cd60f176c82e69b55c742b513e714983 Mon Sep 17 00:00:00 2001
From: Bernhard Manfred Gruber
Date: Tue, 2 Jan 2024 20:35:14 +0100
Subject: [PATCH] Add fastpath for loading/storing SimdN

---
 include/llama/Simd.hpp | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/include/llama/Simd.hpp b/include/llama/Simd.hpp
index 56c7263935..9b9d70979c 100644
--- a/include/llama/Simd.hpp
+++ b/include/llama/Simd.hpp
@@ -312,8 +312,11 @@ namespace llama
         // structured dstSimd type and record reference
         if constexpr(isRecordRef && isRecordRef)
         {
-            forEachLeafCoord([&](auto rc) LLAMA_LAMBDA_INLINE
-                             { internal::loadSimdFromField(srcRef, dstSimd, rc); });
+            if constexpr(simdLanes == simdLanes) // fast path mainly for scalar SimdN
+                dstSimd = srcRef;
+            else
+                forEachLeafCoord(
+                    [&](auto rc) LLAMA_LAMBDA_INLINE { internal::loadSimdFromField(srcRef, dstSimd, rc); });
         }
         // unstructured dstSimd and reference type
         else if constexpr(!isRecordRef && !isRecordRef)
@@ -340,8 +343,11 @@ namespace llama
         // structured Simd type and record reference
         if constexpr(isRecordRef && isRecordRef)
         {
-            forEachLeafCoord([&](auto rc) LLAMA_LAMBDA_INLINE
-                             { internal::storeSimdToField(srcSimd, dstRef, rc); });
+            if constexpr(simdLanes == simdLanes) // fast path mainly for scalar SimdN
+                dstRef = srcSimd;
+            else
+                forEachLeafCoord(
+                    [&](auto rc) LLAMA_LAMBDA_INLINE { internal::storeSimdToField(srcSimd, dstRef, rc); });
         }
         // unstructured srcSimd and reference type
         else if constexpr(!isRecordRef && !isRecordRef)