Skip to content

Commit

Permalink
Add SIMD special handling for AoSoA
Browse files Browse the repository at this point in the history
  • Loading branch information
bernhardmgruber committed Jan 2, 2024
1 parent e44b23e commit 7c7bd39
Show file tree
Hide file tree
Showing 2 changed files with 91 additions and 37 deletions.
80 changes: 49 additions & 31 deletions include/llama/Simd.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "RecordRef.hpp"
#include "macros.hpp"
#include "mapping/AoS.hpp"
#include "mapping/AoSoA.hpp"
#include "mapping/SoA.hpp"

#include <type_traits>
Expand Down Expand Up @@ -210,41 +211,43 @@ namespace llama
using ElementSimd = std::decay_t<decltype(dstSimd(rc))>;
using Traits = SimdTraits<ElementSimd>;

auto loadElementWise = [&]
{
auto b = ArrayIndexIterator{srcRef.view.extents(), srcRef.arrayIndex()};
for(std::size_t i = 0; i < Traits::lanes; i++)
reinterpret_cast<FieldType*>(&dstSimd(rc))[i]
= srcRef.view(*b++)(cat(typename T::BoundRecordCoord{}, rc));
};

// TODO(bgruber): can we generalize the logic whether we can load a dstSimd from that mapping?
using Mapping = typename T::View::Mapping;
if constexpr(mapping::isSoA<Mapping>)
{
LLAMA_BEGIN_SUPPRESS_HOST_DEVICE_WARNING
dstSimd(rc) = Traits::loadUnaligned(&srcRef(rc)); // SIMD load
dstSimd(rc) = Traits::loadUnaligned(&srcRef(rc));
LLAMA_END_SUPPRESS_HOST_DEVICE_WARNING
}
// else if constexpr(mapping::isAoSoA<typename T::View::Mapping>)
//{
// // it turns out we do not need the specialization, because clang already fuses the scalar loads
// // into a vector load :D
// assert(srcRef.arrayDimsCoord()[0] % SIMD_WIDTH == 0);
// // if(srcRef.arrayDimsCoord()[0] % SIMD_WIDTH != 0)
// // __builtin_unreachable(); // this also helps nothing
// //__builtin_assume(srcRef.arrayDimsCoord()[0] % SIMD_WIDTH == 0); // this also helps nothing
// dstSimd(rc) = Traits::load_from(&srcRef(rc)); // SIMD load
//}
else if constexpr(mapping::isAoSoA<typename T::View::Mapping>)
{
// TODO(bgruber): this check is too strict
if(T::View::Mapping::ArrayExtents::rank == 1 && srcRef.arrayIndex()[0] % Traits::lanes == 0
&& T::View::Mapping::lanes >= Traits::lanes)
{
LLAMA_BEGIN_SUPPRESS_HOST_DEVICE_WARNING
dstSimd(rc) = Traits::loadUnaligned(&srcRef(rc));
LLAMA_END_SUPPRESS_HOST_DEVICE_WARNING
}
else
loadElementWise();
}
else if constexpr(mapping::isAoS<Mapping>)
{
static_assert(mapping::isAoS<Mapping>);
LLAMA_BEGIN_SUPPRESS_HOST_DEVICE_WARNING
dstSimd(rc) = Traits::gather(&srcRef(rc), aosStridedIndices<Mapping, FieldType, Traits::lanes>);
LLAMA_END_SUPPRESS_HOST_DEVICE_WARNING
}
else
{
auto b = ArrayIndexIterator{srcRef.view.extents(), srcRef.arrayIndex()};
ElementSimd elemSimd; // g++-12 really needs the intermediate elemSimd and memcpy
for(auto i = 0; i < Traits::lanes; i++)
reinterpret_cast<FieldType*>(&elemSimd)[i]
= srcRef.view(*b++)(cat(typename T::BoundRecordCoord{}, rc)); // scalar loads
std::memcpy(&dstSimd(rc), &elemSimd, sizeof(elemSimd));
}
loadElementWise();
}

template<typename Simd, typename TFwd, typename RecordCoord>
Expand All @@ -256,30 +259,45 @@ namespace llama
using ElementSimd = std::decay_t<decltype(srcSimd(rc))>;
using Traits = SimdTraits<ElementSimd>;

auto storeElementWise = [&]
{
// TODO(bgruber): how does this generalize conceptually to 2D and higher dimensions? in which
// direction should we collect SIMD values?
auto b = ArrayIndexIterator{dstRef.view.extents(), dstRef.arrayIndex()};
for(std::size_t i = 0; i < Traits::lanes; i++)
dstRef.view (*b++)(cat(typename T::BoundRecordCoord{}, rc))
= reinterpret_cast<const FieldType*>(&srcSimd(rc))[i];
};

// TODO(bgruber): can we generalize the logic whether we can store a srcSimd to that mapping?
using Mapping = typename std::remove_reference_t<T>::View::Mapping;
if constexpr(mapping::isSoA<Mapping>)
{
LLAMA_BEGIN_SUPPRESS_HOST_DEVICE_WARNING
Traits::storeUnaligned(srcSimd(rc), &dstRef(rc)); // SIMD store
Traits::storeUnaligned(srcSimd(rc), &dstRef(rc));
LLAMA_END_SUPPRESS_HOST_DEVICE_WARNING
}
else if constexpr(mapping::isAoSoA<typename T::View::Mapping>)
{
// TODO(bgruber): this check is too strict
if(T::View::Mapping::ArrayExtents::rank == 1 && dstRef.arrayIndex()[0] % Traits::lanes == 0
&& T::View::Mapping::lanes >= Traits::lanes)
{
LLAMA_BEGIN_SUPPRESS_HOST_DEVICE_WARNING
Traits::storeUnaligned(srcSimd(rc), &dstRef(rc));
LLAMA_END_SUPPRESS_HOST_DEVICE_WARNING
}
else
storeElementWise();
}
else if constexpr(mapping::isAoS<Mapping>)
{
LLAMA_BEGIN_SUPPRESS_HOST_DEVICE_WARNING
Traits::scatter(srcSimd(rc), &dstRef(rc), aosStridedIndices<Mapping, FieldType, Traits::lanes>);
LLAMA_END_SUPPRESS_HOST_DEVICE_WARNING
}
else
{
// TODO(bgruber): how does this generalize conceptually to 2D and higher dimensions? in which
// direction should we collect SIMD values?
const ElementSimd elemSimd = srcSimd(rc);
auto b = ArrayIndexIterator{dstRef.view.extents(), dstRef.arrayIndex()};
for(auto i = 0; i < Traits::lanes; i++)
dstRef.view (*b++)(cat(typename T::BoundRecordCoord{}, rc))
= reinterpret_cast<const FieldType*>(&elemSimd)[i]; // scalar store
}
storeElementWise();
}
} // namespace internal

Expand Down
48 changes: 42 additions & 6 deletions tests/simd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,13 @@ TEST_CASE("simd.loadSimd.simd.stdsimd")
CHECK(s[3] == 4.0f);
}

TEMPLATE_TEST_CASE("simd.loadSimd.record.scalar", "", llama::mapping::BindAoS<>, llama::mapping::BindSoA<>)
TEMPLATE_TEST_CASE(
"simd.loadSimd.record.scalar",
"",
llama::mapping::BindAoS<>,
llama::mapping::BindSoA<>,
llama::mapping::BindAoSoA<2>,
llama::mapping::BindAoSoA<32>)
{
using ArrayExtents = llama::ArrayExtentsDynamic<int, 1>;
const auto mapping = typename TestType::template fn<ArrayExtents, ParticleSimd>(ArrayExtents{1});
Expand All @@ -235,7 +241,13 @@ TEMPLATE_TEST_CASE("simd.loadSimd.record.scalar", "", llama::mapping::BindAoS<>,
CHECK(p(tag::Flags{}, llama::RecordCoord<3>{}) == 10);
}

TEMPLATE_TEST_CASE("simd.loadSimd.record.stdsimd", "", llama::mapping::BindAoS<>, llama::mapping::BindSoA<>)
TEMPLATE_TEST_CASE(
"simd.loadSimd.record.stdsimd",
"",
llama::mapping::BindAoS<>,
llama::mapping::BindSoA<>,
llama::mapping::BindAoSoA<2>,
llama::mapping::BindAoSoA<32>)
{
using ArrayExtents = llama::ArrayExtentsDynamic<int, 1>;
const auto mapping = typename TestType::template fn<ArrayExtents, ParticleSimd>(ArrayExtents{16});
Expand Down Expand Up @@ -303,7 +315,13 @@ TEST_CASE("simd.storeSimd.simd.stdsimd")
CHECK(a[3] == 4.0f);
}

TEMPLATE_TEST_CASE("simd.storeSimd.record.scalar", "", llama::mapping::BindAoS<>, llama::mapping::BindSoA<>)
TEMPLATE_TEST_CASE(
"simd.storeSimd.record.scalar",
"",
llama::mapping::BindAoS<>,
llama::mapping::BindSoA<>,
llama::mapping::BindAoSoA<2>,
llama::mapping::BindAoSoA<32>)
{
using ArrayExtents = llama::ArrayExtentsDynamic<int, 1>;
const auto mapping = typename TestType::template fn<ArrayExtents, ParticleSimd>(ArrayExtents{1});
Expand Down Expand Up @@ -336,7 +354,13 @@ TEMPLATE_TEST_CASE("simd.storeSimd.record.scalar", "", llama::mapping::BindAoS<>
CHECK(view(0)(tag::Flags{}, llama::RecordCoord<3>{}) == 10);
}

TEMPLATE_TEST_CASE("simd.storeSimd.record.stdsimd", "", llama::mapping::BindAoS<>, llama::mapping::BindSoA<>)
TEMPLATE_TEST_CASE(
"simd.storeSimd.record.stdsimd",
"",
llama::mapping::BindAoS<>,
llama::mapping::BindSoA<>,
llama::mapping::BindAoSoA<2>,
llama::mapping::BindAoSoA<32>)
{
using ArrayExtents = llama::ArrayExtentsDynamic<int, 1>;
const auto mapping = typename TestType::template fn<ArrayExtents, ParticleSimd>(ArrayExtents{16});
Expand Down Expand Up @@ -371,7 +395,13 @@ TEMPLATE_TEST_CASE("simd.storeSimd.record.stdsimd", "", llama::mapping::BindAoS<
CHECK(view(3)(tag::Mass{}) == 0);
}

TEMPLATE_TEST_CASE("simd.simdForEachN.stdsimd", "", llama::mapping::BindAoS<>, llama::mapping::BindSoA<>)
TEMPLATE_TEST_CASE(
"simd.simdForEachN.stdsimd",
"",
llama::mapping::BindAoS<>,
llama::mapping::BindSoA<>,
llama::mapping::BindAoSoA<2>,
llama::mapping::BindAoSoA<32>)
{
using ArrayExtents = llama::ArrayExtentsDynamic<int, 2>;
for(auto extents : {ArrayExtents{16, 32}, ArrayExtents{11, 7}})
Expand Down Expand Up @@ -401,7 +431,13 @@ TEMPLATE_TEST_CASE("simd.simdForEachN.stdsimd", "", llama::mapping::BindAoS<>, l
}
}

TEMPLATE_TEST_CASE("simd.simdForEach.stdsimd", "", llama::mapping::BindAoS<>, llama::mapping::BindSoA<>)
TEMPLATE_TEST_CASE(
"simd.simdForEach.stdsimd",
"",
llama::mapping::BindAoS<>,
llama::mapping::BindSoA<>,
llama::mapping::BindAoSoA<2>,
llama::mapping::BindAoSoA<32>)
{
using ArrayExtents = llama::ArrayExtentsDynamic<int, 2>;
for(auto extents : {ArrayExtents{16, 32}, ArrayExtents{11, 7}})
Expand Down

0 comments on commit 7c7bd39

Please sign in to comment.