From 1876251145723a7f1f1b4e5d639147a6b1a4e222 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Thu, 17 Oct 2024 14:57:03 -0700 Subject: [PATCH 01/32] WIP new properties --- .../sycl/ext/oneapi/properties/properties.hpp | 154 ++++++++++++++++++ .../extensions/properties/new_properties.cpp | 72 ++++++++ 2 files changed, 226 insertions(+) create mode 100644 sycl/test/extensions/properties/new_properties.cpp diff --git a/sycl/include/sycl/ext/oneapi/properties/properties.hpp b/sycl/include/sycl/ext/oneapi/properties/properties.hpp index 1c93e00dbe880..8fc220d7403d7 100644 --- a/sycl/include/sycl/ext/oneapi/properties/properties.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/properties.hpp @@ -8,6 +8,15 @@ #pragma once +#include +#include +#include +#include + +#include + + +// For old properties: #include #include // for IsRuntimePr... #include // for Sorted, Mer... @@ -17,6 +26,151 @@ #include // for enable_if_t #include // for tuple + +namespace sycl { +inline namespace _V1 { +namespace ext::oneapi::experimental { +namespace new_properties { +namespace detail { +template struct properties_type_list; + +#if __has_builtin(__type_pack_element) +template +using nth_type_t = __type_pack_element; +#else +template struct nth_type { + using type = typename nth_type::type; +}; + +template struct nth_type<0, T, Ts...> { + using type = T; +}; + +template +using nth_type_t = typename nth_type::type; +#endif + +template struct properties_sorter; + +// Specialization to avoid zero-size array creation. +template <> struct properties_sorter> { + using type = properties_type_list<>; +}; + +template +struct properties_sorter, + property_tys...> { + static constexpr auto sorted_indices = []() constexpr { + int idx = 0; + int N = sizeof...(property_tys); + // TODO: Use C++20 constexpr std::sort if available. + std::array to_sort{std::pair{property_tys::sort_key, idx++}...}; + auto swap_pair = [](auto &x, auto &y) constexpr { + auto tmp_first = x.first; + auto tmp_second = x.second; + x.first = y.first; + x.second = y.second; + y.first = tmp_first; + y.second = tmp_second; + }; + for (int i = 0; i < N; ++i) + for (int j = i; j < N; ++j) + if (to_sort[j].first < to_sort[i].first) + swap_pair(to_sort[i], to_sort[j]); + + std::array sorted_indices{}; + for (int i = 0; i < N; ++i) + sorted_indices[i] = to_sort[i].second; + + return sorted_indices; + }(); + + using type = properties_type_list< + nth_type_t...>; +}; + +struct property_key_tag_base {}; + +template +struct property_key_tag : property_key_tag_base {}; + +template +struct property_base : property_key_tag { +protected: + using key_t = property_key_t; + constexpr property_t get_property(property_key_tag) const { + return *static_cast(this); + } +}; +} // namespace detail + +using property_sort_key_t = std::string_view; + +template class properties; +template +class __SYCL_EBO properties> + : property_tys... { + static_assert( + []() constexpr { + if constexpr (sizeof...(property_tys) == 0) { + return true; + } else { + const std::array sort_keys = {property_tys::sort_key...}; + // std::is_sorted isn't constexpr until C++20. + if (sort_keys.empty()) + return true; + for (std::size_t idx = 1; idx < sort_keys.size(); ++idx) + if (sort_keys[idx - 1] >= sort_keys[idx]) + return false; + return true; + } + }(), + "Properties must be sorted and non-repeating!"); + static_assert( + (std::is_base_of_v && ...)); + using property_tys::get_property...; + +public: + template + properties(unsorted_property_tys... props) + : unsorted_property_tys(props)... {} + + // TODO: Do we need this? If so, is separate CTAD needed? + // template + // properties(unsorted_property_tys &&...props) + // : unsorted_property_tys(std::forward(props))... + // {} + + template static constexpr bool has_property() { + return std::is_base_of_v, + properties>; + } + + template constexpr auto get_property() { + return get_property(detail::property_key_tag{}); + } +}; + +template +properties(unsorted_property_tys...) + -> properties, + unsorted_property_tys...>::type>; + +using empty_properties_t = decltype(properties{}); + +template struct is_property_list : std::false_type {}; +template +struct is_property_list> : std::true_type {}; +template +inline constexpr bool is_property_list_v = is_property_list::value; + +template struct is_property_key_of : std::false_type {}; +} // namespace new_properties +} // namespace ext::oneapi::experimental +} // namespace _V1 +} // namespace sycl + namespace sycl { inline namespace _V1 { namespace ext::oneapi::experimental { diff --git a/sycl/test/extensions/properties/new_properties.cpp b/sycl/test/extensions/properties/new_properties.cpp new file mode 100644 index 0000000000000..99d1b335669a1 --- /dev/null +++ b/sycl/test/extensions/properties/new_properties.cpp @@ -0,0 +1,72 @@ +// RUN: %clangxx -fsycl -fsyntax-only %s + +#include + +using namespace sycl::ext::oneapi::experimental::new_properties; + +using mock_property_sort_key_t = int; + +namespace test_sorting { +template struct Property : detail::property_base> { + static constexpr mock_property_sort_key_t sort_key = N; +}; +static_assert( + std::is_same_v{}, Property<2>{}}), + decltype(properties{Property<2>{}, Property<3>{}})>); +} // namespace test_sorting + +namespace test { +struct property1 : detail::property_base { + static constexpr mock_property_sort_key_t sort_key = 1; +}; + +template +struct property2 : detail::property_base, struct property2_key> { + static constexpr mock_property_sort_key_t sort_key = 2; +}; + +struct property3 : detail::property_base { + static constexpr mock_property_sort_key_t sort_key = 3; + property3(int x) : x(x) {} + int x; +}; + +void test() { + property1 p1; + property2<42> p2; + property3 p3{11}; + + properties pl1{p1, p2, p3}; + properties pl2{p3}; + + static_assert(pl1.has_property()); + static_assert(!pl2.has_property()); + + static_assert(pl1.has_property()); + static_assert(!pl2.has_property()); + + static_assert(pl1.has_property()); + static_assert(pl2.has_property()); +} +} // namespace test + +namespace bench { +template struct property : detail::property_base> { + static constexpr mock_property_sort_key_t sort_key = 1000 + N; + static constexpr int value() { return N; } +}; + +template void test(std::integer_sequence) { + properties pl{property{}...}; + static_assert((pl.template has_property>() && ...)); + static_assert( + ((pl.template get_property>().value() == N) && ...)); +} +} // namespace bench + +int main() { + test::test(); + bench::test(std::make_integer_sequence{}); + + properties empty_props{}; +} From 01c938972c1f2da6a81b8e73a64d1c5c8c4f8f85 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Thu, 17 Oct 2024 21:36:27 -0700 Subject: [PATCH 02/32] Fixes for group_load_store: * merge ctor * SFINAE on ctors/CTAD rules * more tests --- .../sycl/ext/oneapi/properties/properties.hpp | 62 +++++++++++++--- .../extensions/properties/new_properties.cpp | 73 +++++++++++++++++++ 2 files changed, 125 insertions(+), 10 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/properties/properties.hpp b/sycl/include/sycl/ext/oneapi/properties/properties.hpp index 8fc220d7403d7..0e506e9882294 100644 --- a/sycl/include/sycl/ext/oneapi/properties/properties.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/properties.hpp @@ -107,9 +107,22 @@ struct property_base : property_key_tag { using property_sort_key_t = std::string_view; template class properties; + +template struct is_property_list : std::false_type {}; +template +struct is_property_list> : std::true_type {}; +template +inline constexpr bool is_property_list_v = is_property_list::value; + +template +inline constexpr bool is_property_v = + std::is_base_of_v && + !is_property_list_v; + template class __SYCL_EBO properties> - : property_tys... { + : public property_tys... { + static_assert((is_property_v && ...)); static_assert( []() constexpr { if constexpr (sizeof...(property_tys) == 0) { @@ -131,10 +144,24 @@ class __SYCL_EBO properties> using property_tys::get_property...; public: - template - properties(unsorted_property_tys... props) + template && ...))>> + constexpr properties(unsorted_property_tys... props) : unsorted_property_tys(props)... {} + // TODO: add a unit-test for this. + template < + typename... other_property_list_tys, typename... other_property_tys, + typename = std::enable_if_t<((is_property_v && ...))>> + constexpr properties( + properties> + other_properties, + other_property_tys... props) + : other_property_list_tys( + static_cast(other_properties))..., + other_property_tys(props)... {} + // TODO: Do we need this? If so, is separate CTAD needed? // template // properties(unsorted_property_tys &&...props) @@ -149,21 +176,36 @@ class __SYCL_EBO properties> template constexpr auto get_property() { return get_property(detail::property_key_tag{}); } + + template + constexpr auto + get_property_or_default_to(default_property_t default_property) { + if constexpr (has_property()) + return get_property(); + else + return default_property; + } }; -template +template && ...))>> properties(unsorted_property_tys...) -> properties, unsorted_property_tys...>::type>; -using empty_properties_t = decltype(properties{}); +template < + typename... other_property_list_tys, typename... other_property_tys, + typename = std::enable_if_t<((is_property_v && ...))>> +properties(properties>, + other_property_tys...) + -> properties, + other_property_list_tys..., other_property_tys...>::type>; -template struct is_property_list : std::false_type {}; -template -struct is_property_list> : std::true_type {}; -template -inline constexpr bool is_property_list_v = is_property_list::value; +using empty_properties_t = decltype(properties{}); template struct is_property_key_of : std::false_type {}; } // namespace new_properties diff --git a/sycl/test/extensions/properties/new_properties.cpp b/sycl/test/extensions/properties/new_properties.cpp index 99d1b335669a1..f69ebe001cf8f 100644 --- a/sycl/test/extensions/properties/new_properties.cpp +++ b/sycl/test/extensions/properties/new_properties.cpp @@ -64,6 +64,79 @@ template void test(std::integer_sequence) { } } // namespace bench +namespace test_group_load_store { +struct naive : detail::property_base { + static constexpr mock_property_sort_key_t sort_key = 1; +}; +struct full_group : detail::property_base { + static constexpr mock_property_sort_key_t sort_key = 2; +}; +constexpr properties pl1{full_group{}}; +constexpr properties pl2{pl1, naive{}}; +static_assert(pl1.template has_property()); +static_assert(!pl1.template has_property()); +static_assert(pl2.template has_property()); +static_assert(pl2.template has_property()); + +enum class data_placement { blocked, striped }; +template +struct data_placement_property + : detail::property_base, + struct data_placement_property_key> { + static constexpr mock_property_sort_key_t sort_key = 3; + static constexpr bool is_blocked() { + return placement == data_placement::blocked; + } +}; +inline constexpr data_placement_property blocked; +inline constexpr data_placement_property striped; + +static_assert(properties{naive{}, blocked} + .get_property() + .is_blocked()); +static_assert(!properties{naive{}, striped} + .get_property() + .is_blocked()); +static_assert( + properties{naive{}, blocked} + .get_property_or_default_to(blocked) + .is_blocked()); +static_assert( + !properties{naive{}, data_placement_property{}} + .get_property_or_default_to( + blocked) + .is_blocked()); +static_assert( + properties{naive{}} + .get_property_or_default_to(blocked) + .is_blocked()); +static_assert( + !properties{naive{}} + .get_property_or_default_to( + striped) + .is_blocked()); + +constexpr properties pl3{full_group{}, blocked}; +// constexpr properties pl4{pl3, naive{}}; +template +constexpr auto merge_properties( + properties>, + other_property_tys...) { + return 42; +} +static_assert(merge_properties(pl3, naive{}) == 42); +} + +namespace test_merge_ctor { + template + struct property : detail::property_base> { + static constexpr int sort_key = N; + }; + + constexpr properties pl1{property<1>{}, property<2>{}, property<3>{}}; + constexpr properties pl2{pl1, property<4>{}}; +} + int main() { test::test(); bench::test(std::make_integer_sequence{}); From c357d6f71fdc45245fb6b6f18fed690d04322d18 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Thu, 17 Oct 2024 21:44:20 -0700 Subject: [PATCH 03/32] WIP Port group_load_store to new properties --- .../oneapi/experimental/group_load_store.hpp | 111 +++-- .../GroupAlgorithm/load_store/basic.cpp | 6 +- .../load_store/odd_sized_type.cpp | 6 +- .../GroupAlgorithm/load_store/odd_wg_size.cpp | 6 +- .../GroupAlgorithm/load_store/partial_sg.cpp | 6 +- sycl/test/check_device_code/group_load.cpp | 382 +++++++-------- sycl/test/check_device_code/group_store.cpp | 454 +++++++++--------- 7 files changed, 494 insertions(+), 477 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/group_load_store.hpp b/sycl/include/sycl/ext/oneapi/experimental/group_load_store.hpp index a4af373753c10..8fcccfed0820c 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/group_load_store.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/group_load_store.hpp @@ -19,45 +19,56 @@ namespace sycl { inline namespace _V1 { namespace ext::oneapi::experimental { - +// Data placement property enum class data_placement_enum { blocked, striped }; -struct data_placement_key - : detail::compile_time_property_key { - template - using value_t = - property_value(Placement)>>; +template +struct data_placement_property : new_properties::detail::property_base< + data_placement_property, + struct data_placement_property_key> { + static constexpr new_properties::property_sort_key_t sort_key{ + "sycl::ext::oneapi::experimental::data_placement_property_key"}; + + static constexpr bool is_blocked() { + return Placement == data_placement_enum::blocked; + } }; template -inline constexpr data_placement_key::value_t data_placement; - -inline constexpr data_placement_key::value_t - data_placement_blocked; -inline constexpr data_placement_key::value_t - data_placement_striped; - -struct contiguous_memory_key - : detail::compile_time_property_key { - using value_t = property_value; +inline constexpr data_placement_property data_placement; + +inline constexpr auto data_placement_blocked = + data_placement; +inline constexpr auto data_placement_striped = + data_placement; + +// Contiguous memory property +struct contiguous_memory_property + : new_properties::detail::property_base { + static constexpr new_properties::property_sort_key_t sort_key{ + "sycl::ext::oneapi::experimental::contiguous_memory_property"}; }; -inline constexpr contiguous_memory_key::value_t contiguous_memory; +inline constexpr contiguous_memory_property contiguous_memory; -struct full_group_key - : detail::compile_time_property_key { - using value_t = property_value; +// Full group property +struct full_group_property + : new_properties::detail::property_base { + static constexpr new_properties::property_sort_key_t sort_key{ + "sycl::ext::oneapi::experimental::full_group_property"}; }; -inline constexpr full_group_key::value_t full_group; +inline constexpr full_group_property full_group; namespace detail { -struct naive_key : detail::compile_time_property_key { - using value_t = property_value; +// Naive implementation property +struct naive_property + : new_properties::detail::property_base { + static constexpr new_properties::property_sort_key_t sort_key{ + "sycl::ext::oneapi::experimental::naive_property"}; }; -inline constexpr naive_key::value_t naive; + +inline constexpr naive_property naive; using namespace sycl::detail; } // namespace detail @@ -94,11 +105,10 @@ inline constexpr bool verify_store_types = std::is_default_constructible_v; template constexpr bool isBlocked(Properties properties) { - if constexpr (properties.template has_property()) - return properties.template get_property() == - data_placement_blocked; - else - return true; + return properties + .template get_property_or_default_to( + data_placement_blocked) + .is_blocked(); } template @@ -191,13 +201,13 @@ auto get_block_op_ptr(IteratorT iter, [[maybe_unused]] Properties props) { if constexpr (!is_spir || !BlkInfo::has_builtin) { return nullptr; - } else if constexpr (!props.template has_property()) { + } else if constexpr (!props.template has_property()) { return nullptr; } else if constexpr (detail::is_multi_ptr_v) { return get_block_op_ptr( iter.get_decorated(), props); } else if constexpr (!std::is_pointer_v) { - if constexpr (props.template has_property()) + if constexpr (props.template has_property()) return get_block_op_ptr(&*iter, props); else @@ -231,30 +241,29 @@ auto get_block_op_ptr(IteratorT iter, [[maybe_unused]] Properties props) { // Load API span overload. template + typename Properties = decltype(new_properties::properties())> std::enable_if_t && detail::is_generic_group_v> group_load(Group g, InputIteratorT in_ptr, span out, Properties props = {}) { constexpr bool blocked = detail::isBlocked(props); - using use_naive = - detail::merged_properties_t; - if constexpr (props.template has_property()) { + if constexpr (props.template has_property()) { group_barrier(g); for (int i = 0; i < out.size(); ++i) out[i] = in_ptr[detail::get_mem_idx(g, i)]; group_barrier(g); return; } else if constexpr (!std::is_same_v) { - return group_load(g, in_ptr, out, use_naive{}); + return group_load(g, in_ptr, out, + new_properties::properties{props, detail::naive}); } else { auto ptr = detail::get_block_op_ptr<4 /* load align */, ElementsPerWorkItem>( in_ptr, props); if (!ptr) - return group_load(g, in_ptr, out, use_naive{}); + return group_load(g, in_ptr, out, + new_properties::properties{props, detail::naive}); if constexpr (!std::is_same_v) { // Do optimized load. @@ -303,30 +312,30 @@ group_load(Group g, InputIteratorT in_ptr, // Store API span overload. template + typename Properties = decltype(new_properties::properties())> std::enable_if_t && detail::is_generic_group_v> group_store(Group g, const span in, OutputIteratorT out_ptr, Properties props = {}) { constexpr bool blocked = detail::isBlocked(props); - using use_naive = - detail::merged_properties_t; - if constexpr (props.template has_property()) { + + if constexpr (props.template has_property()) { group_barrier(g); for (int i = 0; i < in.size(); ++i) out_ptr[detail::get_mem_idx(g, i)] = in[i]; group_barrier(g); return; } else if constexpr (!std::is_same_v) { - return group_store(g, in, out_ptr, use_naive{}); + return group_store(g, in, out_ptr, + new_properties::properties{props, detail::naive}); } else { auto ptr = detail::get_block_op_ptr<16 /* store align */, ElementsPerWorkItem>( out_ptr, props); if (!ptr) - return group_store(g, in, out_ptr, use_naive{}); + return group_store(g, in, out_ptr, + new_properties::properties{props, detail::naive}); if constexpr (!std::is_same_v) { // Do optimized store. @@ -350,7 +359,7 @@ group_store(Group g, const span in, // Load API scalar. template + typename Properties = decltype(new_properties::properties())> std::enable_if_t && detail::is_generic_group_v> group_load(Group g, InputIteratorT in_ptr, OutputT &out, @@ -360,7 +369,7 @@ group_load(Group g, InputIteratorT in_ptr, OutputT &out, // Store API scalar. template + typename Properties = decltype(new_properties::properties())> std::enable_if_t && detail::is_generic_group_v> group_store(Group g, const InputT &in, OutputIteratorT out_ptr, @@ -370,7 +379,7 @@ group_store(Group g, const InputT &in, OutputIteratorT out_ptr, // Load API sycl::vec overload. template + typename Properties = decltype(new_properties::properties())> std::enable_if_t && detail::is_generic_group_v> group_load(Group g, InputIteratorT in_ptr, sycl::vec &out, @@ -380,7 +389,7 @@ group_load(Group g, InputIteratorT in_ptr, sycl::vec &out, // Store API sycl::vec overload. template + typename Properties = decltype(new_properties::properties())> std::enable_if_t && detail::is_generic_group_v> group_store(Group g, const sycl::vec &in, OutputIteratorT out_ptr, diff --git a/sycl/test-e2e/GroupAlgorithm/load_store/basic.cpp b/sycl/test-e2e/GroupAlgorithm/load_store/basic.cpp index 1028da48f6051..e79b1c58e3a8d 100644 --- a/sycl/test-e2e/GroupAlgorithm/load_store/basic.cpp +++ b/sycl/test-e2e/GroupAlgorithm/load_store/basic.cpp @@ -49,8 +49,10 @@ int main() { int data[elems_per_wi]; - auto blocked = sycl_exp::properties{sycl_exp::data_placement_blocked}; - auto striped = sycl_exp::properties{sycl_exp::data_placement_striped}; + auto blocked = sycl_exp::new_properties::properties{ + sycl_exp::data_placement_blocked}; + auto striped = sycl_exp::new_properties::properties{ + sycl_exp::data_placement_striped}; // default sycl_exp::group_load(g, input.begin() + offset, span{data}); diff --git a/sycl/test-e2e/GroupAlgorithm/load_store/odd_sized_type.cpp b/sycl/test-e2e/GroupAlgorithm/load_store/odd_sized_type.cpp index 98cdc1eb9f47a..7f97942b36f44 100644 --- a/sycl/test-e2e/GroupAlgorithm/load_store/odd_sized_type.cpp +++ b/sycl/test-e2e/GroupAlgorithm/load_store/odd_sized_type.cpp @@ -64,8 +64,10 @@ int main() { S data[elems_per_wi]; - auto blocked = sycl_exp::properties{sycl_exp::data_placement_blocked}; - auto striped = sycl_exp::properties{sycl_exp::data_placement_striped}; + auto blocked = sycl_exp::new_properties::properties{ + sycl_exp::data_placement_blocked}; + auto striped = sycl_exp::new_properties::properties{ + sycl_exp::data_placement_striped}; // blocked sycl_exp::group_load(g, input.begin(), span{data}, blocked); diff --git a/sycl/test-e2e/GroupAlgorithm/load_store/odd_wg_size.cpp b/sycl/test-e2e/GroupAlgorithm/load_store/odd_wg_size.cpp index adce5e9f588bc..7aaa21ecec960 100644 --- a/sycl/test-e2e/GroupAlgorithm/load_store/odd_wg_size.cpp +++ b/sycl/test-e2e/GroupAlgorithm/load_store/odd_wg_size.cpp @@ -48,8 +48,10 @@ template void test(queue &q) { int data[elems_per_wi]; - auto blocked = sycl_exp::properties{sycl_exp::data_placement_blocked}; - auto striped = sycl_exp::properties{sycl_exp::data_placement_striped}; + auto blocked = sycl_exp::new_properties::properties{ + sycl_exp::data_placement_blocked}; + auto striped = sycl_exp::new_properties::properties{ + sycl_exp::data_placement_striped}; // blocked sycl_exp::group_load(g, input.begin() + offset, span{data}, blocked); diff --git a/sycl/test-e2e/GroupAlgorithm/load_store/partial_sg.cpp b/sycl/test-e2e/GroupAlgorithm/load_store/partial_sg.cpp index 72b87364f0ee9..2012db6183500 100644 --- a/sycl/test-e2e/GroupAlgorithm/load_store/partial_sg.cpp +++ b/sycl/test-e2e/GroupAlgorithm/load_store/partial_sg.cpp @@ -45,8 +45,10 @@ template void test(queue &q) { int data[elems_per_wi]; - auto blocked = sycl_exp::properties{sycl_exp::data_placement_blocked}; - auto striped = sycl_exp::properties{sycl_exp::data_placement_striped}; + auto blocked = sycl_exp::new_properties::properties{ + sycl_exp::data_placement_blocked}; + auto striped = sycl_exp::new_properties::properties{ + sycl_exp::data_placement_striped}; // blocked sycl_exp::group_load(sg, input.begin() + offset, span{data}, blocked); diff --git a/sycl/test/check_device_code/group_load.cpp b/sycl/test/check_device_code/group_load.cpp index f1f9650b69393..18ffb6d84eed8 100644 --- a/sycl/test/check_device_code/group_load.cpp +++ b/sycl/test/check_device_code/group_load.cpp @@ -16,22 +16,22 @@ namespace oneapi_exp = sycl::ext::oneapi::experimental; using namespace sycl::ext::oneapi::experimental; using full_group_blocked = - decltype(properties(full_group, data_placement_blocked)); + decltype(new_properties::properties(full_group, data_placement_blocked)); -using naive_blocked = - decltype(properties(oneapi_exp::detail::naive, data_placement_blocked)); +using naive_blocked = decltype(new_properties::properties( + oneapi_exp::detail::naive, data_placement_blocked)); -using opt_blocked = - decltype(properties(full_group, contiguous_memory, data_placement_blocked)); +using opt_blocked = decltype(new_properties::properties( + full_group, contiguous_memory, data_placement_blocked)); using full_group_striped = - decltype(properties(full_group, data_placement_striped)); + decltype(new_properties::properties(full_group, data_placement_striped)); -using naive_striped = - decltype(properties(oneapi_exp::detail::naive, data_placement_striped)); +using naive_striped = decltype(new_properties::properties( + oneapi_exp::detail::naive, data_placement_striped)); -using opt_striped = - decltype(properties(full_group, contiguous_memory, data_placement_striped)); +using opt_striped = decltype(new_properties::properties( + full_group, contiguous_memory, data_placement_striped)); template using plain_global_ptr = typename sycl::detail::DecoratedType< @@ -41,15 +41,15 @@ using plain_global_ptr = typename sycl::detail::DecoratedType< template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< sycl::sub_group, plain_global_ptr, int, naive_blocked>( sycl::sub_group, plain_global_ptr, int &, naive_blocked); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESN_SL_RSM_T2_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr addrspace(4) noundef align 4 dereferenceable(4) [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] comdat !srcloc [[META5:![0-9]+]] !sycl_fixed_targets [[META6:![0-9]+]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_SK_RSL_T2_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr addrspace(4) noundef align 4 dereferenceable(4) [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] comdat !srcloc [[META6:![0-9]+]] !sycl_fixed_targets [[META7:![0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4:[0-9]+]] // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 // CHECK-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[TMP0]] to i64 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[IN_PTR]], i64 [[IDXPROM_I]] -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7:![0-9]+]] -// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(4) [[OUT]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA8:![0-9]+]] +// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(4) [[OUT]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] // CHECK-NEXT: ret void @@ -57,26 +57,26 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< sycl::sub_group, plain_global_ptr, int, opt_blocked>( sycl::sub_group, plain_global_ptr, int &, opt_blocked); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_RSN_T2_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr addrspace(4) noundef align 4 dereferenceable(4) [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.0") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META5]] !sycl_fixed_targets [[META6]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_SK_RSL_T2_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr addrspace(4) noundef align 4 dereferenceable(4) [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.2") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META6]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[CMP_I_I:%.*]] = icmp ne ptr addrspace(1) [[IN_PTR]], null // CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I]]) -// CHECK-NEXT: [[CALL4_I:%.*]] = tail call spir_func noundef i32 @_Z30__spirv_SubgroupBlockReadINTELIjET_PU3AS1Kj(ptr addrspace(1) noundef nonnull [[IN_PTR]]) #[[ATTR4]] -// CHECK-NEXT: store i32 [[CALL4_I]], ptr addrspace(4) [[OUT]], align 4 +// CHECK-NEXT: [[CALL6_I:%.*]] = tail call spir_func noundef i32 @_Z30__spirv_SubgroupBlockReadINTELIjET_PU3AS1Kj(ptr addrspace(1) noundef nonnull [[IN_PTR]]) #[[ATTR4]] +// CHECK-NEXT: store i32 [[CALL6_I]], ptr addrspace(4) [[OUT]], align 4 // CHECK-NEXT: ret void // Check that contiguous_memory can be auto-detected. template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< sycl::sub_group, plain_global_ptr, int, full_group_blocked>( sycl::sub_group, plain_global_ptr, int &, full_group_blocked); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_SK_RSL_T2_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr addrspace(4) noundef align 4 dereferenceable(4) [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.2") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META5]] !sycl_fixed_targets [[META6]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESL_SJ_RSK_T2_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr addrspace(4) noundef align 4 dereferenceable(4) [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.8") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META6]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[CMP_I_I:%.*]] = icmp ne ptr addrspace(1) [[IN_PTR]], null // CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I]]) -// CHECK-NEXT: [[CALL4_I:%.*]] = tail call spir_func noundef i32 @_Z30__spirv_SubgroupBlockReadINTELIjET_PU3AS1Kj(ptr addrspace(1) noundef nonnull [[IN_PTR]]) #[[ATTR4]] -// CHECK-NEXT: store i32 [[CALL4_I]], ptr addrspace(4) [[OUT]], align 4 +// CHECK-NEXT: [[CALL6_I:%.*]] = tail call spir_func noundef i32 @_Z30__spirv_SubgroupBlockReadINTELIjET_PU3AS1Kj(ptr addrspace(1) noundef nonnull [[IN_PTR]]) #[[ATTR4]] +// CHECK-NEXT: store i32 [[CALL6_I]], ptr addrspace(4) [[OUT]], align 4 // CHECK-NEXT: ret void // SYCL 2020's accessor can't be statically known to be contiguous. @@ -87,19 +87,19 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load( sycl::sub_group, accessor_iter_t, int &, full_group_blocked); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupENS0_6detail17accessor_iteratorIKiLi1EEEiNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSC_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_RSN_T2_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[IN_PTR:%.*]], ptr addrspace(4) noundef align 4 dereferenceable(4) [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.2") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META5]] !sycl_fixed_targets [[META6]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupENS0_6detail17accessor_iteratorIKiLi1EEEiNS3_14new_properties10propertiesINSA_6detail20properties_type_listIJNS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESN_SL_RSM_T2_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[IN_PTR:%.*]], ptr addrspace(4) noundef align 4 dereferenceable(4) [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.8") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META6]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[AGG_TMP1_SROA_0_0_COPYLOAD:%.*]] = load ptr addrspace(4), ptr [[IN_PTR]], align 8, !tbaa [[TBAA11:![0-9]+]] +// CHECK-NEXT: [[AGG_TMP1_SROA_0_0_COPYLOAD:%.*]] = load ptr addrspace(4), ptr [[IN_PTR]], align 8, !tbaa [[TBAA12:![0-9]+]] // CHECK-NEXT: [[AGG_TMP1_SROA_2_0_IN_PTR_ASCAST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[IN_PTR]], i64 8 -// CHECK-NEXT: [[AGG_TMP1_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr [[AGG_TMP1_SROA_2_0_IN_PTR_ASCAST_SROA_IDX]], align 8, !tbaa [[TBAA13:![0-9]+]] +// CHECK-NEXT: [[AGG_TMP1_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr [[AGG_TMP1_SROA_2_0_IN_PTR_ASCAST_SROA_IDX]], align 8, !tbaa [[TBAA14:![0-9]+]] // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 // CHECK-NEXT: [[CONV3_I_I:%.*]] = sext i32 [[TMP0]] to i64 // CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr addrspace(4) [[AGG_TMP1_SROA_0_0_COPYLOAD]], i64 [[AGG_TMP1_SROA_2_0_COPYLOAD]] // CHECK-NEXT: [[ADD_PTR_I_I_I_I:%.*]] = getelementptr i32, ptr addrspace(4) [[TMP1]], i64 [[CONV3_I_I]] -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[ADD_PTR_I_I_I_I]], align 4, !tbaa [[TBAA7]] -// CHECK-NEXT: store i32 [[TMP2]], ptr addrspace(4) [[OUT]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[ADD_PTR_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: store i32 [[TMP2]], ptr addrspace(4) [[OUT]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] // CHECK-NEXT: ret void @@ -108,12 +108,12 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< sycl::sub_group, accessor_iter_t, int, opt_blocked>(sycl::sub_group, accessor_iter_t, int &, opt_blocked); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupENS0_6detail17accessor_iteratorIKiLi1EEEiNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSC_INS3_21contiguous_memory_keyEJEEENSC_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESQ_SO_RSP_T2_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[IN_PTR:%.*]], ptr addrspace(4) noundef align 4 dereferenceable(4) [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.0") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META5]] !sycl_fixed_targets [[META6]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupENS0_6detail17accessor_iteratorIKiLi1EEEiNS3_14new_properties10propertiesINSA_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_RSN_T2_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[IN_PTR:%.*]], ptr addrspace(4) noundef align 4 dereferenceable(4) [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.2") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META6]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[AGG_TMP1_SROA_0_0_COPYLOAD:%.*]] = load ptr addrspace(4), ptr [[IN_PTR]], align 8, !tbaa [[TBAA11]] +// CHECK-NEXT: [[AGG_TMP1_SROA_0_0_COPYLOAD:%.*]] = load ptr addrspace(4), ptr [[IN_PTR]], align 8, !tbaa [[TBAA12]] // CHECK-NEXT: [[AGG_TMP1_SROA_2_0_IN_PTR_ASCAST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[IN_PTR]], i64 8 -// CHECK-NEXT: [[AGG_TMP1_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr [[AGG_TMP1_SROA_2_0_IN_PTR_ASCAST_SROA_IDX]], align 8, !tbaa [[TBAA13]] +// CHECK-NEXT: [[AGG_TMP1_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr [[AGG_TMP1_SROA_2_0_IN_PTR_ASCAST_SROA_IDX]], align 8, !tbaa [[TBAA14]] // CHECK-NEXT: [[ADD_PTR_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[AGG_TMP1_SROA_0_0_COPYLOAD]], i64 [[AGG_TMP1_SROA_2_0_COPYLOAD]] // CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(4) [[ADD_PTR_I_I_I]], null // CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) @@ -125,15 +125,15 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 // CHECK-NEXT: [[CONV3_I_I:%.*]] = sext i32 [[TMP0]] to i64 // CHECK-NEXT: [[ADD_PTR_I_I_I_I:%.*]] = getelementptr i32, ptr addrspace(4) [[ADD_PTR_I_I_I]], i64 [[CONV3_I_I]] -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ADD_PTR_I_I_I_I]], align 4, !tbaa [[TBAA7]] -// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(4) [[OUT]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ADD_PTR_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(4) [[OUT]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] -// CHECK-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPENS0_6DETAIL17ACCESSOR_ITERATORIKILI1EEEILM1ENS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSC_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSC_INS3_14FULL_GROUP_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESQ_SO_NS0_4SPANISP_XT2_EEET3__EXIT:%.*]] +// CHECK-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPENS0_6DETAIL17ACCESSOR_ITERATORIKILI1EEEILM1ENS3_14NEW_PROPERTIES10PROPERTIESINSA_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME0EEENS3_19FULL_GROUP_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESO_SM_NS0_4SPANISN_XT2_EEET3__EXIT:%.*]] // CHECK: if.end.i: -// CHECK-NEXT: [[CALL6_I:%.*]] = tail call spir_func noundef i32 @_Z30__spirv_SubgroupBlockReadINTELIjET_PU3AS1Kj(ptr addrspace(1) noundef nonnull [[CALL_I_I_I_I]]) #[[ATTR4]] -// CHECK-NEXT: store i32 [[CALL6_I]], ptr addrspace(4) [[OUT]], align 4 -// CHECK-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPENS0_6DETAIL17ACCESSOR_ITERATORIKILI1EEEILM1ENS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSC_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSC_INS3_14FULL_GROUP_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESQ_SO_NS0_4SPANISP_XT2_EEET3__EXIT]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupENS0_6detail17accessor_iteratorIKiLi1EEEiLm1ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSC_INS3_21contiguous_memory_keyEJEEENSC_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESQ_SO_NS0_4spanISP_XT2_EEET3_.exit: +// CHECK-NEXT: [[CALL8_I:%.*]] = tail call spir_func noundef i32 @_Z30__spirv_SubgroupBlockReadINTELIjET_PU3AS1Kj(ptr addrspace(1) noundef nonnull [[CALL_I_I_I_I]]) #[[ATTR4]] +// CHECK-NEXT: store i32 [[CALL8_I]], ptr addrspace(4) [[OUT]], align 4 +// CHECK-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPENS0_6DETAIL17ACCESSOR_ITERATORIKILI1EEEILM1ENS3_14NEW_PROPERTIES10PROPERTIESINSA_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME0EEENS3_19FULL_GROUP_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESO_SM_NS0_4SPANISN_XT2_EEET3__EXIT]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupENS0_6detail17accessor_iteratorIKiLi1EEEiLm1ENS3_14new_properties10propertiesINSA_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEET3_.exit: // CHECK-NEXT: ret void // Run-time alignment check is needed if type's alignment is less than BlockRead @@ -141,8 +141,8 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< sycl::sub_group, plain_global_ptr, char, opt_blocked>( sycl::sub_group, plain_global_ptr, char &, opt_blocked); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1ccNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_RSN_T2_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr addrspace(4) noundef align 1 dereferenceable(1) [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.0") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META5]] !sycl_fixed_targets [[META6]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1ccNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_SK_RSL_T2_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr addrspace(4) noundef align 1 dereferenceable(1) [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.2") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META6]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[CMP_I_I:%.*]] = icmp ne ptr addrspace(1) [[IN_PTR]], null // CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I]]) @@ -155,15 +155,15 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 // CHECK-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[TMP1]] to i64 // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[IN_PTR]], i64 [[IDXPROM_I_I]] -// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr addrspace(1) [[ARRAYIDX_I_I]], align 1, !tbaa [[TBAA15:![0-9]+]] -// CHECK-NEXT: store i8 [[TMP2]], ptr addrspace(4) [[OUT]], align 1, !tbaa [[TBAA15]] +// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr addrspace(1) [[ARRAYIDX_I_I]], align 1, !tbaa [[TBAA16:![0-9]+]] +// CHECK-NEXT: store i8 [[TMP2]], ptr addrspace(4) [[OUT]], align 1, !tbaa [[TBAA16]] // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] -// CHECK-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1CCLM1ENS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSA_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSA_INS3_14FULL_GROUP_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESO_SM_NS0_4SPANISN_XT2_EEET3__EXIT:%.*]] +// CHECK-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1CCLM1ENS3_14NEW_PROPERTIES10PROPERTIESINS8_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME0EEENS3_19FULL_GROUP_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESM_SK_NS0_4SPANISL_XT2_EEET3__EXIT:%.*]] // CHECK: if.end.i: -// CHECK-NEXT: [[CALL4_I:%.*]] = tail call spir_func noundef zeroext i8 @_Z30__spirv_SubgroupBlockReadINTELIhET_PU3AS1Kh(ptr addrspace(1) noundef nonnull [[IN_PTR]]) #[[ATTR4]] -// CHECK-NEXT: store i8 [[CALL4_I]], ptr addrspace(4) [[OUT]], align 1 -// CHECK-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1CCLM1ENS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSA_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSA_INS3_14FULL_GROUP_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESO_SM_NS0_4SPANISN_XT2_EEET3__EXIT]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1ccLm1ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEET3_.exit: +// CHECK-NEXT: [[CALL6_I:%.*]] = tail call spir_func noundef zeroext i8 @_Z30__spirv_SubgroupBlockReadINTELIhET_PU3AS1Kh(ptr addrspace(1) noundef nonnull [[IN_PTR]]) #[[ATTR4]] +// CHECK-NEXT: store i8 [[CALL6_I]], ptr addrspace(4) [[OUT]], align 1 +// CHECK-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1CCLM1ENS3_14NEW_PROPERTIES10PROPERTIESINS8_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME0EEENS3_19FULL_GROUP_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESM_SK_NS0_4SPANISL_XT2_EEET3__EXIT]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1ccLm1ENS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_SK_NS0_4spanISL_XT2_EEET3_.exit: // CHECK-NEXT: ret void // Four shorts in blocked data layout could be loaded as a single 64-bit @@ -171,8 +171,8 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< sycl::sub_group, plain_global_ptr, short, 4, opt_blocked>( sycl::sub_group, plain_global_ptr, span, opt_blocked); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1ssLm4ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEET3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.5") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.0") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16:![0-9]+]] !sycl_fixed_targets [[META6]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1ssLm4ENS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_SK_NS0_4spanISL_XT2_EEET3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.11") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.2") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META17:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[CMP_I:%.*]] = icmp ne ptr addrspace(1) [[IN_PTR]], null // CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP_I]]) @@ -181,33 +181,33 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< // CHECK-NEXT: [[CMP1_I_NOT:%.*]] = icmp eq i64 [[REM_I]], 0 // CHECK-NEXT: br i1 [[CMP1_I_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] // CHECK: if.then: -// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[OUT]], align 8, !tbaa [[TBAA11]] +// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[OUT]], align 8, !tbaa [[TBAA12]] // CHECK-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(4) // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META17:![0-9]+]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META18:![0-9]+]] // CHECK-NEXT: [[MUL_I_I:%.*]] = shl i32 [[TMP3]], 2 // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[IF_THEN]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] -// CHECK-NEXT: [[CMP_I14:%.*]] = icmp ult i32 [[I_0_I]], 4 -// CHECK-NEXT: br i1 [[CMP_I14]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1SSLM4ENS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSA_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSA_INS3_14FULL_GROUP_KEYEJEEENSA_INS3_6DETAIL9NAIVE_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEET3__EXIT:%.*]] +// CHECK-NEXT: [[CMP_I18:%.*]] = icmp ult i32 [[I_0_I]], 4 +// CHECK-NEXT: br i1 [[CMP_I18]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1SSLM4ENS3_14NEW_PROPERTIES10PROPERTIESINS8_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME0EEENS3_19FULL_GROUP_PROPERTYENS3_6DETAIL14NAIVE_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESO_SM_NS0_4SPANISN_XT2_EEET3__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 // CHECK-NEXT: [[ADD_I_I:%.*]] = or disjoint i32 [[MUL_I_I]], [[I_0_I]] // CHECK-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[ADD_I_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[IN_PTR]], i64 [[IDXPROM_I]] -// CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr addrspace(1) [[ARRAYIDX_I]], align 2, !tbaa [[TBAA20:![0-9]+]] +// CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr addrspace(1) [[ARRAYIDX_I]], align 2, !tbaa [[TBAA21:![0-9]+]] // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(4) [[TMP2]], i64 [[CONV_I]] -// CHECK-NEXT: store i16 [[TMP4]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA20]] +// CHECK-NEXT: store i16 [[TMP4]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA21]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP22:![0-9]+]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1ssLm4ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEENSA_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEET3_.exit: +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1ssLm4ENS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEET3_.exit: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] // CHECK-NEXT: br label [[CLEANUP:%.*]] // CHECK: if.end: -// CHECK-NEXT: [[CALL4:%.*]] = tail call spir_func noundef i64 @_Z30__spirv_SubgroupBlockReadINTELImET_PU3AS1Km(ptr addrspace(1) noundef nonnull [[IN_PTR]]) #[[ATTR4]] -// CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(4), ptr [[OUT]], align 8, !tbaa [[TBAA24:![0-9]+]] -// CHECK-NEXT: store i64 [[CALL4]], ptr addrspace(4) [[TMP5]], align 2 +// CHECK-NEXT: [[CALL6:%.*]] = tail call spir_func noundef i64 @_Z30__spirv_SubgroupBlockReadINTELImET_PU3AS1Km(ptr addrspace(1) noundef nonnull [[IN_PTR]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(4), ptr [[OUT]], align 8, !tbaa [[TBAA25:![0-9]+]] +// CHECK-NEXT: store i64 [[CALL6]], ptr addrspace(4) [[TMP5]], align 2 // CHECK-NEXT: br label [[CLEANUP]] // CHECK: cleanup: // CHECK-NEXT: ret void @@ -216,30 +216,30 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< sycl::sub_group, plain_global_ptr, int, 3, opt_blocked>( sycl::sub_group, plain_global_ptr, span, opt_blocked); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm3ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEET3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.6") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.0") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META6]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm3ENS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_SK_NS0_4spanISL_XT2_EEET3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.12") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.2") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META17]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT]], align 8, !tbaa [[TBAA11]] +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT]], align 8, !tbaa [[TBAA12]] // CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META26:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META27:![0-9]+]] // CHECK-NEXT: [[MUL_I_I:%.*]] = mul i32 [[TMP2]], 3 // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] // CHECK-NEXT: [[CMP_I:%.*]] = icmp ult i32 [[I_0_I]], 3 -// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1IILM3ENS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSA_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSA_INS3_14FULL_GROUP_KEYEJEEENSA_INS3_6DETAIL9NAIVE_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEET3__EXIT:%.*]] +// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1IILM3ENS3_14NEW_PROPERTIES10PROPERTIESINS8_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME0EEENS3_19FULL_GROUP_PROPERTYENS3_6DETAIL14NAIVE_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESO_SM_NS0_4SPANISN_XT2_EEET3__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 // CHECK-NEXT: [[ADD_I_I:%.*]] = add i32 [[MUL_I_I]], [[I_0_I]] // CHECK-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[ADD_I_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[IN_PTR]], i64 [[IDXPROM_I]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I]] -// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP29:![0-9]+]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm3ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEENSA_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEET3_.exit: +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm3ENS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEET3_.exit: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] // CHECK-NEXT: ret void @@ -248,30 +248,30 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< sycl::sub_group, plain_global_ptr, int, 4, opt_blocked>( sycl::sub_group, plain_global_ptr, span, opt_blocked); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm4ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEET3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.7") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.0") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META6]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm4ENS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_SK_NS0_4spanISL_XT2_EEET3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.13") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.2") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META17]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT]], align 8, !tbaa [[TBAA11]] +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT]], align 8, !tbaa [[TBAA12]] // CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META30:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META31:![0-9]+]] // CHECK-NEXT: [[MUL_I_I:%.*]] = shl i32 [[TMP2]], 2 // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] // CHECK-NEXT: [[CMP_I:%.*]] = icmp ult i32 [[I_0_I]], 4 -// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1IILM4ENS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSA_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSA_INS3_14FULL_GROUP_KEYEJEEENSA_INS3_6DETAIL9NAIVE_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEET3__EXIT:%.*]] +// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1IILM4ENS3_14NEW_PROPERTIES10PROPERTIESINS8_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME0EEENS3_19FULL_GROUP_PROPERTYENS3_6DETAIL14NAIVE_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESO_SM_NS0_4SPANISN_XT2_EEET3__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 // CHECK-NEXT: [[ADD_I_I:%.*]] = or disjoint i32 [[MUL_I_I]], [[I_0_I]] // CHECK-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[ADD_I_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[IN_PTR]], i64 [[IDXPROM_I]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I]] -// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP33:![0-9]+]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm4ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEENSA_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEET3_.exit: +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP34:![0-9]+]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm4ENS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEET3_.exit: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] // CHECK-NEXT: ret void @@ -279,30 +279,30 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< sycl::sub_group, plain_global_ptr, int, 7, opt_blocked>( sycl::sub_group, plain_global_ptr, span, opt_blocked); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm7ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEET3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.8") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.0") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META6]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm7ENS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_SK_NS0_4spanISL_XT2_EEET3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.14") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.2") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META17]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT]], align 8, !tbaa [[TBAA11]] +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT]], align 8, !tbaa [[TBAA12]] // CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META34:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META35:![0-9]+]] // CHECK-NEXT: [[MUL_I_I:%.*]] = mul i32 [[TMP2]], 7 // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] // CHECK-NEXT: [[CMP_I:%.*]] = icmp ult i32 [[I_0_I]], 7 -// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1IILM7ENS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSA_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSA_INS3_14FULL_GROUP_KEYEJEEENSA_INS3_6DETAIL9NAIVE_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEET3__EXIT:%.*]] +// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1IILM7ENS3_14NEW_PROPERTIES10PROPERTIESINS8_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME0EEENS3_19FULL_GROUP_PROPERTYENS3_6DETAIL14NAIVE_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESO_SM_NS0_4SPANISN_XT2_EEET3__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 // CHECK-NEXT: [[ADD_I_I:%.*]] = add i32 [[MUL_I_I]], [[I_0_I]] // CHECK-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[ADD_I_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[IN_PTR]], i64 [[IDXPROM_I]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I]] -// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP37:![0-9]+]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm7ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEENSA_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEET3_.exit: +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP38:![0-9]+]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm7ENS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEET3_.exit: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] // CHECK-NEXT: ret void @@ -313,13 +313,13 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< sycl::sub_group, plain_global_ptr, int, 2, naive_striped>( sycl::sub_group, plain_global_ptr, span, naive_striped); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm2ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESN_SL_NS0_4spanISM_XT2_EEET3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.9") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.10") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META6]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm2ENS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_SK_NS0_4spanISL_XT2_EEET3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.15") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.16") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META17]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META38:![0-9]+]] -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA7]], !noalias [[META41:![0-9]+]] -// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(4), ptr [[OUT]], align 8, !tbaa [[TBAA44:![0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META39:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META42:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(4), ptr [[OUT]], align 8, !tbaa [[TBAA45:![0-9]+]] // CHECK-NEXT: br label [[FOR_COND:%.*]] // CHECK: for.cond: // CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] @@ -334,38 +334,38 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< // CHECK-NEXT: [[ADD_I:%.*]] = add i32 [[TMP0]], [[MUL_I]] // CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD_I]] to i64 // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[IN_PTR]], i64 [[IDXPROM]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP2]], i64 [[CONV]] -// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1 -// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // Check that optimized implementation is selected. template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< sycl::sub_group, plain_global_ptr, int, 2, opt_striped>( sycl::sub_group, plain_global_ptr, span, opt_striped); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm2ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEET3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.9") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.11") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META6]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm2ENS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_SK_NS0_4spanISL_XT2_EEET3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.15") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.19") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META17]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: cleanup: // CHECK-NEXT: [[CMP_I:%.*]] = icmp ne ptr addrspace(1) [[IN_PTR]], null // CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP_I]]) -// CHECK-NEXT: [[CALL4:%.*]] = tail call spir_func noundef <2 x i32> @_Z30__spirv_SubgroupBlockReadINTELIDv2_jET_PU3AS1Kj(ptr addrspace(1) noundef nonnull [[IN_PTR]]) #[[ATTR4]] -// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr [[OUT]], align 8, !tbaa [[TBAA44]] -// CHECK-NEXT: store <2 x i32> [[CALL4]], ptr addrspace(4) [[TMP0]], align 4 +// CHECK-NEXT: [[CALL6:%.*]] = tail call spir_func noundef <2 x i32> @_Z30__spirv_SubgroupBlockReadINTELIDv2_jET_PU3AS1Kj(ptr addrspace(1) noundef nonnull [[IN_PTR]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr [[OUT]], align 8, !tbaa [[TBAA45]] +// CHECK-NEXT: store <2 x i32> [[CALL6]], ptr addrspace(4) [[TMP0]], align 4 // CHECK-NEXT: ret void // Check that contiguous_memory can be auto-detected. template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< sycl::sub_group, plain_global_ptr, int, 2, full_group_striped>( sycl::sub_group, plain_global_ptr, span, full_group_striped); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm2ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_SK_NS0_4spanISL_XT2_EEET3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.9") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.13") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META6]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm2ENS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESL_SJ_NS0_4spanISK_XT2_EEET3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.15") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.21") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META17]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: cleanup: // CHECK-NEXT: [[CMP_I:%.*]] = icmp ne ptr addrspace(1) [[IN_PTR]], null // CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP_I]]) -// CHECK-NEXT: [[CALL4:%.*]] = tail call spir_func noundef <2 x i32> @_Z30__spirv_SubgroupBlockReadINTELIDv2_jET_PU3AS1Kj(ptr addrspace(1) noundef nonnull [[IN_PTR]]) #[[ATTR4]] -// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr [[OUT]], align 8, !tbaa [[TBAA44]] -// CHECK-NEXT: store <2 x i32> [[CALL4]], ptr addrspace(4) [[TMP0]], align 4 +// CHECK-NEXT: [[CALL6:%.*]] = tail call spir_func noundef <2 x i32> @_Z30__spirv_SubgroupBlockReadINTELIDv2_jET_PU3AS1Kj(ptr addrspace(1) noundef nonnull [[IN_PTR]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr [[OUT]], align 8, !tbaa [[TBAA45]] +// CHECK-NEXT: store <2 x i32> [[CALL6]], ptr addrspace(4) [[TMP0]], align 4 // CHECK-NEXT: ret void // SYCL 2020's accessor can't be statically known to be contiguous. @@ -375,35 +375,35 @@ using accessor_iter_t = accessor( sycl::sub_group, accessor_iter_t, span, full_group_striped); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupENS0_6detail17accessor_iteratorIKiLi1EEEiLm2ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSC_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEET3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.9") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.13") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META6]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupENS0_6detail17accessor_iteratorIKiLi1EEEiLm2ENS3_14new_properties10propertiesINSA_6detail20properties_type_listIJNS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESN_SL_NS0_4spanISM_XT2_EEET3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.15") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.21") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META17]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[AGG_TMP3_SROA_0_0_COPYLOAD:%.*]] = load ptr addrspace(4), ptr [[IN_PTR]], align 8, !tbaa [[TBAA11]] +// CHECK-NEXT: [[AGG_TMP3_SROA_0_0_COPYLOAD:%.*]] = load ptr addrspace(4), ptr [[IN_PTR]], align 8, !tbaa [[TBAA12]] // CHECK-NEXT: [[AGG_TMP3_SROA_2_0_IN_PTR_ASCAST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[IN_PTR]], i64 8 -// CHECK-NEXT: [[AGG_TMP3_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr [[AGG_TMP3_SROA_2_0_IN_PTR_ASCAST_SROA_IDX]], align 8, !tbaa [[TBAA13]] -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT]], align 8, !tbaa [[TBAA11]] +// CHECK-NEXT: [[AGG_TMP3_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr [[AGG_TMP3_SROA_2_0_IN_PTR_ASCAST_SROA_IDX]], align 8, !tbaa [[TBAA14]] +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT]], align 8, !tbaa [[TBAA12]] // CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META47:![0-9]+]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA7]], !noalias [[META50:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META48:![0-9]+]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META51:![0-9]+]] // CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr addrspace(4) [[AGG_TMP3_SROA_0_0_COPYLOAD]], i64 [[AGG_TMP3_SROA_2_0_COPYLOAD]] // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] // CHECK-NEXT: [[CMP_I:%.*]] = icmp ult i32 [[I_0_I]], 2 -// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPENS0_6DETAIL17ACCESSOR_ITERATORIKILI1EEEILM2ENS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSC_INS3_14FULL_GROUP_KEYEJEEENSC_INS3_6DETAIL9NAIVE_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEET3__EXIT:%.*]] +// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPENS0_6DETAIL17ACCESSOR_ITERATORIKILI1EEEILM2ENS3_14NEW_PROPERTIES10PROPERTIESINSA_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME1EEENS3_19FULL_GROUP_PROPERTYENS3_6DETAIL14NAIVE_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESP_SN_NS0_4SPANISO_XT2_EEET3__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 // CHECK-NEXT: [[MUL_I_I:%.*]] = mul nuw nsw i32 [[TMP3]], [[I_0_I]] // CHECK-NEXT: [[ADD_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I]] // CHECK-NEXT: [[CONV3_I:%.*]] = sext i32 [[ADD_I_I]] to i64 // CHECK-NEXT: [[ADD_PTR_I_I_I:%.*]] = getelementptr i32, ptr addrspace(4) [[TMP4]], i64 [[CONV3_I]] -// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(4) [[ADD_PTR_I_I_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(4) [[ADD_PTR_I_I_I]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I]] -// CHECK-NEXT: store i32 [[TMP5]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: store i32 [[TMP5]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP53:![0-9]+]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupENS0_6detail17accessor_iteratorIKiLi1EEEiLm2ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSC_INS3_14full_group_keyEJEEENSC_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEET3_.exit: +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP54:![0-9]+]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupENS0_6detail17accessor_iteratorIKiLi1EEEiLm2ENS3_14new_properties10propertiesINSA_6detail20properties_type_listIJNS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESP_SN_NS0_4spanISO_XT2_EEET3_.exit: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] // CHECK-NEXT: ret void @@ -413,12 +413,12 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< accessor_iter_t, span, opt_striped); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupENS0_6detail17accessor_iteratorIKiLi1EEEiLm2ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSC_INS3_21contiguous_memory_keyEJEEENSC_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESQ_SO_NS0_4spanISP_XT2_EEET3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.9") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.11") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META6]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupENS0_6detail17accessor_iteratorIKiLi1EEEiLm2ENS3_14new_properties10propertiesINSA_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEET3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.15") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.19") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META17]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[AGG_TMP_SROA_0_0_COPYLOAD:%.*]] = load ptr addrspace(4), ptr [[IN_PTR]], align 8, !tbaa [[TBAA11]] +// CHECK-NEXT: [[AGG_TMP_SROA_0_0_COPYLOAD:%.*]] = load ptr addrspace(4), ptr [[IN_PTR]], align 8, !tbaa [[TBAA12]] // CHECK-NEXT: [[AGG_TMP_SROA_2_0_IN_PTR_ASCAST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[IN_PTR]], i64 8 -// CHECK-NEXT: [[AGG_TMP_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr [[AGG_TMP_SROA_2_0_IN_PTR_ASCAST_SROA_IDX]], align 8, !tbaa [[TBAA13]] +// CHECK-NEXT: [[AGG_TMP_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr [[AGG_TMP_SROA_2_0_IN_PTR_ASCAST_SROA_IDX]], align 8, !tbaa [[TBAA14]] // CHECK-NEXT: [[ADD_PTR_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[AGG_TMP_SROA_0_0_COPYLOAD]], i64 [[AGG_TMP_SROA_2_0_COPYLOAD]] // CHECK-NEXT: [[CMP_I_I:%.*]] = icmp ne ptr addrspace(4) [[ADD_PTR_I_I]], null // CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I]]) @@ -426,34 +426,34 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< // CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq ptr addrspace(1) [[CALL_I_I_I]], null // CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] // CHECK: if.then: -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT]], align 8, !tbaa [[TBAA11]] +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT]], align 8, !tbaa [[TBAA12]] // CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META54:![0-9]+]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA7]], !noalias [[META57:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META55:![0-9]+]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META58:![0-9]+]] // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[IF_THEN]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] // CHECK-NEXT: [[CMP_I:%.*]] = icmp ult i32 [[I_0_I]], 2 -// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPENS0_6DETAIL17ACCESSOR_ITERATORIKILI1EEEILM2ENS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSC_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSC_INS3_14FULL_GROUP_KEYEJEEENSC_INS3_6DETAIL9NAIVE_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEEST_SR_NS0_4SPANISS_XT2_EEET3__EXIT:%.*]] +// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPENS0_6DETAIL17ACCESSOR_ITERATORIKILI1EEEILM2ENS3_14NEW_PROPERTIES10PROPERTIESINSA_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME1EEENS3_19FULL_GROUP_PROPERTYENS3_6DETAIL14NAIVE_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESQ_SO_NS0_4SPANISP_XT2_EEET3__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 // CHECK-NEXT: [[MUL_I_I:%.*]] = mul nuw nsw i32 [[TMP3]], [[I_0_I]] // CHECK-NEXT: [[ADD_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I]] // CHECK-NEXT: [[CONV3_I:%.*]] = sext i32 [[ADD_I_I]] to i64 // CHECK-NEXT: [[ADD_PTR_I_I_I:%.*]] = getelementptr i32, ptr addrspace(4) [[ADD_PTR_I_I]], i64 [[CONV3_I]] -// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[ADD_PTR_I_I_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[ADD_PTR_I_I_I]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I]] -// CHECK-NEXT: store i32 [[TMP4]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: store i32 [[TMP4]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP60:![0-9]+]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupENS0_6detail17accessor_iteratorIKiLi1EEEiLm2ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSC_INS3_21contiguous_memory_keyEJEEENSC_INS3_14full_group_keyEJEEENSC_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeEST_SR_NS0_4spanISS_XT2_EEET3_.exit: +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP61:![0-9]+]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupENS0_6detail17accessor_iteratorIKiLi1EEEiLm2ENS3_14new_properties10propertiesINSA_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESQ_SO_NS0_4spanISP_XT2_EEET3_.exit: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] // CHECK-NEXT: br label [[CLEANUP:%.*]] // CHECK: if.end: -// CHECK-NEXT: [[CALL6:%.*]] = tail call spir_func noundef <2 x i32> @_Z30__spirv_SubgroupBlockReadINTELIDv2_jET_PU3AS1Kj(ptr addrspace(1) noundef nonnull [[CALL_I_I_I]]) #[[ATTR4]] -// CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(4), ptr [[OUT]], align 8, !tbaa [[TBAA44]] -// CHECK-NEXT: store <2 x i32> [[CALL6]], ptr addrspace(4) [[TMP5]], align 4 +// CHECK-NEXT: [[CALL8:%.*]] = tail call spir_func noundef <2 x i32> @_Z30__spirv_SubgroupBlockReadINTELIDv2_jET_PU3AS1Kj(ptr addrspace(1) noundef nonnull [[CALL_I_I_I]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(4), ptr [[OUT]], align 8, !tbaa [[TBAA45]] +// CHECK-NEXT: store <2 x i32> [[CALL8]], ptr addrspace(4) [[TMP5]], align 4 // CHECK-NEXT: br label [[CLEANUP]] // CHECK: cleanup: // CHECK-NEXT: ret void @@ -463,8 +463,8 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< sycl::sub_group, plain_global_ptr, char, 2, opt_striped>( sycl::sub_group, plain_global_ptr, span, opt_striped); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1ccLm2ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEET3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.15") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.11") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META6]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1ccLm2ENS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_SK_NS0_4spanISL_XT2_EEET3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.23") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.19") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META17]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[CMP_I:%.*]] = icmp ne ptr addrspace(1) [[IN_PTR]], null // CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP_I]]) @@ -473,34 +473,34 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< // CHECK-NEXT: [[CMP1_I_NOT:%.*]] = icmp eq i64 [[REM_I]], 0 // CHECK-NEXT: br i1 [[CMP1_I_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] // CHECK: if.then: -// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[OUT]], align 8, !tbaa [[TBAA11]] +// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[OUT]], align 8, !tbaa [[TBAA12]] // CHECK-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(4) // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META61:![0-9]+]] -// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA7]], !noalias [[META64:![0-9]+]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META62:![0-9]+]] +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META65:![0-9]+]] // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[IF_THEN]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] -// CHECK-NEXT: [[CMP_I14:%.*]] = icmp ult i32 [[I_0_I]], 2 -// CHECK-NEXT: br i1 [[CMP_I14]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1CCLM2ENS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSA_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSA_INS3_14FULL_GROUP_KEYEJEEENSA_INS3_6DETAIL9NAIVE_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEET3__EXIT:%.*]] +// CHECK-NEXT: [[CMP_I18:%.*]] = icmp ult i32 [[I_0_I]], 2 +// CHECK-NEXT: br i1 [[CMP_I18]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1CCLM2ENS3_14NEW_PROPERTIES10PROPERTIESINS8_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME1EEENS3_19FULL_GROUP_PROPERTYENS3_6DETAIL14NAIVE_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESO_SM_NS0_4SPANISN_XT2_EEET3__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 // CHECK-NEXT: [[MUL_I_I:%.*]] = mul nuw nsw i32 [[TMP4]], [[I_0_I]] // CHECK-NEXT: [[ADD_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I]] // CHECK-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[ADD_I_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[IN_PTR]], i64 [[IDXPROM_I]] -// CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr addrspace(1) [[ARRAYIDX_I]], align 1, !tbaa [[TBAA15]] +// CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr addrspace(1) [[ARRAYIDX_I]], align 1, !tbaa [[TBAA16]] // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[TMP2]], i64 [[CONV_I]] -// CHECK-NEXT: store i8 [[TMP5]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 1, !tbaa [[TBAA15]] +// CHECK-NEXT: store i8 [[TMP5]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 1, !tbaa [[TBAA16]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP67:![0-9]+]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1ccLm2ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEENSA_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEET3_.exit: +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP68:![0-9]+]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1ccLm2ENS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEET3_.exit: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] // CHECK-NEXT: br label [[CLEANUP:%.*]] // CHECK: if.end: -// CHECK-NEXT: [[CALL4:%.*]] = tail call spir_func noundef <2 x i8> @_Z30__spirv_SubgroupBlockReadINTELIDv2_hET_PU3AS1Kh(ptr addrspace(1) noundef nonnull [[IN_PTR]]) #[[ATTR4]] -// CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(4), ptr [[OUT]], align 8, !tbaa [[TBAA68:![0-9]+]] -// CHECK-NEXT: store <2 x i8> [[CALL4]], ptr addrspace(4) [[TMP6]], align 1 +// CHECK-NEXT: [[CALL6:%.*]] = tail call spir_func noundef <2 x i8> @_Z30__spirv_SubgroupBlockReadINTELIDv2_hET_PU3AS1Kh(ptr addrspace(1) noundef nonnull [[IN_PTR]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(4), ptr [[OUT]], align 8, !tbaa [[TBAA69:![0-9]+]] +// CHECK-NEXT: store <2 x i8> [[CALL6]], ptr addrspace(4) [[TMP6]], align 1 // CHECK-NEXT: br label [[CLEANUP]] // CHECK: cleanup: // CHECK-NEXT: ret void @@ -510,8 +510,8 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< sycl::sub_group, plain_global_ptr, short, 4, opt_striped>( sycl::sub_group, plain_global_ptr, span, opt_striped); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1ssLm4ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEET3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.5") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.11") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META6]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1ssLm4ENS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_SK_NS0_4spanISL_XT2_EEET3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.11") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.19") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META17]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[CMP_I:%.*]] = icmp ne ptr addrspace(1) [[IN_PTR]], null // CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP_I]]) @@ -520,34 +520,34 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< // CHECK-NEXT: [[CMP1_I_NOT:%.*]] = icmp eq i64 [[REM_I]], 0 // CHECK-NEXT: br i1 [[CMP1_I_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] // CHECK: if.then: -// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[OUT]], align 8, !tbaa [[TBAA11]] +// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[OUT]], align 8, !tbaa [[TBAA12]] // CHECK-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(4) // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META70:![0-9]+]] -// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA7]], !noalias [[META73:![0-9]+]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META71:![0-9]+]] +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META74:![0-9]+]] // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[IF_THEN]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] -// CHECK-NEXT: [[CMP_I14:%.*]] = icmp ult i32 [[I_0_I]], 4 -// CHECK-NEXT: br i1 [[CMP_I14]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1SSLM4ENS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSA_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSA_INS3_14FULL_GROUP_KEYEJEEENSA_INS3_6DETAIL9NAIVE_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEET3__EXIT:%.*]] +// CHECK-NEXT: [[CMP_I18:%.*]] = icmp ult i32 [[I_0_I]], 4 +// CHECK-NEXT: br i1 [[CMP_I18]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1SSLM4ENS3_14NEW_PROPERTIES10PROPERTIESINS8_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME1EEENS3_19FULL_GROUP_PROPERTYENS3_6DETAIL14NAIVE_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESO_SM_NS0_4SPANISN_XT2_EEET3__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 // CHECK-NEXT: [[MUL_I_I:%.*]] = mul i32 [[TMP4]], [[I_0_I]] // CHECK-NEXT: [[ADD_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I]] // CHECK-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[ADD_I_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[IN_PTR]], i64 [[IDXPROM_I]] -// CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr addrspace(1) [[ARRAYIDX_I]], align 2, !tbaa [[TBAA20]] +// CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr addrspace(1) [[ARRAYIDX_I]], align 2, !tbaa [[TBAA21]] // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(4) [[TMP2]], i64 [[CONV_I]] -// CHECK-NEXT: store i16 [[TMP5]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA20]] +// CHECK-NEXT: store i16 [[TMP5]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA21]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP76:![0-9]+]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1ssLm4ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEENSA_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEET3_.exit: +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP77:![0-9]+]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1ssLm4ENS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEET3_.exit: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] // CHECK-NEXT: br label [[CLEANUP:%.*]] // CHECK: if.end: -// CHECK-NEXT: [[CALL4:%.*]] = tail call spir_func noundef <4 x i16> @_Z30__spirv_SubgroupBlockReadINTELIDv4_tET_PU3AS1Kt(ptr addrspace(1) noundef nonnull [[IN_PTR]]) #[[ATTR4]] -// CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(4), ptr [[OUT]], align 8, !tbaa [[TBAA24]] -// CHECK-NEXT: store <4 x i16> [[CALL4]], ptr addrspace(4) [[TMP6]], align 2 +// CHECK-NEXT: [[CALL6:%.*]] = tail call spir_func noundef <4 x i16> @_Z30__spirv_SubgroupBlockReadINTELIDv4_tET_PU3AS1Kt(ptr addrspace(1) noundef nonnull [[IN_PTR]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(4), ptr [[OUT]], align 8, !tbaa [[TBAA25]] +// CHECK-NEXT: store <4 x i16> [[CALL6]], ptr addrspace(4) [[TMP6]], align 2 // CHECK-NEXT: br label [[CLEANUP]] // CHECK: cleanup: // CHECK-NEXT: ret void @@ -556,31 +556,31 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< sycl::sub_group, plain_global_ptr, int, 3, opt_striped>( sycl::sub_group, plain_global_ptr, span, opt_striped); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm3ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEET3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.6") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.11") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META6]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm3ENS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_SK_NS0_4spanISL_XT2_EEET3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.12") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.19") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META17]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT]], align 8, !tbaa [[TBAA11]] +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT]], align 8, !tbaa [[TBAA12]] // CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META77:![0-9]+]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA7]], !noalias [[META80:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META78:![0-9]+]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META81:![0-9]+]] // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] // CHECK-NEXT: [[CMP_I:%.*]] = icmp ult i32 [[I_0_I]], 3 -// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1IILM3ENS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSA_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSA_INS3_14FULL_GROUP_KEYEJEEENSA_INS3_6DETAIL9NAIVE_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEET3__EXIT:%.*]] +// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1IILM3ENS3_14NEW_PROPERTIES10PROPERTIESINS8_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME1EEENS3_19FULL_GROUP_PROPERTYENS3_6DETAIL14NAIVE_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESO_SM_NS0_4SPANISN_XT2_EEET3__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 // CHECK-NEXT: [[MUL_I_I:%.*]] = mul i32 [[TMP3]], [[I_0_I]] // CHECK-NEXT: [[ADD_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I]] // CHECK-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[ADD_I_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[IN_PTR]], i64 [[IDXPROM_I]] -// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I]] -// CHECK-NEXT: store i32 [[TMP4]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: store i32 [[TMP4]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP83:![0-9]+]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm3ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEENSA_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEET3_.exit: +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP84:![0-9]+]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm3ENS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEET3_.exit: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] // CHECK-NEXT: ret void @@ -588,31 +588,31 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< sycl::sub_group, plain_global_ptr, int, 16, opt_striped>( sycl::sub_group, plain_global_ptr, span, opt_striped); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm16ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEET3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.16") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.11") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META6]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm16ENS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_SK_NS0_4spanISL_XT2_EEET3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.24") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.19") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META17]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT]], align 8, !tbaa [[TBAA11]] +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT]], align 8, !tbaa [[TBAA12]] // CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META84:![0-9]+]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA7]], !noalias [[META87:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META85:![0-9]+]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META88:![0-9]+]] // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] // CHECK-NEXT: [[CMP_I:%.*]] = icmp ult i32 [[I_0_I]], 16 -// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1IILM16ENS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSA_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSA_INS3_14FULL_GROUP_KEYEJEEENSA_INS3_6DETAIL9NAIVE_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEET3__EXIT:%.*]] +// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1IILM16ENS3_14NEW_PROPERTIES10PROPERTIESINS8_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME1EEENS3_19FULL_GROUP_PROPERTYENS3_6DETAIL14NAIVE_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESO_SM_NS0_4SPANISN_XT2_EEET3__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 // CHECK-NEXT: [[MUL_I_I:%.*]] = mul i32 [[TMP3]], [[I_0_I]] // CHECK-NEXT: [[ADD_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I]] // CHECK-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[ADD_I_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[IN_PTR]], i64 [[IDXPROM_I]] -// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I]] -// CHECK-NEXT: store i32 [[TMP4]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: store i32 [[TMP4]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP90:![0-9]+]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm16ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEENSA_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEET3_.exit: +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP91:![0-9]+]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm16ENS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEET3_.exit: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] // CHECK-NEXT: ret void @@ -620,30 +620,30 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< sycl::sub_group, plain_global_ptr, int, 11, opt_striped>( sycl::sub_group, plain_global_ptr, span, opt_striped); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm11ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEET3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.17") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.11") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META6]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm11ENS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_SK_NS0_4spanISL_XT2_EEET3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.25") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.19") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META17]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT]], align 8, !tbaa [[TBAA11]] +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT]], align 8, !tbaa [[TBAA12]] // CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META91:![0-9]+]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA7]], !noalias [[META94:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META92:![0-9]+]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META95:![0-9]+]] // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] // CHECK-NEXT: [[CMP_I:%.*]] = icmp ult i32 [[I_0_I]], 11 -// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1IILM11ENS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSA_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSA_INS3_14FULL_GROUP_KEYEJEEENSA_INS3_6DETAIL9NAIVE_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEET3__EXIT:%.*]] +// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1IILM11ENS3_14NEW_PROPERTIES10PROPERTIESINS8_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME1EEENS3_19FULL_GROUP_PROPERTYENS3_6DETAIL14NAIVE_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESO_SM_NS0_4SPANISN_XT2_EEET3__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 // CHECK-NEXT: [[MUL_I_I:%.*]] = mul i32 [[TMP3]], [[I_0_I]] // CHECK-NEXT: [[ADD_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I]] // CHECK-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[ADD_I_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[IN_PTR]], i64 [[IDXPROM_I]] -// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I]] -// CHECK-NEXT: store i32 [[TMP4]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: store i32 [[TMP4]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP97:![0-9]+]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm11ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEENSA_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEET3_.exit: +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP98:![0-9]+]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm11ENS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEET3_.exit: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] // CHECK-NEXT: ret void diff --git a/sycl/test/check_device_code/group_store.cpp b/sycl/test/check_device_code/group_store.cpp index 6533a5d40f9f3..7647530f6446a 100644 --- a/sycl/test/check_device_code/group_store.cpp +++ b/sycl/test/check_device_code/group_store.cpp @@ -16,22 +16,22 @@ namespace oneapi_exp = sycl::ext::oneapi::experimental; using namespace sycl::ext::oneapi::experimental; using full_group_blocked = - decltype(properties(full_group, data_placement_blocked)); + decltype(new_properties::properties(full_group, data_placement_blocked)); -using naive_blocked = - decltype(properties(oneapi_exp::detail::naive, data_placement_blocked)); +using naive_blocked = decltype(new_properties::properties( + oneapi_exp::detail::naive, data_placement_blocked)); -using opt_blocked = - decltype(properties(full_group, contiguous_memory, data_placement_blocked)); +using opt_blocked = decltype(new_properties::properties( + full_group, contiguous_memory, data_placement_blocked)); using full_group_striped = - decltype(properties(full_group, data_placement_striped)); + decltype(new_properties::properties(full_group, data_placement_striped)); -using naive_striped = - decltype(properties(oneapi_exp::detail::naive, data_placement_striped)); +using naive_striped = decltype(new_properties::properties( + oneapi_exp::detail::naive, data_placement_striped)); -using opt_striped = - decltype(properties(full_group, contiguous_memory, data_placement_striped)); +using opt_striped = decltype(new_properties::properties( + full_group, contiguous_memory, data_placement_striped)); template using plain_global_ptr = typename sycl::detail::DecoratedType< @@ -41,15 +41,15 @@ using plain_global_ptr = typename sycl::detail::DecoratedType< template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< sycl::sub_group, int, plain_global_ptr, naive_blocked>( sycl::sub_group, const int &, plain_global_ptr, naive_blocked); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESN_RKSL_SM_T2_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(4) noundef align 4 dereferenceable(4) [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] comdat !srcloc [[META5:![0-9]+]] !sycl_fixed_targets [[META6:![0-9]+]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiPU3AS1iNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_RKSK_SL_T2_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(4) noundef align 4 dereferenceable(4) [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] comdat !srcloc [[META6:![0-9]+]] !sycl_fixed_targets [[META7:![0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5:[0-9]+]] // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 // CHECK-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[TMP0]] to i64 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT_PTR]], i64 [[IDXPROM_I]] -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[IN]], align 4, !tbaa [[TBAA7:![0-9]+]] -// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[IN]], align 4, !tbaa [[TBAA8:![0-9]+]] +// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] // CHECK-NEXT: ret void @@ -57,8 +57,8 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< sycl::sub_group, int, plain_global_ptr, opt_blocked>( sycl::sub_group, const int &, plain_global_ptr, opt_blocked); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_RKSM_SN_T2_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(4) noundef align 4 dereferenceable(4) [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.0") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META5]] !sycl_fixed_targets [[META6]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiPU3AS1iNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_RKSK_SL_T2_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(4) noundef align 4 dereferenceable(4) [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.2") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META6]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[CMP_I_I:%.*]] = icmp ne ptr addrspace(1) [[OUT_PTR]], null // CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I]]) @@ -71,23 +71,23 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 // CHECK-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[TMP1]] to i64 // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT_PTR]], i64 [[IDXPROM_I_I]] -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN]], align 4, !tbaa [[TBAA7]] -// CHECK-NEXT: store i32 [[TMP2]], ptr addrspace(1) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: store i32 [[TMP2]], ptr addrspace(1) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] -// CHECK-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEKILM1EPU3AS1INS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESP_NS0_4SPANISN_XT1_EEESO_T3__EXIT:%.*]] +// CHECK-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEKILM1EPU3AS1INS3_14NEW_PROPERTIES10PROPERTIESINS9_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME0EEENS3_19FULL_GROUP_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESN_NS0_4SPANISL_XT1_EEESM_T3__EXIT:%.*]] // CHECK: if.end.i: -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[IN]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[IN]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELIjEvPU3AS1jT_(ptr addrspace(1) noundef nonnull [[OUT_PTR]], i32 noundef [[TMP3]]) #[[ATTR5]] -// CHECK-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEKILM1EPU3AS1INS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESP_NS0_4SPANISN_XT1_EEESO_T3__EXIT]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEKiLm1EPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESP_NS0_4spanISN_XT1_EEESO_T3_.exit: +// CHECK-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEKILM1EPU3AS1INS3_14NEW_PROPERTIES10PROPERTIESINS9_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME0EEENS3_19FULL_GROUP_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESN_NS0_4SPANISL_XT1_EEESM_T3__EXIT]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEKiLm1EPU3AS1iNS3_14new_properties10propertiesINS9_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESN_NS0_4spanISL_XT1_EEESM_T3_.exit: // CHECK-NEXT: ret void // Check that contiguous_memory can be auto-detected. template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< sycl::sub_group, int, plain_global_ptr, full_group_blocked>( sycl::sub_group, const int &, plain_global_ptr, full_group_blocked); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_RKSK_SL_T2_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(4) noundef align 4 dereferenceable(4) [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.2") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META5]] !sycl_fixed_targets [[META6]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiPU3AS1iNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESL_RKSJ_SK_T2_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(4) noundef align 4 dereferenceable(4) [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.8") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META6]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[CMP_I_I:%.*]] = icmp ne ptr addrspace(1) [[OUT_PTR]], null // CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I]]) @@ -100,15 +100,15 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 // CHECK-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[TMP1]] to i64 // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT_PTR]], i64 [[IDXPROM_I_I]] -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN]], align 4, !tbaa [[TBAA7]] -// CHECK-NEXT: store i32 [[TMP2]], ptr addrspace(1) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: store i32 [[TMP2]], ptr addrspace(1) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] -// CHECK-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEKILM1EPU3AS1INS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_14FULL_GROUP_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESN_NS0_4SPANISL_XT1_EEESM_T3__EXIT:%.*]] +// CHECK-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEKILM1EPU3AS1INS3_14NEW_PROPERTIES10PROPERTIESINS9_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME0EEENS3_19FULL_GROUP_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESM_NS0_4SPANISK_XT1_EEESL_T3__EXIT:%.*]] // CHECK: if.end.i: -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[IN]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[IN]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELIjEvPU3AS1jT_(ptr addrspace(1) noundef nonnull [[OUT_PTR]], i32 noundef [[TMP3]]) #[[ATTR5]] -// CHECK-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEKILM1EPU3AS1INS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_14FULL_GROUP_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESN_NS0_4SPANISL_XT1_EEESM_T3__EXIT]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEKiLm1EPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESN_NS0_4spanISL_XT1_EEESM_T3_.exit: +// CHECK-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEKILM1EPU3AS1INS3_14NEW_PROPERTIES10PROPERTIESINS9_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME0EEENS3_19FULL_GROUP_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESM_NS0_4SPANISK_XT1_EEESL_T3__EXIT]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEKiLm1EPU3AS1iNS3_14new_properties10propertiesINS9_6detail20properties_type_listIJNS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_NS0_4spanISK_XT1_EEESL_T3_.exit: // CHECK-NEXT: ret void // SYCL 2020's accessor can't be statically known to be contiguous. @@ -118,19 +118,19 @@ using accessor_iter_t = accessor( sycl::sub_group, const int &, accessor_iter_t, full_group_blocked); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiNS0_6detail17accessor_iteratorIiLi1EEENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESN_RKSL_SM_T2_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(4) noundef align 4 dereferenceable(4) [[IN:%.*]], ptr noundef byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.2") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META5]] !sycl_fixed_targets [[META6]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiNS0_6detail17accessor_iteratorIiLi1EEENS3_14new_properties10propertiesINS9_6detail20properties_type_listIJNS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_RKSK_SL_T2_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(4) noundef align 4 dereferenceable(4) [[IN:%.*]], ptr noundef byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.8") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META6]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[AGG_TMP2_SROA_0_0_COPYLOAD:%.*]] = load ptr addrspace(4), ptr [[OUT_PTR]], align 8, !tbaa [[TBAA11:![0-9]+]] +// CHECK-NEXT: [[AGG_TMP2_SROA_0_0_COPYLOAD:%.*]] = load ptr addrspace(4), ptr [[OUT_PTR]], align 8, !tbaa [[TBAA12:![0-9]+]] // CHECK-NEXT: [[AGG_TMP2_SROA_2_0_OUT_PTR_ASCAST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[OUT_PTR]], i64 8 -// CHECK-NEXT: [[AGG_TMP2_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr [[AGG_TMP2_SROA_2_0_OUT_PTR_ASCAST_SROA_IDX]], align 8, !tbaa [[TBAA13:![0-9]+]] +// CHECK-NEXT: [[AGG_TMP2_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr [[AGG_TMP2_SROA_2_0_OUT_PTR_ASCAST_SROA_IDX]], align 8, !tbaa [[TBAA14:![0-9]+]] // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 // CHECK-NEXT: [[CONV5_I_I:%.*]] = sext i32 [[TMP0]] to i64 // CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr addrspace(4) [[AGG_TMP2_SROA_0_0_COPYLOAD]], i64 [[AGG_TMP2_SROA_2_0_COPYLOAD]] // CHECK-NEXT: [[ADD_PTR_I_I_I_I:%.*]] = getelementptr i32, ptr addrspace(4) [[TMP1]], i64 [[CONV5_I_I]] -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN]], align 4, !tbaa [[TBAA7]] -// CHECK-NEXT: store i32 [[TMP2]], ptr addrspace(4) [[ADD_PTR_I_I_I_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: store i32 [[TMP2]], ptr addrspace(4) [[ADD_PTR_I_I_I_I]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] // CHECK-NEXT: ret void @@ -140,20 +140,20 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< const int &, accessor_iter_t, opt_blocked); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiNS0_6detail17accessor_iteratorIiLi1EEENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESP_RKSN_SO_T2_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(4) noundef align 4 dereferenceable(4) [[IN:%.*]], ptr noundef byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.0") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META5]] !sycl_fixed_targets [[META6]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiNS0_6detail17accessor_iteratorIiLi1EEENS3_14new_properties10propertiesINS9_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESN_RKSL_SM_T2_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(4) noundef align 4 dereferenceable(4) [[IN:%.*]], ptr noundef byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.2") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META6]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[AGG_TMP2_SROA_0_0_COPYLOAD:%.*]] = load ptr addrspace(4), ptr [[OUT_PTR]], align 8, !tbaa [[TBAA11]] +// CHECK-NEXT: [[AGG_TMP2_SROA_0_0_COPYLOAD:%.*]] = load ptr addrspace(4), ptr [[OUT_PTR]], align 8, !tbaa [[TBAA12]] // CHECK-NEXT: [[AGG_TMP2_SROA_2_0_OUT_PTR_ASCAST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[OUT_PTR]], i64 8 -// CHECK-NEXT: [[AGG_TMP2_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr [[AGG_TMP2_SROA_2_0_OUT_PTR_ASCAST_SROA_IDX]], align 8, !tbaa [[TBAA13]] +// CHECK-NEXT: [[AGG_TMP2_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr [[AGG_TMP2_SROA_2_0_OUT_PTR_ASCAST_SROA_IDX]], align 8, !tbaa [[TBAA14]] // CHECK-NEXT: [[ADD_PTR_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[AGG_TMP2_SROA_0_0_COPYLOAD]], i64 [[AGG_TMP2_SROA_2_0_COPYLOAD]] // CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(4) [[ADD_PTR_I_I_I]], null // CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) // CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(4) [[ADD_PTR_I_I_I]] to i64 // CHECK-NEXT: [[REM_I_I_I:%.*]] = and i64 [[TMP0]], 15 // CHECK-NEXT: [[CMP1_I_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 -// CHECK-NEXT: br i1 [[CMP1_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL6DETAIL16GET_BLOCK_OP_PTRILI16ELM1ENS0_6DETAIL17ACCESSOR_ITERATORIILI1EEENS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEEEEEEEEDAT1_T2__EXIT_I:%.*]], label [[IF_THEN_I:%.*]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental6detail16get_block_op_ptrILi16ELm1ENS0_6detail17accessor_iteratorIiLi1EEENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEEEEEEEEDaT1_T2_.exit.i: +// CHECK-NEXT: br i1 [[CMP1_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL6DETAIL16GET_BLOCK_OP_PTRILI16ELM1ENS0_6DETAIL17ACCESSOR_ITERATORIILI1EEENS3_14NEW_PROPERTIES10PROPERTIESINS9_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME0EEENS3_19FULL_GROUP_PROPERTYEEEEEEEEDAT1_T2__EXIT_I:%.*]], label [[IF_THEN_I:%.*]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental6detail16get_block_op_ptrILi16ELm1ENS0_6detail17accessor_iteratorIiLi1EEENS3_14new_properties10propertiesINS9_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyEEEEEEEEDaT1_T2_.exit.i: // CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef ptr addrspace(1) @_Z41__spirv_GenericCastToPtrExplicit_ToGlobalPvi(ptr addrspace(4) noundef nonnull [[ADD_PTR_I_I_I]], i32 noundef 5) #[[ATTR6:[0-9]+]] // CHECK-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq ptr addrspace(1) [[CALL_I_I_I_I]], null // CHECK-NEXT: br i1 [[TOBOOL_NOT_I]], label [[IF_THEN_I]], label [[IF_END_I:%.*]] @@ -162,15 +162,15 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 // CHECK-NEXT: [[CONV5_I_I:%.*]] = sext i32 [[TMP1]] to i64 // CHECK-NEXT: [[ADD_PTR_I_I_I_I:%.*]] = getelementptr i32, ptr addrspace(4) [[ADD_PTR_I_I_I]], i64 [[CONV5_I_I]] -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN]], align 4, !tbaa [[TBAA7]] -// CHECK-NEXT: store i32 [[TMP2]], ptr addrspace(4) [[ADD_PTR_I_I_I_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: store i32 [[TMP2]], ptr addrspace(4) [[ADD_PTR_I_I_I_I]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] -// CHECK-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEKILM1ENS0_6DETAIL17ACCESSOR_ITERATORIILI1EEENS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSC_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSC_INS3_14FULL_GROUP_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESQ_NS0_4SPANISO_XT1_EEESP_T3__EXIT:%.*]] +// CHECK-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEKILM1ENS0_6DETAIL17ACCESSOR_ITERATORIILI1EEENS3_14NEW_PROPERTIES10PROPERTIESINSA_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME0EEENS3_19FULL_GROUP_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESO_NS0_4SPANISM_XT1_EEESN_T3__EXIT:%.*]] // CHECK: if.end.i: -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[IN]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[IN]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELIjEvPU3AS1jT_(ptr addrspace(1) noundef nonnull [[CALL_I_I_I_I]], i32 noundef [[TMP3]]) #[[ATTR5]] -// CHECK-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEKILM1ENS0_6DETAIL17ACCESSOR_ITERATORIILI1EEENS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSC_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSC_INS3_14FULL_GROUP_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESQ_NS0_4SPANISO_XT1_EEESP_T3__EXIT]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEKiLm1ENS0_6detail17accessor_iteratorIiLi1EEENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSC_INS3_21contiguous_memory_keyEJEEENSC_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESQ_NS0_4spanISO_XT1_EEESP_T3_.exit: +// CHECK-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEKILM1ENS0_6DETAIL17ACCESSOR_ITERATORIILI1EEENS3_14NEW_PROPERTIES10PROPERTIESINSA_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME0EEENS3_19FULL_GROUP_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESO_NS0_4SPANISM_XT1_EEESN_T3__EXIT]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEKiLm1ENS0_6detail17accessor_iteratorIiLi1EEENS3_14new_properties10propertiesINSA_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_NS0_4spanISM_XT1_EEESN_T3_.exit: // CHECK-NEXT: ret void // Four shorts in blocked data layout could be stored as a single 64-bit @@ -178,8 +178,8 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< sycl::sub_group, short, 4, plain_global_ptr, opt_blocked>( sycl::sub_group, span, plain_global_ptr, opt_blocked); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEsLm4EPU3AS1sNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_NS0_4spanISM_XT1_EEESN_T3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.4") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.0") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META15:![0-9]+]] !sycl_fixed_targets [[META6]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEsLm4EPU3AS1sNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_NS0_4spanISK_XT1_EEESL_T3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.10") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.2") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VALUES:%.*]] = alloca [4 x i16], align 2 // CHECK-NEXT: [[CMP_I:%.*]] = icmp ne ptr addrspace(1) [[OUT_PTR]], null @@ -189,50 +189,50 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< // CHECK-NEXT: [[CMP1_I_NOT:%.*]] = icmp eq i64 [[REM_I]], 0 // CHECK-NEXT: br i1 [[CMP1_I_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] // CHECK: if.then: -// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA11]] +// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA12]] // CHECK-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(4) // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META16:![0-9]+]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META17:![0-9]+]] // CHECK-NEXT: [[MUL_I_I:%.*]] = shl i32 [[TMP3]], 2 // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[IF_THEN]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] -// CHECK-NEXT: [[CMP_I19:%.*]] = icmp ult i32 [[I_0_I]], 4 -// CHECK-NEXT: br i1 [[CMP_I19]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM4EPU3AS1SNS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSA_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSA_INS3_14FULL_GROUP_KEYEJEEENSA_INS3_6DETAIL9NAIVE_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_T3__EXIT:%.*]] +// CHECK-NEXT: [[CMP_I23:%.*]] = icmp ult i32 [[I_0_I]], 4 +// CHECK-NEXT: br i1 [[CMP_I23]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM4EPU3AS1SNS3_14NEW_PROPERTIES10PROPERTIESINS8_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME0EEENS3_19FULL_GROUP_PROPERTYENS3_6DETAIL14NAIVE_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESO_NS0_4SPANISM_XT1_EEESN_T3__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(4) [[TMP2]], i64 [[CONV_I]] -// CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA19:![0-9]+]] +// CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA20:![0-9]+]] // CHECK-NEXT: [[ADD_I_I:%.*]] = or disjoint i32 [[MUL_I_I]], [[I_0_I]] // CHECK-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[ADD_I_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[OUT_PTR]], i64 [[IDXPROM_I]] -// CHECK-NEXT: store i16 [[TMP4]], ptr addrspace(1) [[ARRAYIDX_I]], align 2, !tbaa [[TBAA19]] +// CHECK-NEXT: store i16 [[TMP4]], ptr addrspace(1) [[ARRAYIDX_I]], align 2, !tbaa [[TBAA20]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP21:![0-9]+]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEsLm4EPU3AS1sNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEENSA_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_T3_.exit: +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEsLm4EPU3AS1sNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_NS0_4spanISM_XT1_EEESN_T3_.exit: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] // CHECK-NEXT: br label [[CLEANUP:%.*]] // CHECK: if.end: // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VALUES]]) #[[ATTR7:[0-9]+]] -// CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(4), ptr [[IN]], align 8, !tbaa [[TBAA23:![0-9]+]] +// CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(4), ptr [[IN]], align 8, !tbaa [[TBAA24:![0-9]+]] // CHECK-NEXT: br label [[FOR_COND:%.*]] // CHECK: for.cond: // CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[IF_END]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] // CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[I_0]], 4 // CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] // CHECK: for.cond.cleanup: -// CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[VALUES]], align 2, !tbaa [[TBAA25:![0-9]+]] +// CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[VALUES]], align 2, !tbaa [[TBAA26:![0-9]+]] // CHECK-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELImEvPU3AS1mT_(ptr addrspace(1) noundef nonnull [[OUT_PTR]], i64 noundef [[TMP6]]) #[[ATTR5]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VALUES]]) #[[ATTR7]] // CHECK-NEXT: br label [[CLEANUP]] // CHECK: for.body: // CHECK-NEXT: [[CONV:%.*]] = zext nneg i32 [[I_0]] to i64 -// CHECK-NEXT: [[ARRAYIDX_I20:%.*]] = getelementptr inbounds i16, ptr addrspace(4) [[TMP5]], i64 [[CONV]] -// CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I20]], align 2, !tbaa [[TBAA19]] +// CHECK-NEXT: [[ARRAYIDX_I24:%.*]] = getelementptr inbounds i16, ptr addrspace(4) [[TMP5]], i64 [[CONV]] +// CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I24]], align 2, !tbaa [[TBAA20]] // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x i16], ptr [[VALUES]], i64 0, i64 [[CONV]] -// CHECK-NEXT: store i16 [[TMP7]], ptr [[ARRAYIDX]], align 2, !tbaa [[TBAA19]] +// CHECK-NEXT: store i16 [[TMP7]], ptr [[ARRAYIDX]], align 2, !tbaa [[TBAA20]] // CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1 -// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK: cleanup: // CHECK-NEXT: ret void @@ -241,8 +241,8 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< sycl::sub_group, const short, 4, plain_global_ptr, opt_blocked>( sycl::sub_group, span, plain_global_ptr, opt_blocked); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEKsLm4EPU3AS1sNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESP_NS0_4spanISN_XT1_EEESO_T3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.5") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.0") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META15]] !sycl_fixed_targets [[META6]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEKsLm4EPU3AS1sNS3_14new_properties10propertiesINS9_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESN_NS0_4spanISL_XT1_EEESM_T3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.11") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.2") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VALUES:%.*]] = alloca [4 x i16], align 2 // CHECK-NEXT: [[CMP_I:%.*]] = icmp ne ptr addrspace(1) [[OUT_PTR]], null @@ -252,50 +252,50 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< // CHECK-NEXT: [[CMP1_I_NOT:%.*]] = icmp eq i64 [[REM_I]], 0 // CHECK-NEXT: br i1 [[CMP1_I_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] // CHECK: if.then: -// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA11]] +// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA12]] // CHECK-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(4) // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META27:![0-9]+]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META28:![0-9]+]] // CHECK-NEXT: [[MUL_I_I:%.*]] = shl i32 [[TMP3]], 2 // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[IF_THEN]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] -// CHECK-NEXT: [[CMP_I19:%.*]] = icmp ult i32 [[I_0_I]], 4 -// CHECK-NEXT: br i1 [[CMP_I19]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEKSLM4EPU3AS1SNS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS3_6DETAIL9NAIVE_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESS_NS0_4SPANISQ_XT1_EEESR_T3__EXIT:%.*]] +// CHECK-NEXT: [[CMP_I23:%.*]] = icmp ult i32 [[I_0_I]], 4 +// CHECK-NEXT: br i1 [[CMP_I23]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEKSLM4EPU3AS1SNS3_14NEW_PROPERTIES10PROPERTIESINS9_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME0EEENS3_19FULL_GROUP_PROPERTYENS3_6DETAIL14NAIVE_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESP_NS0_4SPANISN_XT1_EEESO_T3__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(4) [[TMP2]], i64 [[CONV_I]] -// CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA19]] +// CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA20]] // CHECK-NEXT: [[ADD_I_I:%.*]] = or disjoint i32 [[MUL_I_I]], [[I_0_I]] // CHECK-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[ADD_I_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[OUT_PTR]], i64 [[IDXPROM_I]] -// CHECK-NEXT: store i16 [[TMP4]], ptr addrspace(1) [[ARRAYIDX_I]], align 2, !tbaa [[TBAA19]] +// CHECK-NEXT: store i16 [[TMP4]], ptr addrspace(1) [[ARRAYIDX_I]], align 2, !tbaa [[TBAA20]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP30:![0-9]+]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEKsLm4EPU3AS1sNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESS_NS0_4spanISQ_XT1_EEESR_T3_.exit: +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEKsLm4EPU3AS1sNS3_14new_properties10propertiesINS9_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESP_NS0_4spanISN_XT1_EEESO_T3_.exit: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] // CHECK-NEXT: br label [[CLEANUP:%.*]] // CHECK: if.end: // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VALUES]]) #[[ATTR7]] -// CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(4), ptr [[IN]], align 8, !tbaa [[TBAA31:![0-9]+]] +// CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(4), ptr [[IN]], align 8, !tbaa [[TBAA32:![0-9]+]] // CHECK-NEXT: br label [[FOR_COND:%.*]] // CHECK: for.cond: // CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[IF_END]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] // CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[I_0]], 4 // CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] // CHECK: for.cond.cleanup: -// CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[VALUES]], align 2, !tbaa [[TBAA25]] +// CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[VALUES]], align 2, !tbaa [[TBAA26]] // CHECK-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELImEvPU3AS1mT_(ptr addrspace(1) noundef nonnull [[OUT_PTR]], i64 noundef [[TMP6]]) #[[ATTR5]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VALUES]]) #[[ATTR7]] // CHECK-NEXT: br label [[CLEANUP]] // CHECK: for.body: // CHECK-NEXT: [[CONV:%.*]] = zext nneg i32 [[I_0]] to i64 -// CHECK-NEXT: [[ARRAYIDX_I20:%.*]] = getelementptr inbounds i16, ptr addrspace(4) [[TMP5]], i64 [[CONV]] -// CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I20]], align 2, !tbaa [[TBAA19]] +// CHECK-NEXT: [[ARRAYIDX_I24:%.*]] = getelementptr inbounds i16, ptr addrspace(4) [[TMP5]], i64 [[CONV]] +// CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I24]], align 2, !tbaa [[TBAA20]] // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x i16], ptr [[VALUES]], i64 0, i64 [[CONV]] -// CHECK-NEXT: store i16 [[TMP7]], ptr [[ARRAYIDX]], align 2, !tbaa [[TBAA19]] +// CHECK-NEXT: store i16 [[TMP7]], ptr [[ARRAYIDX]], align 2, !tbaa [[TBAA20]] // CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1 -// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK: cleanup: // CHECK-NEXT: ret void @@ -303,30 +303,30 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< sycl::sub_group, int, 3, plain_global_ptr, opt_blocked>( sycl::sub_group, span, plain_global_ptr, opt_blocked); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm3EPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_NS0_4spanISM_XT1_EEESN_T3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.6") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.0") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META15]] !sycl_fixed_targets [[META6]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm3EPU3AS1iNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_NS0_4spanISK_XT1_EEESL_T3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.12") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.2") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA11]] +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA12]] // CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META34:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META35:![0-9]+]] // CHECK-NEXT: [[MUL_I_I:%.*]] = mul i32 [[TMP2]], 3 // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] // CHECK-NEXT: [[CMP_I:%.*]] = icmp ult i32 [[I_0_I]], 3 -// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM3EPU3AS1INS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSA_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSA_INS3_14FULL_GROUP_KEYEJEEENSA_INS3_6DETAIL9NAIVE_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_T3__EXIT:%.*]] +// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM3EPU3AS1INS3_14NEW_PROPERTIES10PROPERTIESINS8_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME0EEENS3_19FULL_GROUP_PROPERTYENS3_6DETAIL14NAIVE_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESO_NS0_4SPANISM_XT1_EEESN_T3__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: [[ADD_I_I:%.*]] = add i32 [[MUL_I_I]], [[I_0_I]] // CHECK-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[ADD_I_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT_PTR]], i64 [[IDXPROM_I]] -// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP37:![0-9]+]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm3EPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEENSA_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_T3_.exit: +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP38:![0-9]+]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm3EPU3AS1iNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_NS0_4spanISM_XT1_EEESN_T3_.exit: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] // CHECK-NEXT: ret void @@ -335,30 +335,30 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< sycl::sub_group, int, 4, plain_global_ptr, opt_blocked>( sycl::sub_group, span, plain_global_ptr, opt_blocked); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm4EPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_NS0_4spanISM_XT1_EEESN_T3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.7") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.0") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META15]] !sycl_fixed_targets [[META6]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm4EPU3AS1iNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_NS0_4spanISK_XT1_EEESL_T3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.13") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.2") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA11]] +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA12]] // CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META38:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META39:![0-9]+]] // CHECK-NEXT: [[MUL_I_I:%.*]] = shl i32 [[TMP2]], 2 // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] // CHECK-NEXT: [[CMP_I:%.*]] = icmp ult i32 [[I_0_I]], 4 -// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM4EPU3AS1INS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSA_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSA_INS3_14FULL_GROUP_KEYEJEEENSA_INS3_6DETAIL9NAIVE_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_T3__EXIT:%.*]] +// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM4EPU3AS1INS3_14NEW_PROPERTIES10PROPERTIESINS8_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME0EEENS3_19FULL_GROUP_PROPERTYENS3_6DETAIL14NAIVE_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESO_NS0_4SPANISM_XT1_EEESN_T3__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: [[ADD_I_I:%.*]] = or disjoint i32 [[MUL_I_I]], [[I_0_I]] // CHECK-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[ADD_I_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT_PTR]], i64 [[IDXPROM_I]] -// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP41:![0-9]+]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm4EPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEENSA_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_T3_.exit: +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP42:![0-9]+]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm4EPU3AS1iNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_NS0_4spanISM_XT1_EEESN_T3_.exit: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] // CHECK-NEXT: ret void @@ -366,30 +366,30 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< sycl::sub_group, int, 7, plain_global_ptr, opt_blocked>( sycl::sub_group, span, plain_global_ptr, opt_blocked); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm7EPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_NS0_4spanISM_XT1_EEESN_T3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.8") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.0") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META15]] !sycl_fixed_targets [[META6]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm7EPU3AS1iNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_NS0_4spanISK_XT1_EEESL_T3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.14") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.2") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA11]] +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA12]] // CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META42:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META43:![0-9]+]] // CHECK-NEXT: [[MUL_I_I:%.*]] = mul i32 [[TMP2]], 7 // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] // CHECK-NEXT: [[CMP_I:%.*]] = icmp ult i32 [[I_0_I]], 7 -// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM7EPU3AS1INS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSA_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSA_INS3_14FULL_GROUP_KEYEJEEENSA_INS3_6DETAIL9NAIVE_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_T3__EXIT:%.*]] +// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM7EPU3AS1INS3_14NEW_PROPERTIES10PROPERTIESINS8_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME0EEENS3_19FULL_GROUP_PROPERTYENS3_6DETAIL14NAIVE_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESO_NS0_4SPANISM_XT1_EEESN_T3__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: [[ADD_I_I:%.*]] = add i32 [[MUL_I_I]], [[I_0_I]] // CHECK-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[ADD_I_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT_PTR]], i64 [[IDXPROM_I]] -// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP45:![0-9]+]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm7EPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEENSA_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_T3_.exit: +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP46:![0-9]+]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm7EPU3AS1iNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_NS0_4spanISM_XT1_EEESN_T3_.exit: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] // CHECK-NEXT: ret void @@ -400,13 +400,13 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< sycl::sub_group, int, 2, plain_global_ptr, naive_striped>( sycl::sub_group, span, plain_global_ptr, naive_striped); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2EPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESN_NS0_4spanISL_XT1_EEESM_T3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.9") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.10") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META15]] !sycl_fixed_targets [[META6]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2EPU3AS1iNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_NS0_4spanISK_XT1_EEESL_T3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.15") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.16") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] -// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr [[IN]], align 8, !tbaa [[TBAA46:![0-9]+]] -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META48:![0-9]+]] -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA7]], !noalias [[META51:![0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr [[IN]], align 8, !tbaa [[TBAA47:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META49:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META52:![0-9]+]] // CHECK-NEXT: br label [[FOR_COND:%.*]] // CHECK: for.cond: // CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] @@ -418,21 +418,21 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< // CHECK: for.body: // CHECK-NEXT: [[CONV:%.*]] = zext nneg i32 [[I_0]] to i64 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 [[CONV]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: [[MUL_I:%.*]] = mul nuw nsw i32 [[TMP2]], [[I_0]] // CHECK-NEXT: [[ADD_I:%.*]] = add i32 [[TMP1]], [[MUL_I]] // CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD_I]] to i64 // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT_PTR]], i64 [[IDXPROM]] -// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[ARRAYIDX]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[ARRAYIDX]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1 -// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // Check that optimized implementation is selected. template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< sycl::sub_group, int, 2, plain_global_ptr, opt_striped>( sycl::sub_group, span, plain_global_ptr, opt_striped); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2EPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_NS0_4spanISM_XT1_EEESN_T3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.9") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.11") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META15]] !sycl_fixed_targets [[META6]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2EPU3AS1iNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_NS0_4spanISK_XT1_EEESL_T3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.15") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.19") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VALUES:%.*]] = alloca [2 x i32], align 4 // CHECK-NEXT: [[CMP_I:%.*]] = icmp ne ptr addrspace(1) [[OUT_PTR]], null @@ -442,51 +442,51 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< // CHECK-NEXT: [[CMP1_I_NOT:%.*]] = icmp eq i64 [[REM_I]], 0 // CHECK-NEXT: br i1 [[CMP1_I_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] // CHECK: if.then: -// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA11]] +// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA12]] // CHECK-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(4) // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META55:![0-9]+]] -// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA7]], !noalias [[META58:![0-9]+]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META56:![0-9]+]] +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META59:![0-9]+]] // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[IF_THEN]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] -// CHECK-NEXT: [[CMP_I19:%.*]] = icmp ult i32 [[I_0_I]], 2 -// CHECK-NEXT: br i1 [[CMP_I19]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2EPU3AS1INS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSA_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSA_INS3_14FULL_GROUP_KEYEJEEENSA_INS3_6DETAIL9NAIVE_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_T3__EXIT:%.*]] +// CHECK-NEXT: [[CMP_I23:%.*]] = icmp ult i32 [[I_0_I]], 2 +// CHECK-NEXT: br i1 [[CMP_I23]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2EPU3AS1INS3_14NEW_PROPERTIES10PROPERTIESINS8_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME1EEENS3_19FULL_GROUP_PROPERTYENS3_6DETAIL14NAIVE_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESO_NS0_4SPANISM_XT1_EEESN_T3__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP2]], i64 [[CONV_I]] -// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: [[MUL_I_I:%.*]] = mul nuw nsw i32 [[TMP4]], [[I_0_I]] // CHECK-NEXT: [[ADD_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I]] // CHECK-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[ADD_I_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT_PTR]], i64 [[IDXPROM_I]] -// CHECK-NEXT: store i32 [[TMP5]], ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: store i32 [[TMP5]], ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP61:![0-9]+]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2EPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEENSA_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_T3_.exit: +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP62:![0-9]+]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2EPU3AS1iNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_NS0_4spanISM_XT1_EEESN_T3_.exit: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] // CHECK-NEXT: br label [[CLEANUP:%.*]] // CHECK: if.end: // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VALUES]]) #[[ATTR7]] -// CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(4), ptr [[IN]], align 8, !tbaa [[TBAA46]] +// CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(4), ptr [[IN]], align 8, !tbaa [[TBAA47]] // CHECK-NEXT: br label [[FOR_COND:%.*]] // CHECK: for.cond: // CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[IF_END]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] // CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[I_0]], 2 // CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] // CHECK: for.cond.cleanup: -// CHECK-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr [[VALUES]], align 4, !tbaa [[TBAA25]] +// CHECK-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr [[VALUES]], align 4, !tbaa [[TBAA26]] // CHECK-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELIDv2_jEvPU3AS1jT_(ptr addrspace(1) noundef nonnull [[OUT_PTR]], <2 x i32> noundef [[TMP7]]) #[[ATTR5]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VALUES]]) #[[ATTR7]] // CHECK-NEXT: br label [[CLEANUP]] // CHECK: for.body: // CHECK-NEXT: [[CONV:%.*]] = zext nneg i32 [[I_0]] to i64 -// CHECK-NEXT: [[ARRAYIDX_I20:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP6]], i64 [[CONV]] -// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I20]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[ARRAYIDX_I24:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP6]], i64 [[CONV]] +// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I24]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VALUES]], i64 0, i64 [[CONV]] -// CHECK-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1 -// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP63:![0-9]+]] // CHECK: cleanup: // CHECK-NEXT: ret void @@ -494,8 +494,8 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< sycl::sub_group, int, 2, plain_global_ptr, full_group_striped>( sycl::sub_group, span, plain_global_ptr, full_group_striped); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2EPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_NS0_4spanISK_XT1_EEESL_T3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.9") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.13") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META15]] !sycl_fixed_targets [[META6]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2EPU3AS1iNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESL_NS0_4spanISJ_XT1_EEESK_T3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.15") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.21") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VALUES:%.*]] = alloca [2 x i32], align 4 // CHECK-NEXT: [[CMP_I:%.*]] = icmp ne ptr addrspace(1) [[OUT_PTR]], null @@ -505,51 +505,51 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< // CHECK-NEXT: [[CMP1_I_NOT:%.*]] = icmp eq i64 [[REM_I]], 0 // CHECK-NEXT: br i1 [[CMP1_I_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] // CHECK: if.then: -// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA11]] +// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA12]] // CHECK-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(4) // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META63:![0-9]+]] -// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA7]], !noalias [[META66:![0-9]+]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META64:![0-9]+]] +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META67:![0-9]+]] // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[IF_THEN]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] -// CHECK-NEXT: [[CMP_I19:%.*]] = icmp ult i32 [[I_0_I]], 2 -// CHECK-NEXT: br i1 [[CMP_I19]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2EPU3AS1INS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSA_INS3_14FULL_GROUP_KEYEJEEENSA_INS3_6DETAIL9NAIVE_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESP_NS0_4SPANISN_XT1_EEESO_T3__EXIT:%.*]] +// CHECK-NEXT: [[CMP_I23:%.*]] = icmp ult i32 [[I_0_I]], 2 +// CHECK-NEXT: br i1 [[CMP_I23]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2EPU3AS1INS3_14NEW_PROPERTIES10PROPERTIESINS8_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME1EEENS3_19FULL_GROUP_PROPERTYENS3_6DETAIL14NAIVE_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESN_NS0_4SPANISL_XT1_EEESM_T3__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP2]], i64 [[CONV_I]] -// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: [[MUL_I_I:%.*]] = mul nuw nsw i32 [[TMP4]], [[I_0_I]] // CHECK-NEXT: [[ADD_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I]] // CHECK-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[ADD_I_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT_PTR]], i64 [[IDXPROM_I]] -// CHECK-NEXT: store i32 [[TMP5]], ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: store i32 [[TMP5]], ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP69:![0-9]+]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2EPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_14full_group_keyEJEEENSA_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESP_NS0_4spanISN_XT1_EEESO_T3_.exit: +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP70:![0-9]+]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2EPU3AS1iNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESN_NS0_4spanISL_XT1_EEESM_T3_.exit: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] // CHECK-NEXT: br label [[CLEANUP:%.*]] // CHECK: if.end: // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VALUES]]) #[[ATTR7]] -// CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(4), ptr [[IN]], align 8, !tbaa [[TBAA46]] +// CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(4), ptr [[IN]], align 8, !tbaa [[TBAA47]] // CHECK-NEXT: br label [[FOR_COND:%.*]] // CHECK: for.cond: // CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[IF_END]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] // CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[I_0]], 2 // CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] // CHECK: for.cond.cleanup: -// CHECK-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr [[VALUES]], align 4, !tbaa [[TBAA25]] +// CHECK-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr [[VALUES]], align 4, !tbaa [[TBAA26]] // CHECK-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELIDv2_jEvPU3AS1jT_(ptr addrspace(1) noundef nonnull [[OUT_PTR]], <2 x i32> noundef [[TMP7]]) #[[ATTR5]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VALUES]]) #[[ATTR7]] // CHECK-NEXT: br label [[CLEANUP]] // CHECK: for.body: // CHECK-NEXT: [[CONV:%.*]] = zext nneg i32 [[I_0]] to i64 -// CHECK-NEXT: [[ARRAYIDX_I20:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP6]], i64 [[CONV]] -// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I20]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[ARRAYIDX_I24:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP6]], i64 [[CONV]] +// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I24]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VALUES]], i64 0, i64 [[CONV]] -// CHECK-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1 -// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP70:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP71:![0-9]+]] // CHECK: cleanup: // CHECK-NEXT: ret void @@ -560,35 +560,35 @@ using accessor_iter_t = accessor( sycl::sub_group, span, accessor_iter_t, full_group_striped); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2ENS0_6detail17accessor_iteratorIiLi1EEENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESN_NS0_4spanISL_XT1_EEESM_T3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.9") align 8 [[IN:%.*]], ptr noundef byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.13") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META15]] !sycl_fixed_targets [[META6]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2ENS0_6detail17accessor_iteratorIiLi1EEENS3_14new_properties10propertiesINS9_6detail20properties_type_listIJNS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_NS0_4spanISK_XT1_EEESL_T3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.15") align 8 [[IN:%.*]], ptr noundef byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.21") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA11]] -// CHECK-NEXT: [[AGG_TMP4_SROA_0_0_COPYLOAD:%.*]] = load ptr addrspace(4), ptr [[OUT_PTR]], align 8, !tbaa [[TBAA11]] +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA12]] +// CHECK-NEXT: [[AGG_TMP4_SROA_0_0_COPYLOAD:%.*]] = load ptr addrspace(4), ptr [[OUT_PTR]], align 8, !tbaa [[TBAA12]] // CHECK-NEXT: [[AGG_TMP4_SROA_2_0_OUT_PTR_ASCAST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[OUT_PTR]], i64 8 -// CHECK-NEXT: [[AGG_TMP4_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr [[AGG_TMP4_SROA_2_0_OUT_PTR_ASCAST_SROA_IDX]], align 8, !tbaa [[TBAA13]] +// CHECK-NEXT: [[AGG_TMP4_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr [[AGG_TMP4_SROA_2_0_OUT_PTR_ASCAST_SROA_IDX]], align 8, !tbaa [[TBAA14]] // CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META71:![0-9]+]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA7]], !noalias [[META74:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META72:![0-9]+]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META75:![0-9]+]] // CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr addrspace(4) [[AGG_TMP4_SROA_0_0_COPYLOAD]], i64 [[AGG_TMP4_SROA_2_0_COPYLOAD]] // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] // CHECK-NEXT: [[CMP_I:%.*]] = icmp ult i32 [[I_0_I]], 2 -// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2ENS0_6DETAIL17ACCESSOR_ITERATORIILI1EEENS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS3_6DETAIL9NAIVE_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESQ_NS0_4SPANISO_XT1_EEESP_T3__EXIT:%.*]] +// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2ENS0_6DETAIL17ACCESSOR_ITERATORIILI1EEENS3_14NEW_PROPERTIES10PROPERTIESINS9_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME1EEENS3_19FULL_GROUP_PROPERTYENS3_6DETAIL14NAIVE_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESO_NS0_4SPANISM_XT1_EEESN_T3__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I]] -// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: [[MUL_I_I:%.*]] = mul nuw nsw i32 [[TMP3]], [[I_0_I]] // CHECK-NEXT: [[ADD_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I]] // CHECK-NEXT: [[CONV5_I:%.*]] = sext i32 [[ADD_I_I]] to i64 // CHECK-NEXT: [[ADD_PTR_I_I_I:%.*]] = getelementptr i32, ptr addrspace(4) [[TMP4]], i64 [[CONV5_I]] -// CHECK-NEXT: store i32 [[TMP5]], ptr addrspace(4) [[ADD_PTR_I_I_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: store i32 [[TMP5]], ptr addrspace(4) [[ADD_PTR_I_I_I]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP77:![0-9]+]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2ENS0_6detail17accessor_iteratorIiLi1EEENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_14full_group_keyEJEEENSB_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESQ_NS0_4spanISO_XT1_EEESP_T3_.exit: +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP78:![0-9]+]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2ENS0_6detail17accessor_iteratorIiLi1EEENS3_14new_properties10propertiesINS9_6detail20properties_type_listIJNS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_NS0_4spanISM_XT1_EEESN_T3_.exit: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] // CHECK-NEXT: ret void @@ -598,70 +598,70 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< span, accessor_iter_t, opt_striped); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2ENS0_6detail17accessor_iteratorIiLi1EEENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESP_NS0_4spanISN_XT1_EEESO_T3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.9") align 8 [[IN:%.*]], ptr noundef byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.11") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META15]] !sycl_fixed_targets [[META6]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2ENS0_6detail17accessor_iteratorIiLi1EEENS3_14new_properties10propertiesINS9_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESN_NS0_4spanISL_XT1_EEESM_T3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.15") align 8 [[IN:%.*]], ptr noundef byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.19") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VALUES:%.*]] = alloca [2 x i32], align 4 -// CHECK-NEXT: [[AGG_TMP_SROA_0_0_COPYLOAD:%.*]] = load ptr addrspace(4), ptr [[OUT_PTR]], align 8, !tbaa [[TBAA11]] +// CHECK-NEXT: [[AGG_TMP_SROA_0_0_COPYLOAD:%.*]] = load ptr addrspace(4), ptr [[OUT_PTR]], align 8, !tbaa [[TBAA12]] // CHECK-NEXT: [[AGG_TMP_SROA_2_0_OUT_PTR_ASCAST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[OUT_PTR]], i64 8 -// CHECK-NEXT: [[AGG_TMP_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr [[AGG_TMP_SROA_2_0_OUT_PTR_ASCAST_SROA_IDX]], align 8, !tbaa [[TBAA13]] +// CHECK-NEXT: [[AGG_TMP_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr [[AGG_TMP_SROA_2_0_OUT_PTR_ASCAST_SROA_IDX]], align 8, !tbaa [[TBAA14]] // CHECK-NEXT: [[ADD_PTR_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[AGG_TMP_SROA_0_0_COPYLOAD]], i64 [[AGG_TMP_SROA_2_0_COPYLOAD]] // CHECK-NEXT: [[CMP_I_I:%.*]] = icmp ne ptr addrspace(4) [[ADD_PTR_I_I]], null // CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I]]) // CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(4) [[ADD_PTR_I_I]] to i64 // CHECK-NEXT: [[REM_I_I:%.*]] = and i64 [[TMP0]], 15 // CHECK-NEXT: [[CMP1_I_I:%.*]] = icmp eq i64 [[REM_I_I]], 0 -// CHECK-NEXT: br i1 [[CMP1_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL6DETAIL16GET_BLOCK_OP_PTRILI16ELM2ENS0_6DETAIL17ACCESSOR_ITERATORIILI1EEENS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEEEEEEEEDAT1_T2__EXIT:%.*]], label [[IF_THEN:%.*]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental6detail16get_block_op_ptrILi16ELm2ENS0_6detail17accessor_iteratorIiLi1EEENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEEEEEEEEDaT1_T2_.exit: +// CHECK-NEXT: br i1 [[CMP1_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL6DETAIL16GET_BLOCK_OP_PTRILI16ELM2ENS0_6DETAIL17ACCESSOR_ITERATORIILI1EEENS3_14NEW_PROPERTIES10PROPERTIESINS9_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME1EEENS3_19FULL_GROUP_PROPERTYEEEEEEEEDAT1_T2__EXIT:%.*]], label [[IF_THEN:%.*]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental6detail16get_block_op_ptrILi16ELm2ENS0_6detail17accessor_iteratorIiLi1EEENS3_14new_properties10propertiesINS9_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyEEEEEEEEDaT1_T2_.exit: // CHECK-NEXT: [[CALL_I_I_I:%.*]] = tail call spir_func noundef ptr addrspace(1) @_Z41__spirv_GenericCastToPtrExplicit_ToGlobalPvi(ptr addrspace(4) noundef nonnull [[ADD_PTR_I_I]], i32 noundef 5) #[[ATTR6]] // CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq ptr addrspace(1) [[CALL_I_I_I]], null // CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_THEN]], label [[IF_END:%.*]] // CHECK: if.then: -// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA11]] +// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA12]] // CHECK-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(4) // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META78:![0-9]+]] -// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA7]], !noalias [[META81:![0-9]+]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META79:![0-9]+]] +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META82:![0-9]+]] // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[IF_THEN]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] // CHECK-NEXT: [[CMP_I:%.*]] = icmp ult i32 [[I_0_I]], 2 -// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2ENS0_6DETAIL17ACCESSOR_ITERATORIILI1EEENS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS3_6DETAIL9NAIVE_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESS_NS0_4SPANISQ_XT1_EEESR_T3__EXIT:%.*]] +// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2ENS0_6DETAIL17ACCESSOR_ITERATORIILI1EEENS3_14NEW_PROPERTIES10PROPERTIESINS9_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME1EEENS3_19FULL_GROUP_PROPERTYENS3_6DETAIL14NAIVE_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESP_NS0_4SPANISN_XT1_EEESO_T3__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP2]], i64 [[CONV_I]] -// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: [[MUL_I_I:%.*]] = mul nuw nsw i32 [[TMP4]], [[I_0_I]] // CHECK-NEXT: [[ADD_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I]] // CHECK-NEXT: [[CONV5_I:%.*]] = sext i32 [[ADD_I_I]] to i64 // CHECK-NEXT: [[ADD_PTR_I_I_I:%.*]] = getelementptr i32, ptr addrspace(4) [[ADD_PTR_I_I]], i64 [[CONV5_I]] -// CHECK-NEXT: store i32 [[TMP5]], ptr addrspace(4) [[ADD_PTR_I_I_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: store i32 [[TMP5]], ptr addrspace(4) [[ADD_PTR_I_I_I]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP84:![0-9]+]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2ENS0_6detail17accessor_iteratorIiLi1EEENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESS_NS0_4spanISQ_XT1_EEESR_T3_.exit: +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP85:![0-9]+]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2ENS0_6detail17accessor_iteratorIiLi1EEENS3_14new_properties10propertiesINS9_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESP_NS0_4spanISN_XT1_EEESO_T3_.exit: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] // CHECK-NEXT: br label [[CLEANUP:%.*]] // CHECK: if.end: // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VALUES]]) #[[ATTR7]] -// CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(4), ptr [[IN]], align 8, !tbaa [[TBAA46]] +// CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(4), ptr [[IN]], align 8, !tbaa [[TBAA47]] // CHECK-NEXT: br label [[FOR_COND:%.*]] // CHECK: for.cond: // CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[IF_END]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] // CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[I_0]], 2 // CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] // CHECK: for.cond.cleanup: -// CHECK-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr [[VALUES]], align 4, !tbaa [[TBAA25]] +// CHECK-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr [[VALUES]], align 4, !tbaa [[TBAA26]] // CHECK-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELIDv2_jEvPU3AS1jT_(ptr addrspace(1) noundef nonnull [[CALL_I_I_I]], <2 x i32> noundef [[TMP7]]) #[[ATTR5]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VALUES]]) #[[ATTR7]] // CHECK-NEXT: br label [[CLEANUP]] // CHECK: for.body: // CHECK-NEXT: [[CONV:%.*]] = zext nneg i32 [[I_0]] to i64 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP6]], i64 [[CONV]] -// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VALUES]], i64 0, i64 [[CONV]] -// CHECK-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1 -// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP85:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP86:![0-9]+]] // CHECK: cleanup: // CHECK-NEXT: ret void @@ -670,8 +670,8 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< sycl::sub_group, short, 4, plain_global_ptr, opt_striped>( sycl::sub_group, span, plain_global_ptr, opt_striped); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEsLm4EPU3AS1sNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_NS0_4spanISM_XT1_EEESN_T3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.4") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.11") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META15]] !sycl_fixed_targets [[META6]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEsLm4EPU3AS1sNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_NS0_4spanISK_XT1_EEESL_T3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.10") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.19") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VALUES:%.*]] = alloca [4 x i16], align 2 // CHECK-NEXT: [[CMP_I:%.*]] = icmp ne ptr addrspace(1) [[OUT_PTR]], null @@ -681,51 +681,51 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< // CHECK-NEXT: [[CMP1_I_NOT:%.*]] = icmp eq i64 [[REM_I]], 0 // CHECK-NEXT: br i1 [[CMP1_I_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] // CHECK: if.then: -// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA11]] +// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA12]] // CHECK-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(4) // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META86:![0-9]+]] -// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA7]], !noalias [[META89:![0-9]+]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META87:![0-9]+]] +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META90:![0-9]+]] // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[IF_THEN]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] -// CHECK-NEXT: [[CMP_I19:%.*]] = icmp ult i32 [[I_0_I]], 4 -// CHECK-NEXT: br i1 [[CMP_I19]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM4EPU3AS1SNS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSA_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSA_INS3_14FULL_GROUP_KEYEJEEENSA_INS3_6DETAIL9NAIVE_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_T3__EXIT:%.*]] +// CHECK-NEXT: [[CMP_I23:%.*]] = icmp ult i32 [[I_0_I]], 4 +// CHECK-NEXT: br i1 [[CMP_I23]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM4EPU3AS1SNS3_14NEW_PROPERTIES10PROPERTIESINS8_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME1EEENS3_19FULL_GROUP_PROPERTYENS3_6DETAIL14NAIVE_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESO_NS0_4SPANISM_XT1_EEESN_T3__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(4) [[TMP2]], i64 [[CONV_I]] -// CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA19]] +// CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA20]] // CHECK-NEXT: [[MUL_I_I:%.*]] = mul i32 [[TMP4]], [[I_0_I]] // CHECK-NEXT: [[ADD_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I]] // CHECK-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[ADD_I_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[OUT_PTR]], i64 [[IDXPROM_I]] -// CHECK-NEXT: store i16 [[TMP5]], ptr addrspace(1) [[ARRAYIDX_I]], align 2, !tbaa [[TBAA19]] +// CHECK-NEXT: store i16 [[TMP5]], ptr addrspace(1) [[ARRAYIDX_I]], align 2, !tbaa [[TBAA20]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP92:![0-9]+]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEsLm4EPU3AS1sNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEENSA_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_T3_.exit: +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP93:![0-9]+]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEsLm4EPU3AS1sNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_NS0_4spanISM_XT1_EEESN_T3_.exit: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] // CHECK-NEXT: br label [[CLEANUP:%.*]] // CHECK: if.end: // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VALUES]]) #[[ATTR7]] -// CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(4), ptr [[IN]], align 8, !tbaa [[TBAA23]] +// CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(4), ptr [[IN]], align 8, !tbaa [[TBAA24]] // CHECK-NEXT: br label [[FOR_COND:%.*]] // CHECK: for.cond: // CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[IF_END]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] // CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[I_0]], 4 // CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] // CHECK: for.cond.cleanup: -// CHECK-NEXT: [[TMP7:%.*]] = load <4 x i16>, ptr [[VALUES]], align 2, !tbaa [[TBAA25]] +// CHECK-NEXT: [[TMP7:%.*]] = load <4 x i16>, ptr [[VALUES]], align 2, !tbaa [[TBAA26]] // CHECK-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELIDv4_tEvPU3AS1tT_(ptr addrspace(1) noundef nonnull [[OUT_PTR]], <4 x i16> noundef [[TMP7]]) #[[ATTR5]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VALUES]]) #[[ATTR7]] // CHECK-NEXT: br label [[CLEANUP]] // CHECK: for.body: // CHECK-NEXT: [[CONV:%.*]] = zext nneg i32 [[I_0]] to i64 -// CHECK-NEXT: [[ARRAYIDX_I20:%.*]] = getelementptr inbounds i16, ptr addrspace(4) [[TMP6]], i64 [[CONV]] -// CHECK-NEXT: [[TMP8:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I20]], align 2, !tbaa [[TBAA19]] +// CHECK-NEXT: [[ARRAYIDX_I24:%.*]] = getelementptr inbounds i16, ptr addrspace(4) [[TMP6]], i64 [[CONV]] +// CHECK-NEXT: [[TMP8:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I24]], align 2, !tbaa [[TBAA20]] // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x i16], ptr [[VALUES]], i64 0, i64 [[CONV]] -// CHECK-NEXT: store i16 [[TMP8]], ptr [[ARRAYIDX]], align 2, !tbaa [[TBAA19]] +// CHECK-NEXT: store i16 [[TMP8]], ptr [[ARRAYIDX]], align 2, !tbaa [[TBAA20]] // CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1 -// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP93:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP94:![0-9]+]] // CHECK: cleanup: // CHECK-NEXT: ret void @@ -733,31 +733,31 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< sycl::sub_group, int, 3, plain_global_ptr, opt_striped>( sycl::sub_group, span, plain_global_ptr, opt_striped); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm3EPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_NS0_4spanISM_XT1_EEESN_T3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.6") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.11") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META15]] !sycl_fixed_targets [[META6]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm3EPU3AS1iNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_NS0_4spanISK_XT1_EEESL_T3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.12") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.19") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA11]] +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA12]] // CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META94:![0-9]+]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA7]], !noalias [[META97:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META95:![0-9]+]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META98:![0-9]+]] // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] // CHECK-NEXT: [[CMP_I:%.*]] = icmp ult i32 [[I_0_I]], 3 -// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM3EPU3AS1INS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSA_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSA_INS3_14FULL_GROUP_KEYEJEEENSA_INS3_6DETAIL9NAIVE_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_T3__EXIT:%.*]] +// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM3EPU3AS1INS3_14NEW_PROPERTIES10PROPERTIESINS8_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME1EEENS3_19FULL_GROUP_PROPERTYENS3_6DETAIL14NAIVE_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESO_NS0_4SPANISM_XT1_EEESN_T3__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I]] -// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: [[MUL_I_I:%.*]] = mul i32 [[TMP3]], [[I_0_I]] // CHECK-NEXT: [[ADD_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I]] // CHECK-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[ADD_I_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT_PTR]], i64 [[IDXPROM_I]] -// CHECK-NEXT: store i32 [[TMP4]], ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: store i32 [[TMP4]], ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP100:![0-9]+]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm3EPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEENSA_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_T3_.exit: +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP101:![0-9]+]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm3EPU3AS1iNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_NS0_4spanISM_XT1_EEESN_T3_.exit: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] // CHECK-NEXT: ret void @@ -765,31 +765,31 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< sycl::sub_group, int, 16, plain_global_ptr, opt_striped>( sycl::sub_group, span, plain_global_ptr, opt_striped); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm16EPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_NS0_4spanISM_XT1_EEESN_T3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.15") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.11") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META15]] !sycl_fixed_targets [[META6]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm16EPU3AS1iNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_NS0_4spanISK_XT1_EEESL_T3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.23") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.19") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA11]] +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA12]] // CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META101:![0-9]+]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA7]], !noalias [[META104:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META102:![0-9]+]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META105:![0-9]+]] // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] // CHECK-NEXT: [[CMP_I:%.*]] = icmp ult i32 [[I_0_I]], 16 -// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM16EPU3AS1INS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSA_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSA_INS3_14FULL_GROUP_KEYEJEEENSA_INS3_6DETAIL9NAIVE_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_T3__EXIT:%.*]] +// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM16EPU3AS1INS3_14NEW_PROPERTIES10PROPERTIESINS8_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME1EEENS3_19FULL_GROUP_PROPERTYENS3_6DETAIL14NAIVE_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESO_NS0_4SPANISM_XT1_EEESN_T3__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I]] -// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: [[MUL_I_I:%.*]] = mul i32 [[TMP3]], [[I_0_I]] // CHECK-NEXT: [[ADD_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I]] // CHECK-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[ADD_I_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT_PTR]], i64 [[IDXPROM_I]] -// CHECK-NEXT: store i32 [[TMP4]], ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: store i32 [[TMP4]], ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP107:![0-9]+]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm16EPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEENSA_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_T3_.exit: +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP108:![0-9]+]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm16EPU3AS1iNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_NS0_4spanISM_XT1_EEESN_T3_.exit: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] // CHECK-NEXT: ret void @@ -797,30 +797,30 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< sycl::sub_group, int, 11, plain_global_ptr, opt_striped>( sycl::sub_group, span, plain_global_ptr, opt_striped); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm11EPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_NS0_4spanISM_XT1_EEESN_T3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.16") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.11") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META15]] !sycl_fixed_targets [[META6]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm11EPU3AS1iNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_NS0_4spanISK_XT1_EEESL_T3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.24") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.19") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA11]] +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA12]] // CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META108:![0-9]+]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA7]], !noalias [[META111:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META109:![0-9]+]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META112:![0-9]+]] // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] // CHECK-NEXT: [[CMP_I:%.*]] = icmp ult i32 [[I_0_I]], 11 -// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM11EPU3AS1INS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSA_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSA_INS3_14FULL_GROUP_KEYEJEEENSA_INS3_6DETAIL9NAIVE_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_T3__EXIT:%.*]] +// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM11EPU3AS1INS3_14NEW_PROPERTIES10PROPERTIESINS8_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME1EEENS3_19FULL_GROUP_PROPERTYENS3_6DETAIL14NAIVE_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESO_NS0_4SPANISM_XT1_EEESN_T3__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I]] -// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: [[MUL_I_I:%.*]] = mul i32 [[TMP3]], [[I_0_I]] // CHECK-NEXT: [[ADD_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I]] // CHECK-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[ADD_I_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT_PTR]], i64 [[IDXPROM_I]] -// CHECK-NEXT: store i32 [[TMP4]], ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: store i32 [[TMP4]], ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA8]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP114:![0-9]+]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm11EPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEENSA_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_T3_.exit: +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP115:![0-9]+]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm11EPU3AS1iNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_NS0_4spanISM_XT1_EEESN_T3_.exit: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] // CHECK-NEXT: ret void From 9770438a25a6db915ebf0bf1b25552e25c673f66 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Fri, 18 Oct 2024 10:49:23 -0700 Subject: [PATCH 04/32] Fix rt_properties.get_property() in constexpr context Used to fail with gcc --- .../sycl/ext/oneapi/properties/properties.hpp | 8 +++- .../extensions/properties/new_properties.cpp | 37 +++++++++++++++---- 2 files changed, 37 insertions(+), 8 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/properties/properties.hpp b/sycl/include/sycl/ext/oneapi/properties/properties.hpp index 0e506e9882294..b1ceae4ad6896 100644 --- a/sycl/include/sycl/ext/oneapi/properties/properties.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/properties.hpp @@ -99,7 +99,13 @@ struct property_base : property_key_tag { protected: using key_t = property_key_t; constexpr property_t get_property(property_key_tag) const { - return *static_cast(this); + // https://godbolt.org/z/MY6849jGh for a reduced test reflecting original + // implementation that worked with clang/msvc and failed with gcc. + if constexpr (std::is_empty_v) { + return property_t{}; + } else { + return *static_cast(this); + } } }; } // namespace detail diff --git a/sycl/test/extensions/properties/new_properties.cpp b/sycl/test/extensions/properties/new_properties.cpp index f69ebe001cf8f..374ecfaa8cbf9 100644 --- a/sycl/test/extensions/properties/new_properties.cpp +++ b/sycl/test/extensions/properties/new_properties.cpp @@ -125,17 +125,40 @@ constexpr auto merge_properties( return 42; } static_assert(merge_properties(pl3, naive{}) == 42); -} +} // namespace test_group_load_store namespace test_merge_ctor { - template - struct property : detail::property_base> { - static constexpr int sort_key = N; - }; +template struct property : detail::property_base> { + static constexpr int sort_key = N; +}; + +constexpr properties pl1{property<1>{}, property<2>{}, property<3>{}}; +constexpr properties pl2{pl1, property<4>{}}; +} // namespace test_merge_ctor + +namespace test_compile_prop_in_runtime_list { +template +struct ct_prop : detail::property_base, struct ct_prop_key> { + static constexpr int sort_key = 1; - constexpr properties pl1{property<1>{}, property<2>{}, property<3>{}}; - constexpr properties pl2{pl1, property<4>{}}; + static constexpr auto value() { return N; } +}; +struct rt_prop : detail::property_base { + static constexpr int sort_key = 2; + rt_prop(int N) : x(N) {} + + int x; + + constexpr auto value() { return x; } +}; +void test() { + int x = 42; + properties pl{ct_prop<42>{}, rt_prop{x}}; + constexpr auto p = pl.get_property(); + static_assert(std::is_same_v>); + static_assert(p.value() == 42); } +} // namespace test_compile_prop_in_runtime_list int main() { test::test(); From 49d4fa03f256364403f3f6f917b81f9d6ec25426 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Fri, 18 Oct 2024 11:27:30 -0700 Subject: [PATCH 05/32] Automatic naming in the new_properties.cpp test --- .../extensions/properties/new_properties.cpp | 89 ++++++++++++------- 1 file changed, 58 insertions(+), 31 deletions(-) diff --git a/sycl/test/extensions/properties/new_properties.cpp b/sycl/test/extensions/properties/new_properties.cpp index 374ecfaa8cbf9..84f85535954ec 100644 --- a/sycl/test/extensions/properties/new_properties.cpp +++ b/sycl/test/extensions/properties/new_properties.cpp @@ -6,27 +6,37 @@ using namespace sycl::ext::oneapi::experimental::new_properties; using mock_property_sort_key_t = int; -namespace test_sorting { -template struct Property : detail::property_base> { - static constexpr mock_property_sort_key_t sort_key = N; +template constexpr auto generate_property_key_name() { +#if defined(__clang__) || defined(__GNUC__) + return __PRETTY_FUNCTION__; +#elif defined(_MSC_VER) + return __FUNCSIG__; +#else +#error "Unsupported compiler" +#endif +} + +template +struct named_property_base + : public detail::property_base { + static constexpr std::string_view sort_key = + generate_property_key_name(); }; + +namespace test_sorting { +template struct Property : named_property_base> {}; static_assert( std::is_same_v{}, Property<2>{}}), decltype(properties{Property<2>{}, Property<3>{}})>); } // namespace test_sorting namespace test { -struct property1 : detail::property_base { - static constexpr mock_property_sort_key_t sort_key = 1; -}; +struct property1 : named_property_base {}; template -struct property2 : detail::property_base, struct property2_key> { - static constexpr mock_property_sort_key_t sort_key = 2; -}; +struct property2 : named_property_base, struct property2_key> {}; -struct property3 : detail::property_base { - static constexpr mock_property_sort_key_t sort_key = 3; +struct property3 : named_property_base { property3(int x) : x(x) {} int x; }; @@ -51,8 +61,7 @@ void test() { } // namespace test namespace bench { -template struct property : detail::property_base> { - static constexpr mock_property_sort_key_t sort_key = 1000 + N; +template struct property : named_property_base> { static constexpr int value() { return N; } }; @@ -65,12 +74,8 @@ template void test(std::integer_sequence) { } // namespace bench namespace test_group_load_store { -struct naive : detail::property_base { - static constexpr mock_property_sort_key_t sort_key = 1; -}; -struct full_group : detail::property_base { - static constexpr mock_property_sort_key_t sort_key = 2; -}; +struct naive : named_property_base {}; +struct full_group : named_property_base {}; constexpr properties pl1{full_group{}}; constexpr properties pl2{pl1, naive{}}; static_assert(pl1.template has_property()); @@ -81,9 +86,8 @@ static_assert(pl2.template has_property()); enum class data_placement { blocked, striped }; template struct data_placement_property - : detail::property_base, - struct data_placement_property_key> { - static constexpr mock_property_sort_key_t sort_key = 3; + : named_property_base, + struct data_placement_property_key> { static constexpr bool is_blocked() { return placement == data_placement::blocked; } @@ -128,9 +132,7 @@ static_assert(merge_properties(pl3, naive{}) == 42); } // namespace test_group_load_store namespace test_merge_ctor { -template struct property : detail::property_base> { - static constexpr int sort_key = N; -}; +template struct property : named_property_base> {}; constexpr properties pl1{property<1>{}, property<2>{}, property<3>{}}; constexpr properties pl2{pl1, property<4>{}}; @@ -138,13 +140,10 @@ constexpr properties pl2{pl1, property<4>{}}; namespace test_compile_prop_in_runtime_list { template -struct ct_prop : detail::property_base, struct ct_prop_key> { - static constexpr int sort_key = 1; - +struct ct_prop : named_property_base, struct ct_prop_key> { static constexpr auto value() { return N; } }; -struct rt_prop : detail::property_base { - static constexpr int sort_key = 2; +struct rt_prop : named_property_base { rt_prop(int N) : x(N) {} int x; @@ -162,7 +161,35 @@ void test() { int main() { test::test(); - bench::test(std::make_integer_sequence{}); + bench::test(std::make_integer_sequence{}); + // More than 67 fails with clang + // clang-format off + // new_properties.cpp:165:10: note: in instantiation of function template specialization 'bench::test<0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + // 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, + // 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67>' requested here + // 165 | bench::test(std::make_integer_sequence{}); + // | ^ + // /usr/lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/bits/char_traits.h:367:7: note: constexpr evaluation hit maximum step limit; + // possible infinite loop? + // 367 | { + // | ^ + // /usr/lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/bits/char_traits.h:382:12: note: in call to 'lt(__PRETTY_FUNCTION__[22], + // __PRETTY_FUNCTION__[22])' + // 382 | if (lt(__s1[__i], __s2[__i])) + // | ^~~~~~~~~~~~~~~~~~~~~~~~ + // /usr/lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/string_view:320:14: note: in call to 'compare(&__PRETTY_FUNCTION__[0], + // &__PRETTY_FUNCTION__[0], 71)' + // 320 | int __ret = traits_type::compare(this->_M_str, __str._M_str, __rlen); + // | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + // /usr/lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/string_view:596:14: note: in call to '__x.compare({71, &__PRETTY_FUNCTION__[0]})' + // 596 | { return __x.compare(__y) < 0; } + // | ^~~~~~~~~~~~~~~~ + // /iusers/aeloviko/sycl/sycl/include/sycl/ext/oneapi/properties/properties.hpp:78:13: note: in call to 'operator<>({71, &__PRETTY_FUNCTION__[0]}, {71, &__PRETTY_FUNCTION__[0]})' + // 78 | if (to_sort[j].first < to_sort[i].first) + // | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + // clang-format on + // bench::test(std::make_integer_sequence{}); properties empty_props{}; } From 570f9314b5c8c8b148e880dc16a152d27a332bd9 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Fri, 18 Oct 2024 11:34:53 -0700 Subject: [PATCH 06/32] Rename sort_key -> property_name --- .../sycl/ext/oneapi/experimental/group_load_store.hpp | 8 ++++---- sycl/include/sycl/ext/oneapi/properties/properties.hpp | 6 ++---- sycl/test/extensions/properties/new_properties.cpp | 2 +- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/group_load_store.hpp b/sycl/include/sycl/ext/oneapi/experimental/group_load_store.hpp index 8fcccfed0820c..69be227d9d9e1 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/group_load_store.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/group_load_store.hpp @@ -26,7 +26,7 @@ template struct data_placement_property : new_properties::detail::property_base< data_placement_property, struct data_placement_property_key> { - static constexpr new_properties::property_sort_key_t sort_key{ + static constexpr std::string_view property_name{ "sycl::ext::oneapi::experimental::data_placement_property_key"}; static constexpr bool is_blocked() { @@ -45,7 +45,7 @@ inline constexpr auto data_placement_striped = // Contiguous memory property struct contiguous_memory_property : new_properties::detail::property_base { - static constexpr new_properties::property_sort_key_t sort_key{ + static constexpr std::string_view property_name{ "sycl::ext::oneapi::experimental::contiguous_memory_property"}; }; @@ -54,7 +54,7 @@ inline constexpr contiguous_memory_property contiguous_memory; // Full group property struct full_group_property : new_properties::detail::property_base { - static constexpr new_properties::property_sort_key_t sort_key{ + static constexpr std::string_view property_name{ "sycl::ext::oneapi::experimental::full_group_property"}; }; @@ -64,7 +64,7 @@ namespace detail { // Naive implementation property struct naive_property : new_properties::detail::property_base { - static constexpr new_properties::property_sort_key_t sort_key{ + static constexpr std::string_view property_name{ "sycl::ext::oneapi::experimental::naive_property"}; }; diff --git a/sycl/include/sycl/ext/oneapi/properties/properties.hpp b/sycl/include/sycl/ext/oneapi/properties/properties.hpp index b1ceae4ad6896..fe9810a72ecde 100644 --- a/sycl/include/sycl/ext/oneapi/properties/properties.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/properties.hpp @@ -64,7 +64,7 @@ struct properties_sorter, int idx = 0; int N = sizeof...(property_tys); // TODO: Use C++20 constexpr std::sort if available. - std::array to_sort{std::pair{property_tys::sort_key, idx++}...}; + std::array to_sort{std::pair{property_tys::property_name, idx++}...}; auto swap_pair = [](auto &x, auto &y) constexpr { auto tmp_first = x.first; auto tmp_second = x.second; @@ -110,8 +110,6 @@ struct property_base : property_key_tag { }; } // namespace detail -using property_sort_key_t = std::string_view; - template class properties; template struct is_property_list : std::false_type {}; @@ -134,7 +132,7 @@ class __SYCL_EBO properties> if constexpr (sizeof...(property_tys) == 0) { return true; } else { - const std::array sort_keys = {property_tys::sort_key...}; + const std::array sort_keys = {property_tys::property_name...}; // std::is_sorted isn't constexpr until C++20. if (sort_keys.empty()) return true; diff --git a/sycl/test/extensions/properties/new_properties.cpp b/sycl/test/extensions/properties/new_properties.cpp index 84f85535954ec..c36c5e1055aaf 100644 --- a/sycl/test/extensions/properties/new_properties.cpp +++ b/sycl/test/extensions/properties/new_properties.cpp @@ -19,7 +19,7 @@ template constexpr auto generate_property_key_name() { template struct named_property_base : public detail::property_base { - static constexpr std::string_view sort_key = + static constexpr std::string_view property_name = generate_property_key_name(); }; From ab038ba579b992e91d8dba2a41578142d23c3cad Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Fri, 18 Oct 2024 12:24:31 -0700 Subject: [PATCH 07/32] Better error message for duplicate property_key + negative test --- .../sycl/ext/oneapi/properties/properties.hpp | 68 ++++++++++++++----- .../properties/new_properties_negative.cpp | 37 ++++++++++ 2 files changed, 88 insertions(+), 17 deletions(-) create mode 100644 sycl/test/extensions/properties/new_properties_negative.cpp diff --git a/sycl/include/sycl/ext/oneapi/properties/properties.hpp b/sycl/include/sycl/ext/oneapi/properties/properties.hpp index fe9810a72ecde..741b0cf9141e4 100644 --- a/sycl/include/sycl/ext/oneapi/properties/properties.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/properties.hpp @@ -108,9 +108,41 @@ struct property_base : property_key_tag { } } }; + +template +inline constexpr bool property_keys_are_unique = []() constexpr { + if constexpr (sizeof...(property_tys) == 0) { + return true; + } else { + const std::array keys = {property_tys::property_name...}; + auto N = keys.size(); + for (int i = 0; i < N; ++i) + for (int j = i + 1; j < N; ++j) + if (keys[i] == keys[j]) + return false; + + return true; + } +}(); + +template +inline constexpr bool properties_are_sorted = []() constexpr { + if constexpr (sizeof...(property_tys) == 0) { + return true; + } else { + const std::array sort_keys = {property_tys::property_name...}; + // std::is_sorted isn't constexpr until C++20. + if (sort_keys.empty()) + return true; + for (std::size_t idx = 1; idx < sort_keys.size(); ++idx) + if (sort_keys[idx - 1] >= sort_keys[idx]) + return false; + return true; + } +}(); } // namespace detail -template class properties; +template class properties; template struct is_property_list : std::false_type {}; template @@ -123,26 +155,28 @@ inline constexpr bool is_property_v = std::is_base_of_v && !is_property_list_v; +template <> class properties, void> { + template static constexpr bool has_property() { return false; } +}; + +template +class properties< + detail::properties_type_list, + std::enable_if_t>> { + static_assert((is_property_v && ...)); + static_assert(detail::property_keys_are_unique, + "Property keys must be unique"); +}; + template -class __SYCL_EBO properties> +class __SYCL_EBO + properties, + std::enable_if_t>> : public property_tys... { static_assert((is_property_v && ...)); static_assert( - []() constexpr { - if constexpr (sizeof...(property_tys) == 0) { - return true; - } else { - const std::array sort_keys = {property_tys::property_name...}; - // std::is_sorted isn't constexpr until C++20. - if (sort_keys.empty()) - return true; - for (std::size_t idx = 1; idx < sort_keys.size(); ++idx) - if (sort_keys[idx - 1] >= sort_keys[idx]) - return false; - return true; - } - }(), - "Properties must be sorted and non-repeating!"); + detail::properties_are_sorted, + "Properties must be sorted!"); static_assert( (std::is_base_of_v && ...)); using property_tys::get_property...; diff --git a/sycl/test/extensions/properties/new_properties_negative.cpp b/sycl/test/extensions/properties/new_properties_negative.cpp new file mode 100644 index 0000000000000..9ab5f13755780 --- /dev/null +++ b/sycl/test/extensions/properties/new_properties_negative.cpp @@ -0,0 +1,37 @@ +// RUN: %clangxx -fsycl -fsyntax-only %s -Xclang -verify -Xclang -verify-ignore-unexpected=note + +#include + +using namespace sycl::ext::oneapi::experimental::new_properties; + +using mock_property_sort_key_t = int; + +template constexpr auto generate_property_key_name() { +#if defined(__clang__) || defined(__GNUC__) + return __PRETTY_FUNCTION__; +#elif defined(_MSC_VER) + return __FUNCSIG__; +#else +#error "Unsupported compiler" +#endif +} + +template +struct named_property_base + : public detail::property_base { + static constexpr std::string_view property_name = + generate_property_key_name(); +}; + +template struct property : named_property_base> {}; + +void test() { + // expected-error-re@sycl/ext/oneapi/properties/properties.hpp:* {{static assertion failed due to requirement {{.+}}: Property keys must be unique}} + std::ignore = properties{property<1>{}, property<1>{}}; + + constexpr properties pl{property<1>{}, property<2>{}}; + // expected-error-re@sycl/ext/oneapi/properties/properties.hpp:* {{static assertion failed due to requirement {{.+}}: Property keys must be unique}} + std::ignore = properties{pl, property<1>{}}; +} + + From 03253247562edc408c3686b8651a2265f77eb953 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Fri, 18 Oct 2024 12:29:18 -0700 Subject: [PATCH 08/32] Remove stale static_assert, a better check is few lines above using `is_property_v` --- sycl/include/sycl/ext/oneapi/properties/properties.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/properties/properties.hpp b/sycl/include/sycl/ext/oneapi/properties/properties.hpp index 741b0cf9141e4..cf045e4ef44f7 100644 --- a/sycl/include/sycl/ext/oneapi/properties/properties.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/properties.hpp @@ -177,8 +177,6 @@ class __SYCL_EBO static_assert( detail::properties_are_sorted, "Properties must be sorted!"); - static_assert( - (std::is_base_of_v && ...)); using property_tys::get_property...; public: From d5a6746b2a419dd098a9d4293909421c676d9acb Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Fri, 18 Oct 2024 12:31:10 -0700 Subject: [PATCH 09/32] remove dead code --- sycl/include/sycl/ext/oneapi/properties/properties.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/properties/properties.hpp b/sycl/include/sycl/ext/oneapi/properties/properties.hpp index cf045e4ef44f7..e8dbefd4dd78b 100644 --- a/sycl/include/sycl/ext/oneapi/properties/properties.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/properties.hpp @@ -132,8 +132,6 @@ inline constexpr bool properties_are_sorted = []() constexpr { } else { const std::array sort_keys = {property_tys::property_name...}; // std::is_sorted isn't constexpr until C++20. - if (sort_keys.empty()) - return true; for (std::size_t idx = 1; idx < sort_keys.size(); ++idx) if (sort_keys[idx - 1] >= sort_keys[idx]) return false; From 38ee5e8b876b7e9d2fb45fc4837e0c10ff31c0c7 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Fri, 18 Oct 2024 12:40:29 -0700 Subject: [PATCH 10/32] More comments --- .../sycl/ext/oneapi/properties/properties.hpp | 21 +++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/properties/properties.hpp b/sycl/include/sycl/ext/oneapi/properties/properties.hpp index e8dbefd4dd78b..f015ef2ba52cb 100644 --- a/sycl/include/sycl/ext/oneapi/properties/properties.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/properties.hpp @@ -63,7 +63,8 @@ struct properties_sorter, static constexpr auto sorted_indices = []() constexpr { int idx = 0; int N = sizeof...(property_tys); - // TODO: Use C++20 constexpr std::sort if available. + // std::sort isn't constexpr until C++20. Also, it's possible there will be + // a compiler builtin to sort types, in which case we should start using it. std::array to_sort{std::pair{property_tys::property_name, idx++}...}; auto swap_pair = [](auto &x, auto &y) constexpr { auto tmp_first = x.first; @@ -89,8 +90,10 @@ struct properties_sorter, nth_type_t...>; }; +// Is used to implement `is_property_v`. struct property_key_tag_base {}; +// We support incomplete property_key_t, so need to wrap it. template struct property_key_tag : property_key_tag_base {}; @@ -99,8 +102,10 @@ struct property_base : property_key_tag { protected: using key_t = property_key_t; constexpr property_t get_property(property_key_tag) const { - // https://godbolt.org/z/MY6849jGh for a reduced test reflecting original - // implementation that worked with clang/msvc and failed with gcc. + // In fact, `static_cast` below works just fine with clang/msvc but not with + // gcc, see https://godbolt.org/z/MY6849jGh for a reduced test. However, we + // need to support all ,so special case for compile-time properties (when + // `is_empty_v` is true). if constexpr (std::is_empty_v) { return property_t{}; } else { @@ -132,6 +137,11 @@ inline constexpr bool properties_are_sorted = []() constexpr { } else { const std::array sort_keys = {property_tys::property_name...}; // std::is_sorted isn't constexpr until C++20. + // + // Sorting is an implementation detail while uniqueness of the property_keys + // is an API restriction. This internal check actually combines both + // conditions as we expect that user error is handled before the internal + // `properties_are_sorted` assert is checked. for (std::size_t idx = 1; idx < sort_keys.size(); ++idx) if (sort_keys[idx - 1] >= sort_keys[idx]) return false; @@ -153,10 +163,14 @@ inline constexpr bool is_property_v = std::is_base_of_v && !is_property_list_v; +// Empty property list. template <> class properties, void> { template static constexpr bool has_property() { return false; } }; +// Base implementation to provide nice user error in case of mis-use. Without it +// an error "base class '' specified more than once as a direct base +// class" is reported prior to static_assert's error. template class properties< detail::properties_type_list, @@ -184,7 +198,6 @@ class __SYCL_EBO constexpr properties(unsorted_property_tys... props) : unsorted_property_tys(props)... {} - // TODO: add a unit-test for this. template < typename... other_property_list_tys, typename... other_property_tys, typename = std::enable_if_t<((is_property_v && ...))>> From 10b757fc25db9ff00dc15a80b5c764c0d6103cf2 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Fri, 18 Oct 2024 15:34:05 -0700 Subject: [PATCH 11/32] Make `properties::get_property` static for compile-time properties --- .../sycl/ext/oneapi/properties/properties.hpp | 27 ++++++++++++++++++- .../extensions/properties/new_properties.cpp | 12 +++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/sycl/include/sycl/ext/oneapi/properties/properties.hpp b/sycl/include/sycl/ext/oneapi/properties/properties.hpp index f015ef2ba52cb..2044ce040e49c 100644 --- a/sycl/include/sycl/ext/oneapi/properties/properties.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/properties.hpp @@ -220,7 +220,32 @@ class __SYCL_EBO properties>; } - template constexpr auto get_property() { + // Two methods below do the following (pseudocode): + // + // template + // using ret_t = decltype(this->get_property(key_tag{})); + // static constexpr auto get_property() requires(is_empty_v) { + // return ret_t{}; + // } + // constexpr auto get_property() requires(!is_empty_v) { + // return get_property(key_tag{}); + // } + template + static constexpr auto get_property() -> std::enable_if_t< + std::is_empty_v().get_property( + detail::property_key_tag{}))>, + decltype(std::declval().get_property( + detail::property_key_tag{}))> { + return decltype(std::declval().get_property( + detail::property_key_tag{})){}; + } + + template + constexpr auto get_property() const -> std::enable_if_t< + !std::is_empty_v().get_property( + detail::property_key_tag{}))>, + decltype(std::declval().get_property( + detail::property_key_tag{}))> { return get_property(detail::property_key_tag{}); } diff --git a/sycl/test/extensions/properties/new_properties.cpp b/sycl/test/extensions/properties/new_properties.cpp index c36c5e1055aaf..4e266d7240b9a 100644 --- a/sycl/test/extensions/properties/new_properties.cpp +++ b/sycl/test/extensions/properties/new_properties.cpp @@ -159,6 +159,18 @@ void test() { } } // namespace test_compile_prop_in_runtime_list +namespace test_static_get_property { + struct ct_prop : named_property_base {}; + struct rt_prop : named_property_base { + int x; + }; + void test() { + properties pl{ct_prop{}, rt_prop{}}; + constexpr auto c = decltype(pl)::get_property(); + auto r = pl.get_property(); + } +} + int main() { test::test(); bench::test(std::make_integer_sequence{}); From b44989252c372b13367e87fd298c6d63e06bf28e Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Fri, 18 Oct 2024 16:07:09 -0700 Subject: [PATCH 12/32] Better error message for duplicate property name --- .../sycl/ext/oneapi/properties/properties.hpp | 62 +++++++++++++------ .../properties/new_properties_negative.cpp | 32 +++++++++- 2 files changed, 73 insertions(+), 21 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/properties/properties.hpp b/sycl/include/sycl/ext/oneapi/properties/properties.hpp index 2044ce040e49c..ccf43858b6e0e 100644 --- a/sycl/include/sycl/ext/oneapi/properties/properties.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/properties.hpp @@ -31,6 +31,10 @@ namespace sycl { inline namespace _V1 { namespace ext::oneapi::experimental { namespace new_properties { + +template +class __SYCL_EBO properties; + namespace detail { template struct properties_type_list; @@ -112,18 +116,22 @@ struct property_base : property_key_tag { return *static_cast(this); } } + + // For key_t access in error reporting specialization. + template + friend class __SYCL_EBO new_properties::properties; }; template -inline constexpr bool property_keys_are_unique = []() constexpr { +inline constexpr bool property_names_are_unique = []() constexpr { if constexpr (sizeof...(property_tys) == 0) { return true; } else { - const std::array keys = {property_tys::property_name...}; - auto N = keys.size(); + const std::array names = {property_tys::property_name...}; + auto N = names.size(); for (int i = 0; i < N; ++i) for (int j = i + 1; j < N; ++j) - if (keys[i] == keys[j]) + if (names[i] == names[j]) return false; return true; @@ -135,23 +143,21 @@ inline constexpr bool properties_are_sorted = []() constexpr { if constexpr (sizeof...(property_tys) == 0) { return true; } else { - const std::array sort_keys = {property_tys::property_name...}; + const std::array sort_names = {property_tys::property_name...}; // std::is_sorted isn't constexpr until C++20. // - // Sorting is an implementation detail while uniqueness of the property_keys - // is an API restriction. This internal check actually combines both - // conditions as we expect that user error is handled before the internal - // `properties_are_sorted` assert is checked. - for (std::size_t idx = 1; idx < sort_keys.size(); ++idx) - if (sort_keys[idx - 1] >= sort_keys[idx]) + // Sorting is an implementation detail while uniqueness of the + // property_name's is an API restriction. This internal check actually + // combines both conditions as we expect that user error is handled before + // the internal `properties_are_sorted` assert is checked. + for (std::size_t idx = 1; idx < sort_names.size(); ++idx) + if (sort_names[idx - 1] >= sort_names[idx]) return false; return true; } }(); } // namespace detail -template class properties; - template struct is_property_list : std::false_type {}; template struct is_property_list> : std::true_type {}; @@ -164,7 +170,7 @@ inline constexpr bool is_property_v = !is_property_list_v; // Empty property list. -template <> class properties, void> { +template <> class __SYCL_EBO properties, void> { template static constexpr bool has_property() { return false; } }; @@ -172,12 +178,32 @@ template <> class properties, void> { // an error "base class '' specified more than once as a direct base // class" is reported prior to static_assert's error. template -class properties< +class __SYCL_EBO properties< detail::properties_type_list, - std::enable_if_t>> { + std::enable_if_t>> { + + // This is a separate specialization to report an error, we can afford doing + // extra work to provide nice error message without sacrificing compile time + // on non-exceptional path. Let's find *a* pair of properties that failed the + // check. Note that there might be multiple duplicate names, we're only + // reporting one instance. Once user addresses that, the next pair will be + // reported. + static constexpr auto conflict = []() constexpr { + const std::array keys = {property_tys::property_name...}; + auto N = keys.size(); + for (int i = 0; i < N; ++i) + for (int j = i + 1; j < N; ++j) + if (keys[i] == keys[j]) + return std::pair{i, j}; + }(); + using first_type = detail::nth_type_t; + using second_type = detail::nth_type_t; + static_assert( + !std::is_same_v, + "Duplicate property!"); + static_assert(first_type::property_name != second_type::property_name, + "Property name collision between different property keys!"); static_assert((is_property_v && ...)); - static_assert(detail::property_keys_are_unique, - "Property keys must be unique"); }; template diff --git a/sycl/test/extensions/properties/new_properties_negative.cpp b/sycl/test/extensions/properties/new_properties_negative.cpp index 9ab5f13755780..eff82943520e1 100644 --- a/sycl/test/extensions/properties/new_properties_negative.cpp +++ b/sycl/test/extensions/properties/new_properties_negative.cpp @@ -25,13 +25,39 @@ struct named_property_base template struct property : named_property_base> {}; +template +struct property_with_key + : named_property_base, struct prop_key_t> {}; + +namespace library_a { +struct prop : detail::property_base { + // Wrong, violates the extension specification! Property name must include + // library namespace to avoid collisions with other libraries! + static constexpr std::string_view property_name{"prop"}; +}; +} +namespace library_b { +struct prop : detail::property_base { + // Wrong, violates the extension specification! Property name must include + // library namespace to avoid collisions with other libraries! + static constexpr std::string_view property_name{"prop"}; +}; +} + void test() { - // expected-error-re@sycl/ext/oneapi/properties/properties.hpp:* {{static assertion failed due to requirement {{.+}}: Property keys must be unique}} + // expected-error@sycl/ext/oneapi/properties/properties.hpp:* {{static assertion failed due to requirement '!std::is_same_v, property<1>>': Duplicate property!}} std::ignore = properties{property<1>{}, property<1>{}}; constexpr properties pl{property<1>{}, property<2>{}}; - // expected-error-re@sycl/ext/oneapi/properties/properties.hpp:* {{static assertion failed due to requirement {{.+}}: Property keys must be unique}} - std::ignore = properties{pl, property<1>{}}; + // expected-error@sycl/ext/oneapi/properties/properties.hpp:* {{static assertion failed due to requirement '!std::is_same_v, property<2>>': Duplicate property!}} + std::ignore = properties{pl, property<2>{}}; + + // Unfortunately, C++ front end doesn't use qualified name for "prop" below... + // expected-error@sycl/ext/oneapi/properties/properties.hpp:* {{static assertion failed due to requirement 'prop::property_name != prop::property_name': Property name collision between different property keys!}} + std::ignore = properties{library_a::prop{}, library_b::prop{}}; + + // expected-error@sycl/ext/oneapi/properties/properties.hpp:* {{static assertion failed due to requirement '!std::is_same_v': Duplicate property!}} + std::ignore = properties{property_with_key<1>{}, property_with_key<2>{}}; } From e455c33c59a3fe3799be3f1e494a8da622cb9797 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Fri, 18 Oct 2024 16:31:19 -0700 Subject: [PATCH 13/32] Cleanup test a bit --- .../extensions/properties/new_properties.cpp | 63 +------------------ 1 file changed, 3 insertions(+), 60 deletions(-) diff --git a/sycl/test/extensions/properties/new_properties.cpp b/sycl/test/extensions/properties/new_properties.cpp index 4e266d7240b9a..04854e0225304 100644 --- a/sycl/test/extensions/properties/new_properties.cpp +++ b/sycl/test/extensions/properties/new_properties.cpp @@ -4,8 +4,6 @@ using namespace sycl::ext::oneapi::experimental::new_properties; -using mock_property_sort_key_t = int; - template constexpr auto generate_property_key_name() { #if defined(__clang__) || defined(__GNUC__) return __PRETTY_FUNCTION__; @@ -73,69 +71,14 @@ template void test(std::integer_sequence) { } } // namespace bench -namespace test_group_load_store { -struct naive : named_property_base {}; -struct full_group : named_property_base {}; -constexpr properties pl1{full_group{}}; -constexpr properties pl2{pl1, naive{}}; -static_assert(pl1.template has_property()); -static_assert(!pl1.template has_property()); -static_assert(pl2.template has_property()); -static_assert(pl2.template has_property()); - -enum class data_placement { blocked, striped }; -template -struct data_placement_property - : named_property_base, - struct data_placement_property_key> { - static constexpr bool is_blocked() { - return placement == data_placement::blocked; - } -}; -inline constexpr data_placement_property blocked; -inline constexpr data_placement_property striped; - -static_assert(properties{naive{}, blocked} - .get_property() - .is_blocked()); -static_assert(!properties{naive{}, striped} - .get_property() - .is_blocked()); -static_assert( - properties{naive{}, blocked} - .get_property_or_default_to(blocked) - .is_blocked()); -static_assert( - !properties{naive{}, data_placement_property{}} - .get_property_or_default_to( - blocked) - .is_blocked()); -static_assert( - properties{naive{}} - .get_property_or_default_to(blocked) - .is_blocked()); -static_assert( - !properties{naive{}} - .get_property_or_default_to( - striped) - .is_blocked()); - -constexpr properties pl3{full_group{}, blocked}; -// constexpr properties pl4{pl3, naive{}}; -template -constexpr auto merge_properties( - properties>, - other_property_tys...) { - return 42; -} -static_assert(merge_properties(pl3, naive{}) == 42); -} // namespace test_group_load_store - namespace test_merge_ctor { template struct property : named_property_base> {}; constexpr properties pl1{property<1>{}, property<2>{}, property<3>{}}; constexpr properties pl2{pl1, property<4>{}}; +static_assert(!pl1.has_property>()); +static_assert(pl2.has_property>()); +static_assert(pl2.has_property>()); } // namespace test_merge_ctor namespace test_compile_prop_in_runtime_list { From 2508ba0c99330507a57398474442970a587e6731 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Mon, 21 Oct 2024 08:51:43 -0700 Subject: [PATCH 14/32] Minor changes, mostly comments --- .../sycl/ext/oneapi/properties/properties.hpp | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/properties/properties.hpp b/sycl/include/sycl/ext/oneapi/properties/properties.hpp index ccf43858b6e0e..a78c8a5a2f77e 100644 --- a/sycl/include/sycl/ext/oneapi/properties/properties.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/properties.hpp @@ -181,6 +181,7 @@ template class __SYCL_EBO properties< detail::properties_type_list, std::enable_if_t>> { + static_assert((is_property_v && ...)); // This is a separate specialization to report an error, we can afford doing // extra work to provide nice error message without sacrificing compile time @@ -206,10 +207,14 @@ class __SYCL_EBO properties< static_assert((is_property_v && ...)); }; +// NOTE: Meta-function to implement CTAD rules isn't allowed to return +// `properties` and it's impossible to return a pack as well. As +// such, we're forced to have an extra level of `detail::properties_type_list` +// for the purpose of providing CTAD rules. template -class __SYCL_EBO - properties, - std::enable_if_t>> +class __SYCL_EBO properties< + detail::properties_type_list, + std::enable_if_t>> : public property_tys... { static_assert((is_property_v && ...)); static_assert( @@ -253,7 +258,7 @@ class __SYCL_EBO // static constexpr auto get_property() requires(is_empty_v) { // return ret_t{}; // } - // constexpr auto get_property() requires(!is_empty_v) { + // constexpr auto get_property() const requires(!is_empty_v) { // return get_property(key_tag{}); // } template @@ -275,6 +280,8 @@ class __SYCL_EBO return get_property(detail::property_key_tag{}); } + // TODO: Do we need separate `static` overload if we decide to keep this + // interface? template constexpr auto get_property_or_default_to(default_property_t default_property) { @@ -305,7 +312,7 @@ properties(properties>, using empty_properties_t = decltype(properties{}); -template struct is_property_key_of : std::false_type {}; +template struct is_property_of : std::false_type {}; } // namespace new_properties } // namespace ext::oneapi::experimental } // namespace _V1 From c74fe5dd04c34620fd6df4fa96f392ca3fc4ad7b Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Mon, 21 Oct 2024 11:20:12 -0700 Subject: [PATCH 15/32] Port kernel_bundle to new properties --- .../sycl/ext/oneapi/properties/properties.hpp | 18 +++- sycl/include/sycl/kernel_bundle.hpp | 93 ++++++++----------- .../KernelCompiler/kernel_compiler_opencl.cpp | 7 +- .../KernelCompiler/kernel_compiler_sycl.cpp | 15 +-- .../KernelCompiler/sycl_device_flags.cpp | 4 +- .../abi/sycl_classes_abi_neutral_test.cpp | 21 +++-- .../kernel_compiler_constraints.cpp | 28 +++--- .../extensions/properties/new_properties.cpp | 10 ++ 8 files changed, 106 insertions(+), 90 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/properties/properties.hpp b/sycl/include/sycl/ext/oneapi/properties/properties.hpp index a78c8a5a2f77e..42d7b7aad9c1e 100644 --- a/sycl/include/sycl/ext/oneapi/properties/properties.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/properties.hpp @@ -14,7 +14,7 @@ #include #include - +#include // For old properties: #include @@ -171,6 +171,7 @@ inline constexpr bool is_property_v = // Empty property list. template <> class __SYCL_EBO properties, void> { +public: template static constexpr bool has_property() { return false; } }; @@ -312,7 +313,20 @@ properties(properties>, using empty_properties_t = decltype(properties{}); -template struct is_property_of : std::false_type {}; +template +struct all_properties_in : std::false_type{}; +template +struct all_properties_in< + properties>, + allowed_property_keys...> + : std::bool_constant<((sycl::detail::check_type_in_v< + property_tys, allowed_property_keys...> && + ...))> {}; + +template +inline constexpr bool all_properties_in_v = + all_properties_in, + allowed_property_keys...>::value; } // namespace new_properties } // namespace ext::oneapi::experimental } // namespace _V1 diff --git a/sycl/include/sycl/kernel_bundle.hpp b/sycl/include/sycl/kernel_bundle.hpp index 1237bc0651b40..33bd41c3fc6eb 100644 --- a/sycl/include/sycl/kernel_bundle.hpp +++ b/sycl/include/sycl/kernel_bundle.hpp @@ -892,16 +892,14 @@ build(const kernel_bundle &InputBundle, namespace ext::oneapi::experimental { -namespace detail { -struct create_bundle_from_source_props; -struct build_source_bundle_props; -} // namespace detail - ///////////////////////// // PropertyT syclex::include_files ///////////////////////// struct include_files - : detail::run_time_property_key { + : new_properties::detail::property_base { + static constexpr std::string_view property_name{ + "sycl::ext::oneapi::experimental::include_files"}; + include_files(); include_files(const std::string &name, const std::string &content) { record.emplace_back(std::make_pair(name, content)); @@ -911,46 +909,35 @@ struct include_files } std::vector> record; }; -using include_files_key = include_files; - -template <> -struct is_property_key_of - : std::true_type {}; ///////////////////////// // PropertyT syclex::build_options ///////////////////////// -struct build_options - : detail::run_time_property_key { +struct build_options : new_properties::detail::property_base { + static constexpr std::string_view property_name{ + "sycl::ext::oneapi::experimental::build_options"}; std::vector opts; build_options(const std::string &optsArg) : opts{optsArg} {} build_options(const std::vector &optsArg) : opts(optsArg) {} }; -using build_options_key = build_options; - -template <> -struct is_property_key_of - : std::true_type {}; ///////////////////////// // PropertyT syclex::save_log ///////////////////////// -struct save_log : detail::run_time_property_key { +struct save_log : new_properties::detail::property_base { + static constexpr std::string_view property_name{ + "sycl::ext::oneapi::experimental::save_log"}; std::string *log; save_log(std::string *logArg) : log(logArg) {} }; -using save_log_key = save_log; - -template <> -struct is_property_key_of - : std::true_type {}; ///////////////////////// // PropertyT syclex::registered_kernel_names ///////////////////////// struct registered_kernel_names - : detail::run_time_property_key { + : new_properties::detail::property_base { + static constexpr std::string_view property_name{ + "sycl::ext::oneapi::experimental::registered_kernel_names"}; std::vector kernel_names; registered_kernel_names() {} registered_kernel_names(const std::string &knArg) : kernel_names{knArg} {} @@ -958,14 +945,17 @@ struct registered_kernel_names : kernel_names(knsArg) {} void add(const std::string &name) { kernel_names.push_back(name); } }; -using registered_kernel_names_key = registered_kernel_names; - -template <> -struct is_property_key_of : std::true_type { -}; namespace detail { +template +inline constexpr bool are_properties_valid_for_create_bundle_from_source = + new_properties::all_properties_in_v; + +template +inline constexpr bool are_properties_valid_for_build_source_bundle = + new_properties::all_properties_in_v; + // forward decls __SYCL_EXPORT bool is_source_kernel_bundle_supported(backend BE, source_language Language); @@ -1054,12 +1044,10 @@ build_from_source(kernel_bundle &SourceKB, ///////////////////////// // syclex::create_kernel_bundle_from_source ///////////////////////// -template < - typename PropertyListT = empty_properties_t, - typename = std::enable_if_t< - is_property_list_v && - detail::all_props_are_keys_of::value>> +template >> kernel_bundle create_kernel_bundle_from_source( const context &SyclContext, source_language Language, const std::string &Source, PropertyListT props = {}) { @@ -1073,12 +1061,10 @@ kernel_bundle create_kernel_bundle_from_source( } #if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0) -template < - typename PropertyListT = empty_properties_t, - typename = std::enable_if_t< - is_property_list_v && - detail::all_props_are_keys_of::value>> +template >> kernel_bundle create_kernel_bundle_from_source( const context &SyclContext, source_language Language, const std::vector &Bytes, PropertyListT props = {}) { @@ -1096,12 +1082,10 @@ kernel_bundle create_kernel_bundle_from_source( // syclex::build(source_kb) => exe_kb ///////////////////////// -template && - detail::all_props_are_keys_of::value>> - +template < + typename PropertyListT = new_properties::empty_properties_t, + typename = std::enable_if_t< + detail::are_properties_valid_for_build_source_bundle>> kernel_bundle build(kernel_bundle &SourceKB, const std::vector &Devices, PropertyListT props = {}) { @@ -1122,11 +1106,10 @@ build(kernel_bundle &SourceKB, RegisteredKernelNamesVec); } -template && - detail::all_props_are_keys_of::value>> +template < + typename PropertyListT = new_properties::empty_properties_t, + typename = std::enable_if_t< + detail::are_properties_valid_for_build_source_bundle>> kernel_bundle build(kernel_bundle &SourceKB, PropertyListT props = {}) { diff --git a/sycl/test-e2e/KernelCompiler/kernel_compiler_opencl.cpp b/sycl/test-e2e/KernelCompiler/kernel_compiler_opencl.cpp index cfe2824ec0564..7702d76ec482b 100644 --- a/sycl/test-e2e/KernelCompiler/kernel_compiler_opencl.cpp +++ b/sycl/test-e2e/KernelCompiler/kernel_compiler_opencl.cpp @@ -105,9 +105,10 @@ void test_build_and_run() { sycl::backend beRes = kbSrc.get_backend(); assert(beRes == ctx.get_backend()); - exe_kb kbExe2 = syclex::build( - kbSrc, devs, - syclex::properties{syclex::build_options{flags}, syclex::save_log{&log}}); + exe_kb kbExe2 = + syclex::build(kbSrc, devs, + syclex::new_properties::properties{ + syclex::build_options{flags}, syclex::save_log{&log}}); bool hasMyKernel = kbExe2.ext_oneapi_has_kernel("my_kernel"); bool hasHerKernel = kbExe2.ext_oneapi_has_kernel("her_kernel"); diff --git a/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl.cpp b/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl.cpp index 511f713b7c95c..878c932b1733b 100644 --- a/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl.cpp +++ b/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl.cpp @@ -138,7 +138,7 @@ void test_build_and_run() { incFiles.add("intermediate/PlusEm.h", PlusEmH); source_kb kbSrc = syclex::create_kernel_bundle_from_source( ctx, syclex::source_language::sycl, SYCLSource, - syclex::properties{incFiles}); + syclex::new_properties::properties{incFiles}); // Double check kernel_bundle.get_source() / get_backend(). sycl::context ctxRes = kbSrc.get_context(); @@ -153,10 +153,11 @@ void test_build_and_run() { std::string log; std::vector flags{"-g", "-fno-fast-math"}; std::vector devs = kbSrc.get_devices(); - exe_kb kbExe2 = syclex::build( - kbSrc, devs, - syclex::properties{syclex::build_options{flags}, syclex::save_log{&log}, - syclex::registered_kernel_names{"ff_templated"}}); + exe_kb kbExe2 = + syclex::build(kbSrc, devs, + syclex::new_properties::properties{ + syclex::build_options{flags}, syclex::save_log{&log}, + syclex::registered_kernel_names{"ff_templated"}}); assert(log.find("warning: 'this_nd_item<1>' is deprecated") != std::string::npos); @@ -231,8 +232,8 @@ void test_esimd() { source_kb kbSrc = syclex::create_kernel_bundle_from_source( ctx, syclex::source_language::sycl, ESIMDSource); - exe_kb kbExe = - syclex::build(kbSrc, syclex::properties{syclex::save_log{&log}}); + exe_kb kbExe = syclex::build( + kbSrc, syclex::new_properties::properties{syclex::save_log{&log}}); // extern "C" was used, so the name "vector_add_esimd" is not mangled and can // be used directly. diff --git a/sycl/test-e2e/KernelCompiler/sycl_device_flags.cpp b/sycl/test-e2e/KernelCompiler/sycl_device_flags.cpp index 9982324f2fc76..69eb3f9dd61e7 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_device_flags.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_device_flags.cpp @@ -129,8 +129,8 @@ int main(int argc, char *argv[]) { // Flags with and without space, inner quotes. std::vector flags{"-Xs '-doubleGRF'", "-Xs'-Xfinalizer \"-printregusage\"'"}; - exe_kb kbExe = - syclex::build(kbSrc, syclex::properties{syclex::build_options{flags}}); + exe_kb kbExe = syclex::build( + kbSrc, syclex::new_properties::properties{syclex::build_options{flags}}); sycl::kernel k = kbExe.ext_oneapi_get_kernel("add_thirty"); diff --git a/sycl/test/abi/sycl_classes_abi_neutral_test.cpp b/sycl/test/abi/sycl_classes_abi_neutral_test.cpp index 071481d0d1be0..8f089946de3e9 100644 --- a/sycl/test/abi/sycl_classes_abi_neutral_test.cpp +++ b/sycl/test/abi/sycl_classes_abi_neutral_test.cpp @@ -15,9 +15,10 @@ // New exclusions are NOT ALLOWED to this file unless it is guaranteed that data // member is not crossing ABI boundary. All current exclusions are listed below. -// CHECK: 0 | struct sycl::ext::oneapi::experimental::build_options -// CHECK-NEXT: 0 | struct sycl::ext::oneapi::experimental::detail::run_time_property_key (base) (empty) -// CHECK-NEXT: 0 | struct sycl::ext::oneapi::experimental::detail::property_key_base_tag (base) (empty) +// CHECK: 0 | struct sycl::ext::oneapi::experimental::build_options +// CHECK-NEXT: 0 | struct sycl::ext::oneapi::experimental::new_properties::detail::property_base (base) (empty) +// CHECK-NEXT: 0 | struct sycl::ext::oneapi::experimental::new_properties::detail::property_key_tag (base) (empty) +// CHECK-NEXT: 0 | struct sycl::ext::oneapi::experimental::new_properties::detail::property_key_tag_base (base) (empty) // CHECK-NEXT: 0 | class std::vector > opts // CHECK-NEXT: 0 | struct std::_Vector_base, class std::allocator > > (base) // CHECK-NEXT: 0 | struct std::_Vector_base, class std::allocator > >::_Vector_impl _M_impl @@ -25,9 +26,10 @@ // CHECK-NEXT: 0 | class {{(std::__new_allocator|__gnu_cxx::new_allocator)}} > (base) (empty) // CHECK-NEXT: 0 | {{(struct std::_Vector_base, class std::allocator > >::_Vector_impl_data \(base\)|pointer _M_start)}} -// CHECK: 0 | struct sycl::ext::oneapi::experimental::include_files -// CHECK-NEXT: 0 | struct sycl::ext::oneapi::experimental::detail::run_time_property_key (base) (empty) -// CHECK-NEXT: 0 | struct sycl::ext::oneapi::experimental::detail::property_key_base_tag (base) (empty) +// CHECK: 0 | struct sycl::ext::oneapi::experimental::include_files +// CHECK-NEXT: 0 | struct sycl::ext::oneapi::experimental::new_properties::detail::property_base (base) (empty) +// CHECK-NEXT: 0 | struct sycl::ext::oneapi::experimental::new_properties::detail::property_key_tag (base) (empty) +// CHECK-NEXT: 0 | struct sycl::ext::oneapi::experimental::new_properties::detail::property_key_tag_base (base) (empty) // CHECK-NEXT: 0 | class std::vector, class std::basic_string > > record // CHECK-NEXT: 0 | struct std::_Vector_base, class std::basic_string >, class std::allocator, class std::basic_string > > > (base) // CHECK-NEXT: 0 | struct std::_Vector_base, class std::basic_string >, class std::allocator, class std::basic_string > > >::_Vector_impl _M_impl @@ -35,9 +37,10 @@ // CHECK-NEXT: 0 | class {{(std::__new_allocator|__gnu_cxx::new_allocator)}}, class std::basic_string > > (base) (empty) // CHECK-NEXT: 0 | {{(struct std::_Vector_base, class std::basic_string >, class std::allocator, class std::basic_string > > >::_Vector_impl_data \(base\)|pointer _M_start)}} -// CHECK: 0 | struct sycl::ext::oneapi::experimental::registered_kernel_names -// CHECK-NEXT: 0 | struct sycl::ext::oneapi::experimental::detail::run_time_property_key (base) (empty) -// CHECK-NEXT: 0 | struct sycl::ext::oneapi::experimental::detail::property_key_base_tag (base) (empty) +// CHECK: 0 | struct sycl::ext::oneapi::experimental::registered_kernel_names +// CHECK-NEXT: 0 | struct sycl::ext::oneapi::experimental::new_properties::detail::property_base (base) (empty) +// CHECK-NEXT: 0 | struct sycl::ext::oneapi::experimental::new_properties::detail::property_key_tag (base) (empty) +// CHECK-NEXT: 0 | struct sycl::ext::oneapi::experimental::new_properties::detail::property_key_tag_base (base) (empty) // CHECK-NEXT: 0 | class std::vector > kernel_names // CHECK-NEXT: 0 | struct std::_Vector_base, class std::allocator > > (base) // CHECK-NEXT: 0 | struct std::_Vector_base, class std::allocator > >::_Vector_impl _M_impl diff --git a/sycl/test/extensions/kernel_compiler_constraints.cpp b/sycl/test/extensions/kernel_compiler_constraints.cpp index d497c042060b9..a68ee6d96ecc3 100644 --- a/sycl/test/extensions/kernel_compiler_constraints.cpp +++ b/sycl/test/extensions/kernel_compiler_constraints.cpp @@ -8,15 +8,20 @@ // RUN: %clangxx -fsyntax-only -fsycl -Xclang -verify -Xclang -verify-ignore-unexpected=note %s -// kernel_bundles sporting the new bundle_state::ext_oneapi_source should NOT +// kernel_bundles supporting the new bundle_state::ext_oneapi_source should NOT // support several member functions. This test confirms that. #include +namespace syclex = sycl::ext::oneapi::experimental; + +struct some_property : syclex::new_properties::detail::property_base { + static constexpr std::string_view property_name{"::some_property"}; +}; + int main() { #ifdef SYCL_EXT_ONEAPI_KERNEL_COMPILER - namespace syclex = sycl::ext::oneapi::experimental; using source_kb = sycl::kernel_bundle; sycl::queue q; @@ -75,34 +80,33 @@ int main() { syclex::build(kbSrc); // expected-error@+1 {{no matching function for call to 'build'}} - syclex::build(kbSrc, - syclex::properties{syclex::usm_kind}); + syclex::build(kbSrc, syclex::new_properties::properties{some_property{}}); // OK - syclex::build(kbSrc, syclex::properties{syclex::build_options{flags}, + syclex::build(kbSrc, syclex::new_properties::properties{syclex::build_options{flags}, syclex::save_log{&log}}); // expected-error@+1 {{no matching function for call to 'build'}} - syclex::build(kbSrc, syclex::properties{ + syclex::build(kbSrc, syclex::new_properties::properties{ syclex::build_options{flags}, syclex::save_log{&log}, - syclex::usm_kind}); + some_property{}}); // OK syclex::build(kbSrc, devices); // expected-error@+1 {{no matching function for call to 'build'}} syclex::build(kbSrc, devices, - syclex::properties{syclex::usm_kind}); + syclex::new_properties::properties{some_property{}}); // OK syclex::build( kbSrc, devices, - syclex::properties{syclex::build_options{flags}, syclex::save_log{&log}}); + syclex::new_properties::properties{syclex::build_options{flags}, syclex::save_log{&log}}); // expected-error@+1 {{no matching function for call to 'build'}} syclex::build(kbSrc, devices, - syclex::properties{syclex::build_options{flags}, - syclex::save_log{&log}, - syclex::usm_kind}); + syclex::new_properties::properties{syclex::build_options{flags}, + syclex::save_log{&log}, + some_property{}}); #endif } diff --git a/sycl/test/extensions/properties/new_properties.cpp b/sycl/test/extensions/properties/new_properties.cpp index 04854e0225304..d3a64eaeeb7fe 100644 --- a/sycl/test/extensions/properties/new_properties.cpp +++ b/sycl/test/extensions/properties/new_properties.cpp @@ -114,6 +114,16 @@ namespace test_static_get_property { } } +namespace test_trait { +struct prop : named_property_base {}; +struct prop2 : named_property_base {}; +constexpr properties pl{prop{}}; +using ty = std::remove_const_t; +static_assert(all_properties_in_v); +static_assert(all_properties_in_v); +static_assert(!all_properties_in_v); +} // namespace test_trait + int main() { test::test(); bench::test(std::make_integer_sequence{}); From bfe15cfa0c79d87a557d7e6db9aeef3a08903703 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Tue, 22 Oct 2024 14:43:57 -0700 Subject: [PATCH 16/32] Show how LLVM IR attributes will be implemented in new approach --- .../sycl/ext/oneapi/properties/properties.hpp | 4 ++ .../properties/new_properties_llvm_ir.cpp | 53 +++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 sycl/test/extensions/properties/new_properties_llvm_ir.cpp diff --git a/sycl/include/sycl/ext/oneapi/properties/properties.hpp b/sycl/include/sycl/ext/oneapi/properties/properties.hpp index 42d7b7aad9c1e..28172b150db45 100644 --- a/sycl/include/sycl/ext/oneapi/properties/properties.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/properties.hpp @@ -120,6 +120,10 @@ struct property_base : property_key_tag { // For key_t access in error reporting specialization. template friend class __SYCL_EBO new_properties::properties; + +public: + static constexpr const char *ir_attribute_name = ""; + static constexpr std::nullptr_t ir_attribute_value = nullptr; }; template diff --git a/sycl/test/extensions/properties/new_properties_llvm_ir.cpp b/sycl/test/extensions/properties/new_properties_llvm_ir.cpp new file mode 100644 index 0000000000000..2143e011649e5 --- /dev/null +++ b/sycl/test/extensions/properties/new_properties_llvm_ir.cpp @@ -0,0 +1,53 @@ +// RUN: %clangxx -fsycl -fsycl-device-only -O0 -emit-llvm -S %s -o - | FileCheck %s + +// CHECK: @fg_int = linkonce_odr dso_local addrspace(1) global %struct.fake_device_global { i32 43 }, align 4 #[[ATTR:[0-9]*]] +// CHECK: attributes #[[ATTR]] = { "llvm-ir-prop"="42" } + +#include + +using namespace sycl::ext::oneapi::experimental::new_properties; + +template constexpr auto generate_property_key_name() { +#if defined(__clang__) || defined(__GNUC__) + return __PRETTY_FUNCTION__; +#elif defined(_MSC_VER) + return __FUNCSIG__; +#else +#error "Unsupported compiler" +#endif +} + +template +struct named_property_base + : public detail::property_base { + static constexpr std::string_view property_name = + generate_property_key_name(); +}; + +template struct prop : named_property_base, struct prop_key> { + static constexpr const char *ir_attribute_name = "llvm-ir-prop"; + static constexpr int ir_attribute_value = N; +}; + +struct property_withour_ir_attribute + : named_property_base {}; + +template struct fake_device_global; +template +struct + [[__sycl_detail__::global_variable_allowed, + __sycl_detail__::add_ir_attributes_global_variable( + property_tys::ir_attribute_name..., + property_tys:: + ir_attribute_value...)]] fake_device_global>> { + T value; +}; + +constexpr auto pl = properties{prop<42>{}, property_withour_ir_attribute{}}; +using pl_t = std::remove_const_t; + +fake_device_global fg_int{43}; + +SYCL_EXTERNAL auto foo() { (void)fg_int; } From c4f184302ad8c76232f21cba5eeb4f56dcbd5d60 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Tue, 22 Oct 2024 16:05:32 -0700 Subject: [PATCH 17/32] Add operator+ --- .../sycl/ext/oneapi/properties/properties.hpp | 81 +++++++++++++++---- .../extensions/properties/new_properties.cpp | 23 ++++++ 2 files changed, 88 insertions(+), 16 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/properties/properties.hpp b/sycl/include/sycl/ext/oneapi/properties/properties.hpp index 28172b150db45..1f5d6432436f9 100644 --- a/sycl/include/sycl/ext/oneapi/properties/properties.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/properties.hpp @@ -31,12 +31,28 @@ namespace sycl { inline namespace _V1 { namespace ext::oneapi::experimental { namespace new_properties { +namespace detail { +template struct properties_type_list; + +// Is used to implement `is_property_v`. +struct property_key_tag_base {}; +} template class __SYCL_EBO properties; +template struct is_property_list : std::false_type {}; +template +struct is_property_list> : std::true_type {}; +template +inline constexpr bool is_property_list_v = is_property_list::value; + +template +inline constexpr bool is_property_v = + std::is_base_of_v && + !is_property_list_v; + namespace detail { -template struct properties_type_list; #if __has_builtin(__type_pack_element) template @@ -94,9 +110,6 @@ struct properties_sorter, nth_type_t...>; }; -// Is used to implement `is_property_v`. -struct property_key_tag_base {}; - // We support incomplete property_key_t, so need to wrap it. template struct property_key_tag : property_key_tag_base {}; @@ -124,6 +137,27 @@ struct property_base : property_key_tag { public: static constexpr const char *ir_attribute_name = ""; static constexpr std::nullptr_t ir_attribute_value = nullptr; + + // this_property_t is to disable ADL - properties{property{}} is inherited + // from property. + + template + friend constexpr std::enable_if_t< + std::is_same_v && + is_property_v, + decltype(properties{std::declval(), + std::declval()})> + operator+(const this_property_t &lhs, const other_property_t &rhs) { + return properties{lhs, rhs}; + } + + template + friend constexpr std::enable_if_t< + std::is_same_v, + properties>> + operator+(const this_property_t &lhs) { + return properties>{lhs}; + } }; template @@ -162,21 +196,19 @@ inline constexpr bool properties_are_sorted = []() constexpr { }(); } // namespace detail -template struct is_property_list : std::false_type {}; -template -struct is_property_list> : std::true_type {}; -template -inline constexpr bool is_property_list_v = is_property_list::value; - -template -inline constexpr bool is_property_v = - std::is_base_of_v && - !is_property_list_v; - // Empty property list. template <> class __SYCL_EBO properties, void> { public: template static constexpr bool has_property() { return false; } + + // TODO: How does this work without qualified name? + template + friend constexpr std::enable_if_t< + is_property_v, + properties>> + operator+(const properties &, const other_property_t &rhs) { + return properties{rhs}; + } }; // Base implementation to provide nice user error in case of mis-use. Without it @@ -230,10 +262,13 @@ class __SYCL_EBO properties< public: template && ...))>> + ((is_property_v && ...)) && + sizeof...(unsorted_property_tys) == sizeof...(property_tys)>> constexpr properties(unsorted_property_tys... props) : unsorted_property_tys(props)... {} + // TODO: not sure if that is needed if we'd have operator| or operator+. + // TODO: sizeof... check. template < typename... other_property_list_tys, typename... other_property_tys, typename = std::enable_if_t<((is_property_v && ...))>> @@ -295,6 +330,20 @@ class __SYCL_EBO properties< else return default_property; } + + // TODO: Use more effective insert sort for single-property insertion. + + // Need to use qualified type to force CTAD instead of using *current* + // properties instantiation. + template + friend constexpr std::enable_if_t< + is_property_v, + decltype(ext::oneapi::experimental::new_properties::properties{ + std::declval()..., std::declval()})> + operator+(const properties &lhs, const other_property_t &rhs) { + return ext::oneapi::experimental::new_properties::properties{ + static_cast(lhs)..., rhs}; + } }; template ); static_assert(!all_properties_in_v); } // namespace test_trait +namespace test_combine_op { +struct prop : named_property_base {}; +struct prop2 : named_property_base {}; +using pl = decltype(properties{prop{}, prop2{}}); + +static_assert(std::is_same_v); +static_assert(std::is_same_v); +static_assert(std::is_same_v); +static_assert( + std::is_same_v); +static_assert( + std::is_same_v); + +static_assert(std::is_same_v); +static_assert(std::is_same_v); + +static_assert(std::is_same_v); +static_assert(std::is_same_v); + +static_assert(std::is_same_v); +} + int main() { test::test(); bench::test(std::make_integer_sequence{}); From 497b80d9fc79ea023127a8c68e0a53cb62c41c5e Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Tue, 22 Oct 2024 16:14:04 -0700 Subject: [PATCH 18/32] Fix inheritance --- .../sycl/ext/oneapi/properties/properties.hpp | 4 +++- .../extensions/properties/new_properties.cpp | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/sycl/include/sycl/ext/oneapi/properties/properties.hpp b/sycl/include/sycl/ext/oneapi/properties/properties.hpp index 1f5d6432436f9..1c8b204baa67e 100644 --- a/sycl/include/sycl/ext/oneapi/properties/properties.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/properties.hpp @@ -252,13 +252,15 @@ template class __SYCL_EBO properties< detail::properties_type_list, std::enable_if_t>> - : public property_tys... { + : private property_tys... { static_assert((is_property_v && ...)); static_assert( detail::properties_are_sorted, "Properties must be sorted!"); using property_tys::get_property...; + template friend class __SYCL_EBO properties; + public: template ); static_assert(std::is_same_v); } +namespace test_inheritance_visibility { +template struct prop : named_property_base> { + static constexpr int value = N; +}; + +template struct has_value : std::false_type {}; +template +struct has_value> : std::true_type {}; +template inline constexpr bool has_value_v = has_value::value; + +constexpr properties pl1{prop<1>{}}; +constexpr properties pl2{prop<1>{}, prop<2>{}}; + +static_assert(has_value_v>); +static_assert(!has_value_v); +static_assert(!has_value_v); +} + int main() { test::test(); bench::test(std::make_integer_sequence{}); From b0b59ad9095817cfe2c4e85b2aa542a37f5d30f6 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Wed, 23 Oct 2024 11:28:55 -0700 Subject: [PATCH 19/32] Open question --- .../sycl/ext/oneapi/properties/properties.hpp | 30 ++- .../new_properties_open_question.cpp | 185 ++++++++++++++++++ 2 files changed, 205 insertions(+), 10 deletions(-) create mode 100644 sycl/test/extensions/properties/new_properties_open_question.cpp diff --git a/sycl/include/sycl/ext/oneapi/properties/properties.hpp b/sycl/include/sycl/ext/oneapi/properties/properties.hpp index 1c8b204baa67e..e38414a117aa0 100644 --- a/sycl/include/sycl/ext/oneapi/properties/properties.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/properties.hpp @@ -118,7 +118,7 @@ template struct property_base : property_key_tag { protected: using key_t = property_key_t; - constexpr property_t get_property(property_key_tag) const { + constexpr property_t get_property_impl(property_key_tag) const { // In fact, `static_cast` below works just fine with clang/msvc but not with // gcc, see https://godbolt.org/z/MY6849jGh for a reduced test. However, we // need to support all ,so special case for compile-time properties (when @@ -257,7 +257,7 @@ class __SYCL_EBO properties< static_assert( detail::properties_are_sorted, "Properties must be sorted!"); - using property_tys::get_property...; + using property_tys::get_property_impl...; template friend class __SYCL_EBO properties; @@ -296,30 +296,30 @@ class __SYCL_EBO properties< // Two methods below do the following (pseudocode): // // template - // using ret_t = decltype(this->get_property(key_tag{})); + // using ret_t = decltype(this->get_property_impl(key_tag{})); // static constexpr auto get_property() requires(is_empty_v) { // return ret_t{}; // } // constexpr auto get_property() const requires(!is_empty_v) { - // return get_property(key_tag{}); + // return get_property_impl(key_tag{}); // } template static constexpr auto get_property() -> std::enable_if_t< - std::is_empty_v().get_property( + std::is_empty_v().get_property_impl( detail::property_key_tag{}))>, - decltype(std::declval().get_property( + decltype(std::declval().get_property_impl( detail::property_key_tag{}))> { - return decltype(std::declval().get_property( + return decltype(std::declval().get_property_impl( detail::property_key_tag{})){}; } template constexpr auto get_property() const -> std::enable_if_t< - !std::is_empty_v().get_property( + !std::is_empty_v().get_property_impl( detail::property_key_tag{}))>, - decltype(std::declval().get_property( + decltype(std::declval().get_property_impl( detail::property_key_tag{}))> { - return get_property(detail::property_key_tag{}); + return get_property_impl(detail::property_key_tag{}); } // TODO: Do we need separate `static` overload if we decide to keep this @@ -346,6 +346,16 @@ class __SYCL_EBO properties< return ext::oneapi::experimental::new_properties::properties{ static_cast(lhs)..., rhs}; } + + template + friend constexpr auto + operator+(const properties &lhs, + const ext::oneapi::experimental::new_properties::properties< + detail::properties_type_list> &rhs) { + return ext::oneapi::experimental::new_properties::properties{ + static_cast(lhs)..., + static_cast(rhs)...}; + } }; template + +using namespace sycl::ext::oneapi::experimental::new_properties; + +template constexpr auto generate_property_key_name() { +#if defined(__clang__) || defined(__GNUC__) + return __PRETTY_FUNCTION__; +#elif defined(_MSC_VER) + return __FUNCSIG__; +#else +#error "Unsupported compiler" +#endif +} + +template +struct named_property_base + : public detail::property_base { + static constexpr std::string_view property_name = + generate_property_key_name(); +}; + +struct prop_key {}; + +template auto foo(PropList props = {}) { + static_assert(props.template has_property()); + static_assert(props.template get_property().value == 42); +} + +namespace approach_one { +// Don't provide `inline constexpr` property variables at all, they don't +// provide enough benefits now that `property_value` thing was eliminated. + +template struct prop : named_property_base, prop_key> { + static constexpr int value = N; +}; +struct other_prop : named_property_base {}; + +void bar() { + foo(properties{prop<42>{}}); + foo(properties{prop<42>{}, other_prop{}}); +} +} // namespace approach_one + +namespace approach_two { +// Keep providing `inline constexpr` "shortcuts". +template +struct prop_property : named_property_base, prop_key> { + static constexpr int value = N; +}; +struct other_prop_property : named_property_base {}; + +template inline constexpr prop_property prop{}; +inline constexpr other_prop_property other_prop{}; + +void bar() { + // Still need `properties` here. + foo(properties{prop<42>}); + foo(properties{prop<42>, other_prop}); +} +} // namespace approach_two +namespace approach_three { +// Use operator+ to create `properties` property list from individual +// properties. +template +struct prop_property : named_property_base, prop_key> { + static constexpr int value = N; +}; +struct other_prop_property : named_property_base {}; + +template inline constexpr prop_property prop{}; +inline constexpr other_prop_property other_prop{}; + +void bar() { + // Unary `+` has a bit of hacky feeling... + foo(+prop<42>); + // More than one property in a property list looks very natural. + // Alternatively, that can be `operator|` but it has no unary version. + foo(prop<42> + other_prop); +} +} // namespace approach_three + +namespace approach_four { +// "Duck-typing" - make individual properties behave almost like a property +// list. + +// This will be part of the implementation's `detail::property_base` but I'm +// keeping it in the test for now until the future direction is chosen. +template +struct adjusted_property_base + : public named_property_base { + template static constexpr bool has_property() { + return std::is_same_v; + } + + // Technically it should be two version static/non-static depending on + // `std::is_empty_v`, skipped here for brevity. + template >> + static constexpr property_t get_property() { + return property_t{}; + } +}; + +template +struct prop_property : adjusted_property_base, prop_key> { + static constexpr int value = N; +}; +struct other_prop_property : adjusted_property_base {}; + +template inline constexpr prop_property prop{}; +inline constexpr other_prop_property other_prop{}; + +void bar() { + // "Duck-typing" here. + foo(prop<42>); + + // Now nothing prevents us from using `operator|` here that someone might + // found more natural. Not doing for simplicity for now. + foo(prop<42> + other_prop); +} + +// Problem: +template struct fake_device_global; +template +struct + [[__sycl_detail__::global_variable_allowed, + __sycl_detail__::add_ir_attributes_global_variable( + // This attribute needs pack expansion and can't be made working with + // "duck-typing". + property_tys::ir_attribute_name..., + property_tys:: + ir_attribute_value...)]] fake_device_global>> { + T value; +}; + +// Needs this: +fake_device_global})> fg_int1{43}; + +#if 0 +// This fails. Note that "decltype" might be implicit through CTAD. +fake_device_global)> fg_int{43}; +#endif +} // namespace approach_four + +namespace approach_five { +// Same as approach three but make shortcuts property lists instead of +// individual properties. +template +struct prop_property : named_property_base, prop_key> { + static constexpr int value = N; +}; +struct other_prop_property : named_property_base {}; + +template inline constexpr auto prop = properties{prop_property{}}; +inline constexpr auto other_prop = properties{other_prop_property{}}; + +void bar() { + foo(prop<42>); + foo(prop<42> + other_prop); +} + +// Problem: +struct rt_prop_property : named_property_base { + int x; +}; + +// What should we do in the shortcut? On the other hand, maybe it's a more +// generic problem and such shortcuts are impossible/meaningless for run-time +// properties anyway. + +// Another side of the problem: +template inline constexpr auto expected_behavior = prop_property{}; +static_assert(expected_behavior<42>.value == 42); + +#if 0 +// This fails, `properties` don't expose individual property's interfaces. +// Approach six - modify `properties` to use "public" inheritance from a +// property + expose its ctors. +static_assert(prop<42>.value == 42); +#endif +} From d0509f6a5ee4ddcf1cba7f9e52f657a81778fe76 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Wed, 23 Oct 2024 11:59:08 -0700 Subject: [PATCH 20/32] clang-format + minor comment addition --- .../sycl/ext/oneapi/properties/properties.hpp | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/properties/properties.hpp b/sycl/include/sycl/ext/oneapi/properties/properties.hpp index e38414a117aa0..414b55dddec76 100644 --- a/sycl/include/sycl/ext/oneapi/properties/properties.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/properties.hpp @@ -9,9 +9,9 @@ #pragma once #include +#include #include #include -#include #include #include @@ -26,7 +26,6 @@ #include // for enable_if_t #include // for tuple - namespace sycl { inline namespace _V1 { namespace ext::oneapi::experimental { @@ -36,7 +35,7 @@ template struct properties_type_list; // Is used to implement `is_property_v`. struct property_key_tag_base {}; -} +} // namespace detail template class __SYCL_EBO properties; @@ -114,6 +113,15 @@ struct properties_sorter, template struct property_key_tag : property_key_tag_base {}; +// NOTE: each property_t subclass must provide +// +// static constexpr std::string_view +// property_name{" struct property_base : property_key_tag { protected: @@ -254,9 +262,8 @@ class __SYCL_EBO properties< std::enable_if_t>> : private property_tys... { static_assert((is_property_v && ...)); - static_assert( - detail::properties_are_sorted, - "Properties must be sorted!"); + static_assert(detail::properties_are_sorted, + "Properties must be sorted!"); using property_tys::get_property_impl...; template friend class __SYCL_EBO properties; @@ -379,7 +386,7 @@ properties(properties>, using empty_properties_t = decltype(properties{}); template -struct all_properties_in : std::false_type{}; +struct all_properties_in : std::false_type {}; template struct all_properties_in< properties>, From ad4d84782aa1361f1a12cafc94a0dc49b79c2ca0 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Fri, 25 Oct 2024 13:59:14 -0700 Subject: [PATCH 21/32] Revert changes in existing properties, still working on POC --- .../oneapi/experimental/group_load_store.hpp | 111 ++--- sycl/include/sycl/kernel_bundle.hpp | 93 ++-- .../GroupAlgorithm/load_store/basic.cpp | 6 +- .../load_store/odd_sized_type.cpp | 6 +- .../GroupAlgorithm/load_store/odd_wg_size.cpp | 6 +- .../GroupAlgorithm/load_store/partial_sg.cpp | 6 +- .../KernelCompiler/kernel_compiler_opencl.cpp | 7 +- .../KernelCompiler/kernel_compiler_sycl.cpp | 15 +- .../KernelCompiler/sycl_device_flags.cpp | 4 +- .../abi/sycl_classes_abi_neutral_test.cpp | 21 +- sycl/test/check_device_code/group_load.cpp | 382 +++++++-------- sycl/test/check_device_code/group_store.cpp | 454 +++++++++--------- .../kernel_compiler_constraints.cpp | 28 +- 13 files changed, 565 insertions(+), 574 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/group_load_store.hpp b/sycl/include/sycl/ext/oneapi/experimental/group_load_store.hpp index 69be227d9d9e1..a4af373753c10 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/group_load_store.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/group_load_store.hpp @@ -19,56 +19,45 @@ namespace sycl { inline namespace _V1 { namespace ext::oneapi::experimental { -// Data placement property + enum class data_placement_enum { blocked, striped }; -template -struct data_placement_property : new_properties::detail::property_base< - data_placement_property, - struct data_placement_property_key> { - static constexpr std::string_view property_name{ - "sycl::ext::oneapi::experimental::data_placement_property_key"}; - - static constexpr bool is_blocked() { - return Placement == data_placement_enum::blocked; - } +struct data_placement_key + : detail::compile_time_property_key { + template + using value_t = + property_value(Placement)>>; }; template -inline constexpr data_placement_property data_placement; - -inline constexpr auto data_placement_blocked = - data_placement; -inline constexpr auto data_placement_striped = - data_placement; - -// Contiguous memory property -struct contiguous_memory_property - : new_properties::detail::property_base { - static constexpr std::string_view property_name{ - "sycl::ext::oneapi::experimental::contiguous_memory_property"}; +inline constexpr data_placement_key::value_t data_placement; + +inline constexpr data_placement_key::value_t + data_placement_blocked; +inline constexpr data_placement_key::value_t + data_placement_striped; + +struct contiguous_memory_key + : detail::compile_time_property_key { + using value_t = property_value; }; -inline constexpr contiguous_memory_property contiguous_memory; +inline constexpr contiguous_memory_key::value_t contiguous_memory; -// Full group property -struct full_group_property - : new_properties::detail::property_base { - static constexpr std::string_view property_name{ - "sycl::ext::oneapi::experimental::full_group_property"}; +struct full_group_key + : detail::compile_time_property_key { + using value_t = property_value; }; -inline constexpr full_group_property full_group; +inline constexpr full_group_key::value_t full_group; namespace detail { -// Naive implementation property -struct naive_property - : new_properties::detail::property_base { - static constexpr std::string_view property_name{ - "sycl::ext::oneapi::experimental::naive_property"}; +struct naive_key : detail::compile_time_property_key { + using value_t = property_value; }; - -inline constexpr naive_property naive; +inline constexpr naive_key::value_t naive; using namespace sycl::detail; } // namespace detail @@ -105,10 +94,11 @@ inline constexpr bool verify_store_types = std::is_default_constructible_v; template constexpr bool isBlocked(Properties properties) { - return properties - .template get_property_or_default_to( - data_placement_blocked) - .is_blocked(); + if constexpr (properties.template has_property()) + return properties.template get_property() == + data_placement_blocked; + else + return true; } template @@ -201,13 +191,13 @@ auto get_block_op_ptr(IteratorT iter, [[maybe_unused]] Properties props) { if constexpr (!is_spir || !BlkInfo::has_builtin) { return nullptr; - } else if constexpr (!props.template has_property()) { + } else if constexpr (!props.template has_property()) { return nullptr; } else if constexpr (detail::is_multi_ptr_v) { return get_block_op_ptr( iter.get_decorated(), props); } else if constexpr (!std::is_pointer_v) { - if constexpr (props.template has_property()) + if constexpr (props.template has_property()) return get_block_op_ptr(&*iter, props); else @@ -241,29 +231,30 @@ auto get_block_op_ptr(IteratorT iter, [[maybe_unused]] Properties props) { // Load API span overload. template + typename Properties = decltype(properties())> std::enable_if_t && detail::is_generic_group_v> group_load(Group g, InputIteratorT in_ptr, span out, Properties props = {}) { constexpr bool blocked = detail::isBlocked(props); + using use_naive = + detail::merged_properties_t; - if constexpr (props.template has_property()) { + if constexpr (props.template has_property()) { group_barrier(g); for (int i = 0; i < out.size(); ++i) out[i] = in_ptr[detail::get_mem_idx(g, i)]; group_barrier(g); return; } else if constexpr (!std::is_same_v) { - return group_load(g, in_ptr, out, - new_properties::properties{props, detail::naive}); + return group_load(g, in_ptr, out, use_naive{}); } else { auto ptr = detail::get_block_op_ptr<4 /* load align */, ElementsPerWorkItem>( in_ptr, props); if (!ptr) - return group_load(g, in_ptr, out, - new_properties::properties{props, detail::naive}); + return group_load(g, in_ptr, out, use_naive{}); if constexpr (!std::is_same_v) { // Do optimized load. @@ -312,30 +303,30 @@ group_load(Group g, InputIteratorT in_ptr, // Store API span overload. template + typename Properties = decltype(properties())> std::enable_if_t && detail::is_generic_group_v> group_store(Group g, const span in, OutputIteratorT out_ptr, Properties props = {}) { constexpr bool blocked = detail::isBlocked(props); + using use_naive = + detail::merged_properties_t; - - if constexpr (props.template has_property()) { + if constexpr (props.template has_property()) { group_barrier(g); for (int i = 0; i < in.size(); ++i) out_ptr[detail::get_mem_idx(g, i)] = in[i]; group_barrier(g); return; } else if constexpr (!std::is_same_v) { - return group_store(g, in, out_ptr, - new_properties::properties{props, detail::naive}); + return group_store(g, in, out_ptr, use_naive{}); } else { auto ptr = detail::get_block_op_ptr<16 /* store align */, ElementsPerWorkItem>( out_ptr, props); if (!ptr) - return group_store(g, in, out_ptr, - new_properties::properties{props, detail::naive}); + return group_store(g, in, out_ptr, use_naive{}); if constexpr (!std::is_same_v) { // Do optimized store. @@ -359,7 +350,7 @@ group_store(Group g, const span in, // Load API scalar. template + typename Properties = decltype(properties())> std::enable_if_t && detail::is_generic_group_v> group_load(Group g, InputIteratorT in_ptr, OutputT &out, @@ -369,7 +360,7 @@ group_load(Group g, InputIteratorT in_ptr, OutputT &out, // Store API scalar. template + typename Properties = decltype(properties())> std::enable_if_t && detail::is_generic_group_v> group_store(Group g, const InputT &in, OutputIteratorT out_ptr, @@ -379,7 +370,7 @@ group_store(Group g, const InputT &in, OutputIteratorT out_ptr, // Load API sycl::vec overload. template + typename Properties = decltype(properties())> std::enable_if_t && detail::is_generic_group_v> group_load(Group g, InputIteratorT in_ptr, sycl::vec &out, @@ -389,7 +380,7 @@ group_load(Group g, InputIteratorT in_ptr, sycl::vec &out, // Store API sycl::vec overload. template + typename Properties = decltype(properties())> std::enable_if_t && detail::is_generic_group_v> group_store(Group g, const sycl::vec &in, OutputIteratorT out_ptr, diff --git a/sycl/include/sycl/kernel_bundle.hpp b/sycl/include/sycl/kernel_bundle.hpp index 33bd41c3fc6eb..1237bc0651b40 100644 --- a/sycl/include/sycl/kernel_bundle.hpp +++ b/sycl/include/sycl/kernel_bundle.hpp @@ -892,14 +892,16 @@ build(const kernel_bundle &InputBundle, namespace ext::oneapi::experimental { +namespace detail { +struct create_bundle_from_source_props; +struct build_source_bundle_props; +} // namespace detail + ///////////////////////// // PropertyT syclex::include_files ///////////////////////// struct include_files - : new_properties::detail::property_base { - static constexpr std::string_view property_name{ - "sycl::ext::oneapi::experimental::include_files"}; - + : detail::run_time_property_key { include_files(); include_files(const std::string &name, const std::string &content) { record.emplace_back(std::make_pair(name, content)); @@ -909,35 +911,46 @@ struct include_files } std::vector> record; }; +using include_files_key = include_files; + +template <> +struct is_property_key_of + : std::true_type {}; ///////////////////////// // PropertyT syclex::build_options ///////////////////////// -struct build_options : new_properties::detail::property_base { - static constexpr std::string_view property_name{ - "sycl::ext::oneapi::experimental::build_options"}; +struct build_options + : detail::run_time_property_key { std::vector opts; build_options(const std::string &optsArg) : opts{optsArg} {} build_options(const std::vector &optsArg) : opts(optsArg) {} }; +using build_options_key = build_options; + +template <> +struct is_property_key_of + : std::true_type {}; ///////////////////////// // PropertyT syclex::save_log ///////////////////////// -struct save_log : new_properties::detail::property_base { - static constexpr std::string_view property_name{ - "sycl::ext::oneapi::experimental::save_log"}; +struct save_log : detail::run_time_property_key { std::string *log; save_log(std::string *logArg) : log(logArg) {} }; +using save_log_key = save_log; + +template <> +struct is_property_key_of + : std::true_type {}; ///////////////////////// // PropertyT syclex::registered_kernel_names ///////////////////////// struct registered_kernel_names - : new_properties::detail::property_base { - static constexpr std::string_view property_name{ - "sycl::ext::oneapi::experimental::registered_kernel_names"}; + : detail::run_time_property_key { std::vector kernel_names; registered_kernel_names() {} registered_kernel_names(const std::string &knArg) : kernel_names{knArg} {} @@ -945,17 +958,14 @@ struct registered_kernel_names : kernel_names(knsArg) {} void add(const std::string &name) { kernel_names.push_back(name); } }; +using registered_kernel_names_key = registered_kernel_names; -namespace detail { -template -inline constexpr bool are_properties_valid_for_create_bundle_from_source = - new_properties::all_properties_in_v; - -template -inline constexpr bool are_properties_valid_for_build_source_bundle = - new_properties::all_properties_in_v; +template <> +struct is_property_key_of : std::true_type { +}; +namespace detail { // forward decls __SYCL_EXPORT bool is_source_kernel_bundle_supported(backend BE, source_language Language); @@ -1044,10 +1054,12 @@ build_from_source(kernel_bundle &SourceKB, ///////////////////////// // syclex::create_kernel_bundle_from_source ///////////////////////// -template >> +template < + typename PropertyListT = empty_properties_t, + typename = std::enable_if_t< + is_property_list_v && + detail::all_props_are_keys_of::value>> kernel_bundle create_kernel_bundle_from_source( const context &SyclContext, source_language Language, const std::string &Source, PropertyListT props = {}) { @@ -1061,10 +1073,12 @@ kernel_bundle create_kernel_bundle_from_source( } #if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0) -template >> +template < + typename PropertyListT = empty_properties_t, + typename = std::enable_if_t< + is_property_list_v && + detail::all_props_are_keys_of::value>> kernel_bundle create_kernel_bundle_from_source( const context &SyclContext, source_language Language, const std::vector &Bytes, PropertyListT props = {}) { @@ -1082,10 +1096,12 @@ kernel_bundle create_kernel_bundle_from_source( // syclex::build(source_kb) => exe_kb ///////////////////////// -template < - typename PropertyListT = new_properties::empty_properties_t, - typename = std::enable_if_t< - detail::are_properties_valid_for_build_source_bundle>> +template && + detail::all_props_are_keys_of::value>> + kernel_bundle build(kernel_bundle &SourceKB, const std::vector &Devices, PropertyListT props = {}) { @@ -1106,10 +1122,11 @@ build(kernel_bundle &SourceKB, RegisteredKernelNamesVec); } -template < - typename PropertyListT = new_properties::empty_properties_t, - typename = std::enable_if_t< - detail::are_properties_valid_for_build_source_bundle>> +template && + detail::all_props_are_keys_of::value>> kernel_bundle build(kernel_bundle &SourceKB, PropertyListT props = {}) { diff --git a/sycl/test-e2e/GroupAlgorithm/load_store/basic.cpp b/sycl/test-e2e/GroupAlgorithm/load_store/basic.cpp index e79b1c58e3a8d..1028da48f6051 100644 --- a/sycl/test-e2e/GroupAlgorithm/load_store/basic.cpp +++ b/sycl/test-e2e/GroupAlgorithm/load_store/basic.cpp @@ -49,10 +49,8 @@ int main() { int data[elems_per_wi]; - auto blocked = sycl_exp::new_properties::properties{ - sycl_exp::data_placement_blocked}; - auto striped = sycl_exp::new_properties::properties{ - sycl_exp::data_placement_striped}; + auto blocked = sycl_exp::properties{sycl_exp::data_placement_blocked}; + auto striped = sycl_exp::properties{sycl_exp::data_placement_striped}; // default sycl_exp::group_load(g, input.begin() + offset, span{data}); diff --git a/sycl/test-e2e/GroupAlgorithm/load_store/odd_sized_type.cpp b/sycl/test-e2e/GroupAlgorithm/load_store/odd_sized_type.cpp index 7f97942b36f44..98cdc1eb9f47a 100644 --- a/sycl/test-e2e/GroupAlgorithm/load_store/odd_sized_type.cpp +++ b/sycl/test-e2e/GroupAlgorithm/load_store/odd_sized_type.cpp @@ -64,10 +64,8 @@ int main() { S data[elems_per_wi]; - auto blocked = sycl_exp::new_properties::properties{ - sycl_exp::data_placement_blocked}; - auto striped = sycl_exp::new_properties::properties{ - sycl_exp::data_placement_striped}; + auto blocked = sycl_exp::properties{sycl_exp::data_placement_blocked}; + auto striped = sycl_exp::properties{sycl_exp::data_placement_striped}; // blocked sycl_exp::group_load(g, input.begin(), span{data}, blocked); diff --git a/sycl/test-e2e/GroupAlgorithm/load_store/odd_wg_size.cpp b/sycl/test-e2e/GroupAlgorithm/load_store/odd_wg_size.cpp index 7aaa21ecec960..adce5e9f588bc 100644 --- a/sycl/test-e2e/GroupAlgorithm/load_store/odd_wg_size.cpp +++ b/sycl/test-e2e/GroupAlgorithm/load_store/odd_wg_size.cpp @@ -48,10 +48,8 @@ template void test(queue &q) { int data[elems_per_wi]; - auto blocked = sycl_exp::new_properties::properties{ - sycl_exp::data_placement_blocked}; - auto striped = sycl_exp::new_properties::properties{ - sycl_exp::data_placement_striped}; + auto blocked = sycl_exp::properties{sycl_exp::data_placement_blocked}; + auto striped = sycl_exp::properties{sycl_exp::data_placement_striped}; // blocked sycl_exp::group_load(g, input.begin() + offset, span{data}, blocked); diff --git a/sycl/test-e2e/GroupAlgorithm/load_store/partial_sg.cpp b/sycl/test-e2e/GroupAlgorithm/load_store/partial_sg.cpp index 2012db6183500..72b87364f0ee9 100644 --- a/sycl/test-e2e/GroupAlgorithm/load_store/partial_sg.cpp +++ b/sycl/test-e2e/GroupAlgorithm/load_store/partial_sg.cpp @@ -45,10 +45,8 @@ template void test(queue &q) { int data[elems_per_wi]; - auto blocked = sycl_exp::new_properties::properties{ - sycl_exp::data_placement_blocked}; - auto striped = sycl_exp::new_properties::properties{ - sycl_exp::data_placement_striped}; + auto blocked = sycl_exp::properties{sycl_exp::data_placement_blocked}; + auto striped = sycl_exp::properties{sycl_exp::data_placement_striped}; // blocked sycl_exp::group_load(sg, input.begin() + offset, span{data}, blocked); diff --git a/sycl/test-e2e/KernelCompiler/kernel_compiler_opencl.cpp b/sycl/test-e2e/KernelCompiler/kernel_compiler_opencl.cpp index 7702d76ec482b..cfe2824ec0564 100644 --- a/sycl/test-e2e/KernelCompiler/kernel_compiler_opencl.cpp +++ b/sycl/test-e2e/KernelCompiler/kernel_compiler_opencl.cpp @@ -105,10 +105,9 @@ void test_build_and_run() { sycl::backend beRes = kbSrc.get_backend(); assert(beRes == ctx.get_backend()); - exe_kb kbExe2 = - syclex::build(kbSrc, devs, - syclex::new_properties::properties{ - syclex::build_options{flags}, syclex::save_log{&log}}); + exe_kb kbExe2 = syclex::build( + kbSrc, devs, + syclex::properties{syclex::build_options{flags}, syclex::save_log{&log}}); bool hasMyKernel = kbExe2.ext_oneapi_has_kernel("my_kernel"); bool hasHerKernel = kbExe2.ext_oneapi_has_kernel("her_kernel"); diff --git a/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl.cpp b/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl.cpp index 878c932b1733b..511f713b7c95c 100644 --- a/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl.cpp +++ b/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl.cpp @@ -138,7 +138,7 @@ void test_build_and_run() { incFiles.add("intermediate/PlusEm.h", PlusEmH); source_kb kbSrc = syclex::create_kernel_bundle_from_source( ctx, syclex::source_language::sycl, SYCLSource, - syclex::new_properties::properties{incFiles}); + syclex::properties{incFiles}); // Double check kernel_bundle.get_source() / get_backend(). sycl::context ctxRes = kbSrc.get_context(); @@ -153,11 +153,10 @@ void test_build_and_run() { std::string log; std::vector flags{"-g", "-fno-fast-math"}; std::vector devs = kbSrc.get_devices(); - exe_kb kbExe2 = - syclex::build(kbSrc, devs, - syclex::new_properties::properties{ - syclex::build_options{flags}, syclex::save_log{&log}, - syclex::registered_kernel_names{"ff_templated"}}); + exe_kb kbExe2 = syclex::build( + kbSrc, devs, + syclex::properties{syclex::build_options{flags}, syclex::save_log{&log}, + syclex::registered_kernel_names{"ff_templated"}}); assert(log.find("warning: 'this_nd_item<1>' is deprecated") != std::string::npos); @@ -232,8 +231,8 @@ void test_esimd() { source_kb kbSrc = syclex::create_kernel_bundle_from_source( ctx, syclex::source_language::sycl, ESIMDSource); - exe_kb kbExe = syclex::build( - kbSrc, syclex::new_properties::properties{syclex::save_log{&log}}); + exe_kb kbExe = + syclex::build(kbSrc, syclex::properties{syclex::save_log{&log}}); // extern "C" was used, so the name "vector_add_esimd" is not mangled and can // be used directly. diff --git a/sycl/test-e2e/KernelCompiler/sycl_device_flags.cpp b/sycl/test-e2e/KernelCompiler/sycl_device_flags.cpp index 69eb3f9dd61e7..9982324f2fc76 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_device_flags.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_device_flags.cpp @@ -129,8 +129,8 @@ int main(int argc, char *argv[]) { // Flags with and without space, inner quotes. std::vector flags{"-Xs '-doubleGRF'", "-Xs'-Xfinalizer \"-printregusage\"'"}; - exe_kb kbExe = syclex::build( - kbSrc, syclex::new_properties::properties{syclex::build_options{flags}}); + exe_kb kbExe = + syclex::build(kbSrc, syclex::properties{syclex::build_options{flags}}); sycl::kernel k = kbExe.ext_oneapi_get_kernel("add_thirty"); diff --git a/sycl/test/abi/sycl_classes_abi_neutral_test.cpp b/sycl/test/abi/sycl_classes_abi_neutral_test.cpp index 8f089946de3e9..071481d0d1be0 100644 --- a/sycl/test/abi/sycl_classes_abi_neutral_test.cpp +++ b/sycl/test/abi/sycl_classes_abi_neutral_test.cpp @@ -15,10 +15,9 @@ // New exclusions are NOT ALLOWED to this file unless it is guaranteed that data // member is not crossing ABI boundary. All current exclusions are listed below. -// CHECK: 0 | struct sycl::ext::oneapi::experimental::build_options -// CHECK-NEXT: 0 | struct sycl::ext::oneapi::experimental::new_properties::detail::property_base (base) (empty) -// CHECK-NEXT: 0 | struct sycl::ext::oneapi::experimental::new_properties::detail::property_key_tag (base) (empty) -// CHECK-NEXT: 0 | struct sycl::ext::oneapi::experimental::new_properties::detail::property_key_tag_base (base) (empty) +// CHECK: 0 | struct sycl::ext::oneapi::experimental::build_options +// CHECK-NEXT: 0 | struct sycl::ext::oneapi::experimental::detail::run_time_property_key (base) (empty) +// CHECK-NEXT: 0 | struct sycl::ext::oneapi::experimental::detail::property_key_base_tag (base) (empty) // CHECK-NEXT: 0 | class std::vector > opts // CHECK-NEXT: 0 | struct std::_Vector_base, class std::allocator > > (base) // CHECK-NEXT: 0 | struct std::_Vector_base, class std::allocator > >::_Vector_impl _M_impl @@ -26,10 +25,9 @@ // CHECK-NEXT: 0 | class {{(std::__new_allocator|__gnu_cxx::new_allocator)}} > (base) (empty) // CHECK-NEXT: 0 | {{(struct std::_Vector_base, class std::allocator > >::_Vector_impl_data \(base\)|pointer _M_start)}} -// CHECK: 0 | struct sycl::ext::oneapi::experimental::include_files -// CHECK-NEXT: 0 | struct sycl::ext::oneapi::experimental::new_properties::detail::property_base (base) (empty) -// CHECK-NEXT: 0 | struct sycl::ext::oneapi::experimental::new_properties::detail::property_key_tag (base) (empty) -// CHECK-NEXT: 0 | struct sycl::ext::oneapi::experimental::new_properties::detail::property_key_tag_base (base) (empty) +// CHECK: 0 | struct sycl::ext::oneapi::experimental::include_files +// CHECK-NEXT: 0 | struct sycl::ext::oneapi::experimental::detail::run_time_property_key (base) (empty) +// CHECK-NEXT: 0 | struct sycl::ext::oneapi::experimental::detail::property_key_base_tag (base) (empty) // CHECK-NEXT: 0 | class std::vector, class std::basic_string > > record // CHECK-NEXT: 0 | struct std::_Vector_base, class std::basic_string >, class std::allocator, class std::basic_string > > > (base) // CHECK-NEXT: 0 | struct std::_Vector_base, class std::basic_string >, class std::allocator, class std::basic_string > > >::_Vector_impl _M_impl @@ -37,10 +35,9 @@ // CHECK-NEXT: 0 | class {{(std::__new_allocator|__gnu_cxx::new_allocator)}}, class std::basic_string > > (base) (empty) // CHECK-NEXT: 0 | {{(struct std::_Vector_base, class std::basic_string >, class std::allocator, class std::basic_string > > >::_Vector_impl_data \(base\)|pointer _M_start)}} -// CHECK: 0 | struct sycl::ext::oneapi::experimental::registered_kernel_names -// CHECK-NEXT: 0 | struct sycl::ext::oneapi::experimental::new_properties::detail::property_base (base) (empty) -// CHECK-NEXT: 0 | struct sycl::ext::oneapi::experimental::new_properties::detail::property_key_tag (base) (empty) -// CHECK-NEXT: 0 | struct sycl::ext::oneapi::experimental::new_properties::detail::property_key_tag_base (base) (empty) +// CHECK: 0 | struct sycl::ext::oneapi::experimental::registered_kernel_names +// CHECK-NEXT: 0 | struct sycl::ext::oneapi::experimental::detail::run_time_property_key (base) (empty) +// CHECK-NEXT: 0 | struct sycl::ext::oneapi::experimental::detail::property_key_base_tag (base) (empty) // CHECK-NEXT: 0 | class std::vector > kernel_names // CHECK-NEXT: 0 | struct std::_Vector_base, class std::allocator > > (base) // CHECK-NEXT: 0 | struct std::_Vector_base, class std::allocator > >::_Vector_impl _M_impl diff --git a/sycl/test/check_device_code/group_load.cpp b/sycl/test/check_device_code/group_load.cpp index 18ffb6d84eed8..f1f9650b69393 100644 --- a/sycl/test/check_device_code/group_load.cpp +++ b/sycl/test/check_device_code/group_load.cpp @@ -16,22 +16,22 @@ namespace oneapi_exp = sycl::ext::oneapi::experimental; using namespace sycl::ext::oneapi::experimental; using full_group_blocked = - decltype(new_properties::properties(full_group, data_placement_blocked)); + decltype(properties(full_group, data_placement_blocked)); -using naive_blocked = decltype(new_properties::properties( - oneapi_exp::detail::naive, data_placement_blocked)); +using naive_blocked = + decltype(properties(oneapi_exp::detail::naive, data_placement_blocked)); -using opt_blocked = decltype(new_properties::properties( - full_group, contiguous_memory, data_placement_blocked)); +using opt_blocked = + decltype(properties(full_group, contiguous_memory, data_placement_blocked)); using full_group_striped = - decltype(new_properties::properties(full_group, data_placement_striped)); + decltype(properties(full_group, data_placement_striped)); -using naive_striped = decltype(new_properties::properties( - oneapi_exp::detail::naive, data_placement_striped)); +using naive_striped = + decltype(properties(oneapi_exp::detail::naive, data_placement_striped)); -using opt_striped = decltype(new_properties::properties( - full_group, contiguous_memory, data_placement_striped)); +using opt_striped = + decltype(properties(full_group, contiguous_memory, data_placement_striped)); template using plain_global_ptr = typename sycl::detail::DecoratedType< @@ -41,15 +41,15 @@ using plain_global_ptr = typename sycl::detail::DecoratedType< template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< sycl::sub_group, plain_global_ptr, int, naive_blocked>( sycl::sub_group, plain_global_ptr, int &, naive_blocked); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_SK_RSL_T2_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr addrspace(4) noundef align 4 dereferenceable(4) [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] comdat !srcloc [[META6:![0-9]+]] !sycl_fixed_targets [[META7:![0-9]+]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESN_SL_RSM_T2_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr addrspace(4) noundef align 4 dereferenceable(4) [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] comdat !srcloc [[META5:![0-9]+]] !sycl_fixed_targets [[META6:![0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4:[0-9]+]] // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 // CHECK-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[TMP0]] to i64 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[IN_PTR]], i64 [[IDXPROM_I]] -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA8:![0-9]+]] -// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(4) [[OUT]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7:![0-9]+]] +// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(4) [[OUT]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] // CHECK-NEXT: ret void @@ -57,26 +57,26 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< sycl::sub_group, plain_global_ptr, int, opt_blocked>( sycl::sub_group, plain_global_ptr, int &, opt_blocked); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_SK_RSL_T2_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr addrspace(4) noundef align 4 dereferenceable(4) [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.2") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META6]] !sycl_fixed_targets [[META7]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_RSN_T2_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr addrspace(4) noundef align 4 dereferenceable(4) [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.0") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META5]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[CMP_I_I:%.*]] = icmp ne ptr addrspace(1) [[IN_PTR]], null // CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I]]) -// CHECK-NEXT: [[CALL6_I:%.*]] = tail call spir_func noundef i32 @_Z30__spirv_SubgroupBlockReadINTELIjET_PU3AS1Kj(ptr addrspace(1) noundef nonnull [[IN_PTR]]) #[[ATTR4]] -// CHECK-NEXT: store i32 [[CALL6_I]], ptr addrspace(4) [[OUT]], align 4 +// CHECK-NEXT: [[CALL4_I:%.*]] = tail call spir_func noundef i32 @_Z30__spirv_SubgroupBlockReadINTELIjET_PU3AS1Kj(ptr addrspace(1) noundef nonnull [[IN_PTR]]) #[[ATTR4]] +// CHECK-NEXT: store i32 [[CALL4_I]], ptr addrspace(4) [[OUT]], align 4 // CHECK-NEXT: ret void // Check that contiguous_memory can be auto-detected. template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< sycl::sub_group, plain_global_ptr, int, full_group_blocked>( sycl::sub_group, plain_global_ptr, int &, full_group_blocked); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESL_SJ_RSK_T2_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr addrspace(4) noundef align 4 dereferenceable(4) [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.8") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META6]] !sycl_fixed_targets [[META7]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_SK_RSL_T2_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr addrspace(4) noundef align 4 dereferenceable(4) [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.2") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META5]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[CMP_I_I:%.*]] = icmp ne ptr addrspace(1) [[IN_PTR]], null // CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I]]) -// CHECK-NEXT: [[CALL6_I:%.*]] = tail call spir_func noundef i32 @_Z30__spirv_SubgroupBlockReadINTELIjET_PU3AS1Kj(ptr addrspace(1) noundef nonnull [[IN_PTR]]) #[[ATTR4]] -// CHECK-NEXT: store i32 [[CALL6_I]], ptr addrspace(4) [[OUT]], align 4 +// CHECK-NEXT: [[CALL4_I:%.*]] = tail call spir_func noundef i32 @_Z30__spirv_SubgroupBlockReadINTELIjET_PU3AS1Kj(ptr addrspace(1) noundef nonnull [[IN_PTR]]) #[[ATTR4]] +// CHECK-NEXT: store i32 [[CALL4_I]], ptr addrspace(4) [[OUT]], align 4 // CHECK-NEXT: ret void // SYCL 2020's accessor can't be statically known to be contiguous. @@ -87,19 +87,19 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load( sycl::sub_group, accessor_iter_t, int &, full_group_blocked); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupENS0_6detail17accessor_iteratorIKiLi1EEEiNS3_14new_properties10propertiesINSA_6detail20properties_type_listIJNS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESN_SL_RSM_T2_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[IN_PTR:%.*]], ptr addrspace(4) noundef align 4 dereferenceable(4) [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.8") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META6]] !sycl_fixed_targets [[META7]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupENS0_6detail17accessor_iteratorIKiLi1EEEiNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSC_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_RSN_T2_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[IN_PTR:%.*]], ptr addrspace(4) noundef align 4 dereferenceable(4) [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.2") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META5]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[AGG_TMP1_SROA_0_0_COPYLOAD:%.*]] = load ptr addrspace(4), ptr [[IN_PTR]], align 8, !tbaa [[TBAA12:![0-9]+]] +// CHECK-NEXT: [[AGG_TMP1_SROA_0_0_COPYLOAD:%.*]] = load ptr addrspace(4), ptr [[IN_PTR]], align 8, !tbaa [[TBAA11:![0-9]+]] // CHECK-NEXT: [[AGG_TMP1_SROA_2_0_IN_PTR_ASCAST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[IN_PTR]], i64 8 -// CHECK-NEXT: [[AGG_TMP1_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr [[AGG_TMP1_SROA_2_0_IN_PTR_ASCAST_SROA_IDX]], align 8, !tbaa [[TBAA14:![0-9]+]] +// CHECK-NEXT: [[AGG_TMP1_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr [[AGG_TMP1_SROA_2_0_IN_PTR_ASCAST_SROA_IDX]], align 8, !tbaa [[TBAA13:![0-9]+]] // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 // CHECK-NEXT: [[CONV3_I_I:%.*]] = sext i32 [[TMP0]] to i64 // CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr addrspace(4) [[AGG_TMP1_SROA_0_0_COPYLOAD]], i64 [[AGG_TMP1_SROA_2_0_COPYLOAD]] // CHECK-NEXT: [[ADD_PTR_I_I_I_I:%.*]] = getelementptr i32, ptr addrspace(4) [[TMP1]], i64 [[CONV3_I_I]] -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[ADD_PTR_I_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-NEXT: store i32 [[TMP2]], ptr addrspace(4) [[OUT]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[ADD_PTR_I_I_I_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: store i32 [[TMP2]], ptr addrspace(4) [[OUT]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] // CHECK-NEXT: ret void @@ -108,12 +108,12 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< sycl::sub_group, accessor_iter_t, int, opt_blocked>(sycl::sub_group, accessor_iter_t, int &, opt_blocked); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupENS0_6detail17accessor_iteratorIKiLi1EEEiNS3_14new_properties10propertiesINSA_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_RSN_T2_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[IN_PTR:%.*]], ptr addrspace(4) noundef align 4 dereferenceable(4) [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.2") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META6]] !sycl_fixed_targets [[META7]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupENS0_6detail17accessor_iteratorIKiLi1EEEiNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSC_INS3_21contiguous_memory_keyEJEEENSC_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESQ_SO_RSP_T2_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[IN_PTR:%.*]], ptr addrspace(4) noundef align 4 dereferenceable(4) [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.0") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META5]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[AGG_TMP1_SROA_0_0_COPYLOAD:%.*]] = load ptr addrspace(4), ptr [[IN_PTR]], align 8, !tbaa [[TBAA12]] +// CHECK-NEXT: [[AGG_TMP1_SROA_0_0_COPYLOAD:%.*]] = load ptr addrspace(4), ptr [[IN_PTR]], align 8, !tbaa [[TBAA11]] // CHECK-NEXT: [[AGG_TMP1_SROA_2_0_IN_PTR_ASCAST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[IN_PTR]], i64 8 -// CHECK-NEXT: [[AGG_TMP1_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr [[AGG_TMP1_SROA_2_0_IN_PTR_ASCAST_SROA_IDX]], align 8, !tbaa [[TBAA14]] +// CHECK-NEXT: [[AGG_TMP1_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr [[AGG_TMP1_SROA_2_0_IN_PTR_ASCAST_SROA_IDX]], align 8, !tbaa [[TBAA13]] // CHECK-NEXT: [[ADD_PTR_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[AGG_TMP1_SROA_0_0_COPYLOAD]], i64 [[AGG_TMP1_SROA_2_0_COPYLOAD]] // CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(4) [[ADD_PTR_I_I_I]], null // CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) @@ -125,15 +125,15 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 // CHECK-NEXT: [[CONV3_I_I:%.*]] = sext i32 [[TMP0]] to i64 // CHECK-NEXT: [[ADD_PTR_I_I_I_I:%.*]] = getelementptr i32, ptr addrspace(4) [[ADD_PTR_I_I_I]], i64 [[CONV3_I_I]] -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ADD_PTR_I_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(4) [[OUT]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ADD_PTR_I_I_I_I]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(4) [[OUT]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] -// CHECK-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPENS0_6DETAIL17ACCESSOR_ITERATORIKILI1EEEILM1ENS3_14NEW_PROPERTIES10PROPERTIESINSA_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME0EEENS3_19FULL_GROUP_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESO_SM_NS0_4SPANISN_XT2_EEET3__EXIT:%.*]] +// CHECK-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPENS0_6DETAIL17ACCESSOR_ITERATORIKILI1EEEILM1ENS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSC_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSC_INS3_14FULL_GROUP_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESQ_SO_NS0_4SPANISP_XT2_EEET3__EXIT:%.*]] // CHECK: if.end.i: -// CHECK-NEXT: [[CALL8_I:%.*]] = tail call spir_func noundef i32 @_Z30__spirv_SubgroupBlockReadINTELIjET_PU3AS1Kj(ptr addrspace(1) noundef nonnull [[CALL_I_I_I_I]]) #[[ATTR4]] -// CHECK-NEXT: store i32 [[CALL8_I]], ptr addrspace(4) [[OUT]], align 4 -// CHECK-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPENS0_6DETAIL17ACCESSOR_ITERATORIKILI1EEEILM1ENS3_14NEW_PROPERTIES10PROPERTIESINSA_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME0EEENS3_19FULL_GROUP_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESO_SM_NS0_4SPANISN_XT2_EEET3__EXIT]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupENS0_6detail17accessor_iteratorIKiLi1EEEiLm1ENS3_14new_properties10propertiesINSA_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEET3_.exit: +// CHECK-NEXT: [[CALL6_I:%.*]] = tail call spir_func noundef i32 @_Z30__spirv_SubgroupBlockReadINTELIjET_PU3AS1Kj(ptr addrspace(1) noundef nonnull [[CALL_I_I_I_I]]) #[[ATTR4]] +// CHECK-NEXT: store i32 [[CALL6_I]], ptr addrspace(4) [[OUT]], align 4 +// CHECK-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPENS0_6DETAIL17ACCESSOR_ITERATORIKILI1EEEILM1ENS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSC_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSC_INS3_14FULL_GROUP_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESQ_SO_NS0_4SPANISP_XT2_EEET3__EXIT]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupENS0_6detail17accessor_iteratorIKiLi1EEEiLm1ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSC_INS3_21contiguous_memory_keyEJEEENSC_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESQ_SO_NS0_4spanISP_XT2_EEET3_.exit: // CHECK-NEXT: ret void // Run-time alignment check is needed if type's alignment is less than BlockRead @@ -141,8 +141,8 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< sycl::sub_group, plain_global_ptr, char, opt_blocked>( sycl::sub_group, plain_global_ptr, char &, opt_blocked); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1ccNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_SK_RSL_T2_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr addrspace(4) noundef align 1 dereferenceable(1) [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.2") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META6]] !sycl_fixed_targets [[META7]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1ccNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_RSN_T2_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr addrspace(4) noundef align 1 dereferenceable(1) [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.0") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META5]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[CMP_I_I:%.*]] = icmp ne ptr addrspace(1) [[IN_PTR]], null // CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I]]) @@ -155,15 +155,15 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 // CHECK-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[TMP1]] to i64 // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[IN_PTR]], i64 [[IDXPROM_I_I]] -// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr addrspace(1) [[ARRAYIDX_I_I]], align 1, !tbaa [[TBAA16:![0-9]+]] -// CHECK-NEXT: store i8 [[TMP2]], ptr addrspace(4) [[OUT]], align 1, !tbaa [[TBAA16]] +// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr addrspace(1) [[ARRAYIDX_I_I]], align 1, !tbaa [[TBAA15:![0-9]+]] +// CHECK-NEXT: store i8 [[TMP2]], ptr addrspace(4) [[OUT]], align 1, !tbaa [[TBAA15]] // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] -// CHECK-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1CCLM1ENS3_14NEW_PROPERTIES10PROPERTIESINS8_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME0EEENS3_19FULL_GROUP_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESM_SK_NS0_4SPANISL_XT2_EEET3__EXIT:%.*]] +// CHECK-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1CCLM1ENS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSA_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSA_INS3_14FULL_GROUP_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESO_SM_NS0_4SPANISN_XT2_EEET3__EXIT:%.*]] // CHECK: if.end.i: -// CHECK-NEXT: [[CALL6_I:%.*]] = tail call spir_func noundef zeroext i8 @_Z30__spirv_SubgroupBlockReadINTELIhET_PU3AS1Kh(ptr addrspace(1) noundef nonnull [[IN_PTR]]) #[[ATTR4]] -// CHECK-NEXT: store i8 [[CALL6_I]], ptr addrspace(4) [[OUT]], align 1 -// CHECK-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1CCLM1ENS3_14NEW_PROPERTIES10PROPERTIESINS8_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME0EEENS3_19FULL_GROUP_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESM_SK_NS0_4SPANISL_XT2_EEET3__EXIT]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1ccLm1ENS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_SK_NS0_4spanISL_XT2_EEET3_.exit: +// CHECK-NEXT: [[CALL4_I:%.*]] = tail call spir_func noundef zeroext i8 @_Z30__spirv_SubgroupBlockReadINTELIhET_PU3AS1Kh(ptr addrspace(1) noundef nonnull [[IN_PTR]]) #[[ATTR4]] +// CHECK-NEXT: store i8 [[CALL4_I]], ptr addrspace(4) [[OUT]], align 1 +// CHECK-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1CCLM1ENS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSA_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSA_INS3_14FULL_GROUP_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESO_SM_NS0_4SPANISN_XT2_EEET3__EXIT]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1ccLm1ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEET3_.exit: // CHECK-NEXT: ret void // Four shorts in blocked data layout could be loaded as a single 64-bit @@ -171,8 +171,8 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< sycl::sub_group, plain_global_ptr, short, 4, opt_blocked>( sycl::sub_group, plain_global_ptr, span, opt_blocked); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1ssLm4ENS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_SK_NS0_4spanISL_XT2_EEET3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.11") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.2") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META17:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1ssLm4ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEET3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.5") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.0") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16:![0-9]+]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[CMP_I:%.*]] = icmp ne ptr addrspace(1) [[IN_PTR]], null // CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP_I]]) @@ -181,33 +181,33 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< // CHECK-NEXT: [[CMP1_I_NOT:%.*]] = icmp eq i64 [[REM_I]], 0 // CHECK-NEXT: br i1 [[CMP1_I_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] // CHECK: if.then: -// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[OUT]], align 8, !tbaa [[TBAA12]] +// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[OUT]], align 8, !tbaa [[TBAA11]] // CHECK-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(4) // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META18:![0-9]+]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META17:![0-9]+]] // CHECK-NEXT: [[MUL_I_I:%.*]] = shl i32 [[TMP3]], 2 // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[IF_THEN]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] -// CHECK-NEXT: [[CMP_I18:%.*]] = icmp ult i32 [[I_0_I]], 4 -// CHECK-NEXT: br i1 [[CMP_I18]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1SSLM4ENS3_14NEW_PROPERTIES10PROPERTIESINS8_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME0EEENS3_19FULL_GROUP_PROPERTYENS3_6DETAIL14NAIVE_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESO_SM_NS0_4SPANISN_XT2_EEET3__EXIT:%.*]] +// CHECK-NEXT: [[CMP_I14:%.*]] = icmp ult i32 [[I_0_I]], 4 +// CHECK-NEXT: br i1 [[CMP_I14]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1SSLM4ENS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSA_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSA_INS3_14FULL_GROUP_KEYEJEEENSA_INS3_6DETAIL9NAIVE_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEET3__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 // CHECK-NEXT: [[ADD_I_I:%.*]] = or disjoint i32 [[MUL_I_I]], [[I_0_I]] // CHECK-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[ADD_I_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[IN_PTR]], i64 [[IDXPROM_I]] -// CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr addrspace(1) [[ARRAYIDX_I]], align 2, !tbaa [[TBAA21:![0-9]+]] +// CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr addrspace(1) [[ARRAYIDX_I]], align 2, !tbaa [[TBAA20:![0-9]+]] // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(4) [[TMP2]], i64 [[CONV_I]] -// CHECK-NEXT: store i16 [[TMP4]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA21]] +// CHECK-NEXT: store i16 [[TMP4]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA20]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP23:![0-9]+]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1ssLm4ENS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEET3_.exit: +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1ssLm4ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEENSA_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEET3_.exit: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] // CHECK-NEXT: br label [[CLEANUP:%.*]] // CHECK: if.end: -// CHECK-NEXT: [[CALL6:%.*]] = tail call spir_func noundef i64 @_Z30__spirv_SubgroupBlockReadINTELImET_PU3AS1Km(ptr addrspace(1) noundef nonnull [[IN_PTR]]) #[[ATTR4]] -// CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(4), ptr [[OUT]], align 8, !tbaa [[TBAA25:![0-9]+]] -// CHECK-NEXT: store i64 [[CALL6]], ptr addrspace(4) [[TMP5]], align 2 +// CHECK-NEXT: [[CALL4:%.*]] = tail call spir_func noundef i64 @_Z30__spirv_SubgroupBlockReadINTELImET_PU3AS1Km(ptr addrspace(1) noundef nonnull [[IN_PTR]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(4), ptr [[OUT]], align 8, !tbaa [[TBAA24:![0-9]+]] +// CHECK-NEXT: store i64 [[CALL4]], ptr addrspace(4) [[TMP5]], align 2 // CHECK-NEXT: br label [[CLEANUP]] // CHECK: cleanup: // CHECK-NEXT: ret void @@ -216,30 +216,30 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< sycl::sub_group, plain_global_ptr, int, 3, opt_blocked>( sycl::sub_group, plain_global_ptr, span, opt_blocked); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm3ENS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_SK_NS0_4spanISL_XT2_EEET3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.12") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.2") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META17]] !sycl_fixed_targets [[META7]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm3ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEET3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.6") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.0") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT]], align 8, !tbaa [[TBAA12]] +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT]], align 8, !tbaa [[TBAA11]] // CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META27:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META26:![0-9]+]] // CHECK-NEXT: [[MUL_I_I:%.*]] = mul i32 [[TMP2]], 3 // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] // CHECK-NEXT: [[CMP_I:%.*]] = icmp ult i32 [[I_0_I]], 3 -// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1IILM3ENS3_14NEW_PROPERTIES10PROPERTIESINS8_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME0EEENS3_19FULL_GROUP_PROPERTYENS3_6DETAIL14NAIVE_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESO_SM_NS0_4SPANISN_XT2_EEET3__EXIT:%.*]] +// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1IILM3ENS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSA_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSA_INS3_14FULL_GROUP_KEYEJEEENSA_INS3_6DETAIL9NAIVE_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEET3__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 // CHECK-NEXT: [[ADD_I_I:%.*]] = add i32 [[MUL_I_I]], [[I_0_I]] // CHECK-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[ADD_I_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[IN_PTR]], i64 [[IDXPROM_I]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I]] -// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP30:![0-9]+]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm3ENS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEET3_.exit: +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm3ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEENSA_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEET3_.exit: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] // CHECK-NEXT: ret void @@ -248,30 +248,30 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< sycl::sub_group, plain_global_ptr, int, 4, opt_blocked>( sycl::sub_group, plain_global_ptr, span, opt_blocked); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm4ENS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_SK_NS0_4spanISL_XT2_EEET3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.13") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.2") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META17]] !sycl_fixed_targets [[META7]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm4ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEET3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.7") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.0") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT]], align 8, !tbaa [[TBAA12]] +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT]], align 8, !tbaa [[TBAA11]] // CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META31:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META30:![0-9]+]] // CHECK-NEXT: [[MUL_I_I:%.*]] = shl i32 [[TMP2]], 2 // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] // CHECK-NEXT: [[CMP_I:%.*]] = icmp ult i32 [[I_0_I]], 4 -// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1IILM4ENS3_14NEW_PROPERTIES10PROPERTIESINS8_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME0EEENS3_19FULL_GROUP_PROPERTYENS3_6DETAIL14NAIVE_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESO_SM_NS0_4SPANISN_XT2_EEET3__EXIT:%.*]] +// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1IILM4ENS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSA_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSA_INS3_14FULL_GROUP_KEYEJEEENSA_INS3_6DETAIL9NAIVE_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEET3__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 // CHECK-NEXT: [[ADD_I_I:%.*]] = or disjoint i32 [[MUL_I_I]], [[I_0_I]] // CHECK-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[ADD_I_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[IN_PTR]], i64 [[IDXPROM_I]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I]] -// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP34:![0-9]+]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm4ENS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEET3_.exit: +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP33:![0-9]+]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm4ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEENSA_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEET3_.exit: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] // CHECK-NEXT: ret void @@ -279,30 +279,30 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< sycl::sub_group, plain_global_ptr, int, 7, opt_blocked>( sycl::sub_group, plain_global_ptr, span, opt_blocked); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm7ENS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_SK_NS0_4spanISL_XT2_EEET3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.14") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.2") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META17]] !sycl_fixed_targets [[META7]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm7ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEET3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.8") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.0") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT]], align 8, !tbaa [[TBAA12]] +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT]], align 8, !tbaa [[TBAA11]] // CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META35:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META34:![0-9]+]] // CHECK-NEXT: [[MUL_I_I:%.*]] = mul i32 [[TMP2]], 7 // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] // CHECK-NEXT: [[CMP_I:%.*]] = icmp ult i32 [[I_0_I]], 7 -// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1IILM7ENS3_14NEW_PROPERTIES10PROPERTIESINS8_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME0EEENS3_19FULL_GROUP_PROPERTYENS3_6DETAIL14NAIVE_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESO_SM_NS0_4SPANISN_XT2_EEET3__EXIT:%.*]] +// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1IILM7ENS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSA_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSA_INS3_14FULL_GROUP_KEYEJEEENSA_INS3_6DETAIL9NAIVE_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEET3__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 // CHECK-NEXT: [[ADD_I_I:%.*]] = add i32 [[MUL_I_I]], [[I_0_I]] // CHECK-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[ADD_I_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[IN_PTR]], i64 [[IDXPROM_I]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I]] -// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP38:![0-9]+]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm7ENS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEET3_.exit: +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP37:![0-9]+]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm7ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEENSA_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEET3_.exit: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] // CHECK-NEXT: ret void @@ -313,13 +313,13 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< sycl::sub_group, plain_global_ptr, int, 2, naive_striped>( sycl::sub_group, plain_global_ptr, span, naive_striped); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm2ENS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_SK_NS0_4spanISL_XT2_EEET3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.15") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.16") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META17]] !sycl_fixed_targets [[META7]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm2ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESN_SL_NS0_4spanISM_XT2_EEET3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.9") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.10") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META39:![0-9]+]] -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META42:![0-9]+]] -// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(4), ptr [[OUT]], align 8, !tbaa [[TBAA45:![0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META38:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA7]], !noalias [[META41:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(4), ptr [[OUT]], align 8, !tbaa [[TBAA44:![0-9]+]] // CHECK-NEXT: br label [[FOR_COND:%.*]] // CHECK: for.cond: // CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] @@ -334,38 +334,38 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< // CHECK-NEXT: [[ADD_I:%.*]] = add i32 [[TMP0]], [[MUL_I]] // CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD_I]] to i64 // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[IN_PTR]], i64 [[IDXPROM]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP2]], i64 [[CONV]] -// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1 -// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // Check that optimized implementation is selected. template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< sycl::sub_group, plain_global_ptr, int, 2, opt_striped>( sycl::sub_group, plain_global_ptr, span, opt_striped); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm2ENS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_SK_NS0_4spanISL_XT2_EEET3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.15") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.19") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META17]] !sycl_fixed_targets [[META7]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm2ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEET3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.9") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.11") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: cleanup: // CHECK-NEXT: [[CMP_I:%.*]] = icmp ne ptr addrspace(1) [[IN_PTR]], null // CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP_I]]) -// CHECK-NEXT: [[CALL6:%.*]] = tail call spir_func noundef <2 x i32> @_Z30__spirv_SubgroupBlockReadINTELIDv2_jET_PU3AS1Kj(ptr addrspace(1) noundef nonnull [[IN_PTR]]) #[[ATTR4]] -// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr [[OUT]], align 8, !tbaa [[TBAA45]] -// CHECK-NEXT: store <2 x i32> [[CALL6]], ptr addrspace(4) [[TMP0]], align 4 +// CHECK-NEXT: [[CALL4:%.*]] = tail call spir_func noundef <2 x i32> @_Z30__spirv_SubgroupBlockReadINTELIDv2_jET_PU3AS1Kj(ptr addrspace(1) noundef nonnull [[IN_PTR]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr [[OUT]], align 8, !tbaa [[TBAA44]] +// CHECK-NEXT: store <2 x i32> [[CALL4]], ptr addrspace(4) [[TMP0]], align 4 // CHECK-NEXT: ret void // Check that contiguous_memory can be auto-detected. template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< sycl::sub_group, plain_global_ptr, int, 2, full_group_striped>( sycl::sub_group, plain_global_ptr, span, full_group_striped); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm2ENS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESL_SJ_NS0_4spanISK_XT2_EEET3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.15") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.21") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META17]] !sycl_fixed_targets [[META7]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm2ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_SK_NS0_4spanISL_XT2_EEET3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.9") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.13") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: cleanup: // CHECK-NEXT: [[CMP_I:%.*]] = icmp ne ptr addrspace(1) [[IN_PTR]], null // CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP_I]]) -// CHECK-NEXT: [[CALL6:%.*]] = tail call spir_func noundef <2 x i32> @_Z30__spirv_SubgroupBlockReadINTELIDv2_jET_PU3AS1Kj(ptr addrspace(1) noundef nonnull [[IN_PTR]]) #[[ATTR4]] -// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr [[OUT]], align 8, !tbaa [[TBAA45]] -// CHECK-NEXT: store <2 x i32> [[CALL6]], ptr addrspace(4) [[TMP0]], align 4 +// CHECK-NEXT: [[CALL4:%.*]] = tail call spir_func noundef <2 x i32> @_Z30__spirv_SubgroupBlockReadINTELIDv2_jET_PU3AS1Kj(ptr addrspace(1) noundef nonnull [[IN_PTR]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr [[OUT]], align 8, !tbaa [[TBAA44]] +// CHECK-NEXT: store <2 x i32> [[CALL4]], ptr addrspace(4) [[TMP0]], align 4 // CHECK-NEXT: ret void // SYCL 2020's accessor can't be statically known to be contiguous. @@ -375,35 +375,35 @@ using accessor_iter_t = accessor( sycl::sub_group, accessor_iter_t, span, full_group_striped); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupENS0_6detail17accessor_iteratorIKiLi1EEEiLm2ENS3_14new_properties10propertiesINSA_6detail20properties_type_listIJNS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESN_SL_NS0_4spanISM_XT2_EEET3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.15") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.21") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META17]] !sycl_fixed_targets [[META7]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupENS0_6detail17accessor_iteratorIKiLi1EEEiLm2ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSC_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEET3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.9") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.13") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[AGG_TMP3_SROA_0_0_COPYLOAD:%.*]] = load ptr addrspace(4), ptr [[IN_PTR]], align 8, !tbaa [[TBAA12]] +// CHECK-NEXT: [[AGG_TMP3_SROA_0_0_COPYLOAD:%.*]] = load ptr addrspace(4), ptr [[IN_PTR]], align 8, !tbaa [[TBAA11]] // CHECK-NEXT: [[AGG_TMP3_SROA_2_0_IN_PTR_ASCAST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[IN_PTR]], i64 8 -// CHECK-NEXT: [[AGG_TMP3_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr [[AGG_TMP3_SROA_2_0_IN_PTR_ASCAST_SROA_IDX]], align 8, !tbaa [[TBAA14]] -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT]], align 8, !tbaa [[TBAA12]] +// CHECK-NEXT: [[AGG_TMP3_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr [[AGG_TMP3_SROA_2_0_IN_PTR_ASCAST_SROA_IDX]], align 8, !tbaa [[TBAA13]] +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT]], align 8, !tbaa [[TBAA11]] // CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META48:![0-9]+]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META51:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META47:![0-9]+]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA7]], !noalias [[META50:![0-9]+]] // CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr addrspace(4) [[AGG_TMP3_SROA_0_0_COPYLOAD]], i64 [[AGG_TMP3_SROA_2_0_COPYLOAD]] // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] // CHECK-NEXT: [[CMP_I:%.*]] = icmp ult i32 [[I_0_I]], 2 -// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPENS0_6DETAIL17ACCESSOR_ITERATORIKILI1EEEILM2ENS3_14NEW_PROPERTIES10PROPERTIESINSA_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME1EEENS3_19FULL_GROUP_PROPERTYENS3_6DETAIL14NAIVE_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESP_SN_NS0_4SPANISO_XT2_EEET3__EXIT:%.*]] +// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPENS0_6DETAIL17ACCESSOR_ITERATORIKILI1EEEILM2ENS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSC_INS3_14FULL_GROUP_KEYEJEEENSC_INS3_6DETAIL9NAIVE_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEET3__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 // CHECK-NEXT: [[MUL_I_I:%.*]] = mul nuw nsw i32 [[TMP3]], [[I_0_I]] // CHECK-NEXT: [[ADD_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I]] // CHECK-NEXT: [[CONV3_I:%.*]] = sext i32 [[ADD_I_I]] to i64 // CHECK-NEXT: [[ADD_PTR_I_I_I:%.*]] = getelementptr i32, ptr addrspace(4) [[TMP4]], i64 [[CONV3_I]] -// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(4) [[ADD_PTR_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(4) [[ADD_PTR_I_I_I]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I]] -// CHECK-NEXT: store i32 [[TMP5]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: store i32 [[TMP5]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP54:![0-9]+]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupENS0_6detail17accessor_iteratorIKiLi1EEEiLm2ENS3_14new_properties10propertiesINSA_6detail20properties_type_listIJNS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESP_SN_NS0_4spanISO_XT2_EEET3_.exit: +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP53:![0-9]+]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupENS0_6detail17accessor_iteratorIKiLi1EEEiLm2ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSC_INS3_14full_group_keyEJEEENSC_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEET3_.exit: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] // CHECK-NEXT: ret void @@ -413,12 +413,12 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< accessor_iter_t, span, opt_striped); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupENS0_6detail17accessor_iteratorIKiLi1EEEiLm2ENS3_14new_properties10propertiesINSA_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEET3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.15") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.19") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META17]] !sycl_fixed_targets [[META7]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupENS0_6detail17accessor_iteratorIKiLi1EEEiLm2ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSC_INS3_21contiguous_memory_keyEJEEENSC_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESQ_SO_NS0_4spanISP_XT2_EEET3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.9") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.11") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[AGG_TMP_SROA_0_0_COPYLOAD:%.*]] = load ptr addrspace(4), ptr [[IN_PTR]], align 8, !tbaa [[TBAA12]] +// CHECK-NEXT: [[AGG_TMP_SROA_0_0_COPYLOAD:%.*]] = load ptr addrspace(4), ptr [[IN_PTR]], align 8, !tbaa [[TBAA11]] // CHECK-NEXT: [[AGG_TMP_SROA_2_0_IN_PTR_ASCAST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[IN_PTR]], i64 8 -// CHECK-NEXT: [[AGG_TMP_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr [[AGG_TMP_SROA_2_0_IN_PTR_ASCAST_SROA_IDX]], align 8, !tbaa [[TBAA14]] +// CHECK-NEXT: [[AGG_TMP_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr [[AGG_TMP_SROA_2_0_IN_PTR_ASCAST_SROA_IDX]], align 8, !tbaa [[TBAA13]] // CHECK-NEXT: [[ADD_PTR_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[AGG_TMP_SROA_0_0_COPYLOAD]], i64 [[AGG_TMP_SROA_2_0_COPYLOAD]] // CHECK-NEXT: [[CMP_I_I:%.*]] = icmp ne ptr addrspace(4) [[ADD_PTR_I_I]], null // CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I]]) @@ -426,34 +426,34 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< // CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq ptr addrspace(1) [[CALL_I_I_I]], null // CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] // CHECK: if.then: -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT]], align 8, !tbaa [[TBAA12]] +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT]], align 8, !tbaa [[TBAA11]] // CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META55:![0-9]+]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META58:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META54:![0-9]+]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA7]], !noalias [[META57:![0-9]+]] // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[IF_THEN]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] // CHECK-NEXT: [[CMP_I:%.*]] = icmp ult i32 [[I_0_I]], 2 -// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPENS0_6DETAIL17ACCESSOR_ITERATORIKILI1EEEILM2ENS3_14NEW_PROPERTIES10PROPERTIESINSA_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME1EEENS3_19FULL_GROUP_PROPERTYENS3_6DETAIL14NAIVE_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESQ_SO_NS0_4SPANISP_XT2_EEET3__EXIT:%.*]] +// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPENS0_6DETAIL17ACCESSOR_ITERATORIKILI1EEEILM2ENS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSC_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSC_INS3_14FULL_GROUP_KEYEJEEENSC_INS3_6DETAIL9NAIVE_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEEST_SR_NS0_4SPANISS_XT2_EEET3__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 // CHECK-NEXT: [[MUL_I_I:%.*]] = mul nuw nsw i32 [[TMP3]], [[I_0_I]] // CHECK-NEXT: [[ADD_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I]] // CHECK-NEXT: [[CONV3_I:%.*]] = sext i32 [[ADD_I_I]] to i64 // CHECK-NEXT: [[ADD_PTR_I_I_I:%.*]] = getelementptr i32, ptr addrspace(4) [[ADD_PTR_I_I]], i64 [[CONV3_I]] -// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[ADD_PTR_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[ADD_PTR_I_I_I]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I]] -// CHECK-NEXT: store i32 [[TMP4]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: store i32 [[TMP4]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP61:![0-9]+]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupENS0_6detail17accessor_iteratorIKiLi1EEEiLm2ENS3_14new_properties10propertiesINSA_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESQ_SO_NS0_4spanISP_XT2_EEET3_.exit: +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP60:![0-9]+]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupENS0_6detail17accessor_iteratorIKiLi1EEEiLm2ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSC_INS3_21contiguous_memory_keyEJEEENSC_INS3_14full_group_keyEJEEENSC_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeEST_SR_NS0_4spanISS_XT2_EEET3_.exit: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] // CHECK-NEXT: br label [[CLEANUP:%.*]] // CHECK: if.end: -// CHECK-NEXT: [[CALL8:%.*]] = tail call spir_func noundef <2 x i32> @_Z30__spirv_SubgroupBlockReadINTELIDv2_jET_PU3AS1Kj(ptr addrspace(1) noundef nonnull [[CALL_I_I_I]]) #[[ATTR4]] -// CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(4), ptr [[OUT]], align 8, !tbaa [[TBAA45]] -// CHECK-NEXT: store <2 x i32> [[CALL8]], ptr addrspace(4) [[TMP5]], align 4 +// CHECK-NEXT: [[CALL6:%.*]] = tail call spir_func noundef <2 x i32> @_Z30__spirv_SubgroupBlockReadINTELIDv2_jET_PU3AS1Kj(ptr addrspace(1) noundef nonnull [[CALL_I_I_I]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(4), ptr [[OUT]], align 8, !tbaa [[TBAA44]] +// CHECK-NEXT: store <2 x i32> [[CALL6]], ptr addrspace(4) [[TMP5]], align 4 // CHECK-NEXT: br label [[CLEANUP]] // CHECK: cleanup: // CHECK-NEXT: ret void @@ -463,8 +463,8 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< sycl::sub_group, plain_global_ptr, char, 2, opt_striped>( sycl::sub_group, plain_global_ptr, span, opt_striped); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1ccLm2ENS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_SK_NS0_4spanISL_XT2_EEET3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.23") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.19") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META17]] !sycl_fixed_targets [[META7]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1ccLm2ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEET3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.15") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.11") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[CMP_I:%.*]] = icmp ne ptr addrspace(1) [[IN_PTR]], null // CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP_I]]) @@ -473,34 +473,34 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< // CHECK-NEXT: [[CMP1_I_NOT:%.*]] = icmp eq i64 [[REM_I]], 0 // CHECK-NEXT: br i1 [[CMP1_I_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] // CHECK: if.then: -// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[OUT]], align 8, !tbaa [[TBAA12]] +// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[OUT]], align 8, !tbaa [[TBAA11]] // CHECK-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(4) // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META62:![0-9]+]] -// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META65:![0-9]+]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META61:![0-9]+]] +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA7]], !noalias [[META64:![0-9]+]] // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[IF_THEN]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] -// CHECK-NEXT: [[CMP_I18:%.*]] = icmp ult i32 [[I_0_I]], 2 -// CHECK-NEXT: br i1 [[CMP_I18]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1CCLM2ENS3_14NEW_PROPERTIES10PROPERTIESINS8_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME1EEENS3_19FULL_GROUP_PROPERTYENS3_6DETAIL14NAIVE_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESO_SM_NS0_4SPANISN_XT2_EEET3__EXIT:%.*]] +// CHECK-NEXT: [[CMP_I14:%.*]] = icmp ult i32 [[I_0_I]], 2 +// CHECK-NEXT: br i1 [[CMP_I14]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1CCLM2ENS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSA_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSA_INS3_14FULL_GROUP_KEYEJEEENSA_INS3_6DETAIL9NAIVE_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEET3__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 // CHECK-NEXT: [[MUL_I_I:%.*]] = mul nuw nsw i32 [[TMP4]], [[I_0_I]] // CHECK-NEXT: [[ADD_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I]] // CHECK-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[ADD_I_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[IN_PTR]], i64 [[IDXPROM_I]] -// CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr addrspace(1) [[ARRAYIDX_I]], align 1, !tbaa [[TBAA16]] +// CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr addrspace(1) [[ARRAYIDX_I]], align 1, !tbaa [[TBAA15]] // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[TMP2]], i64 [[CONV_I]] -// CHECK-NEXT: store i8 [[TMP5]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 1, !tbaa [[TBAA16]] +// CHECK-NEXT: store i8 [[TMP5]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 1, !tbaa [[TBAA15]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP68:![0-9]+]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1ccLm2ENS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEET3_.exit: +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP67:![0-9]+]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1ccLm2ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEENSA_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEET3_.exit: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] // CHECK-NEXT: br label [[CLEANUP:%.*]] // CHECK: if.end: -// CHECK-NEXT: [[CALL6:%.*]] = tail call spir_func noundef <2 x i8> @_Z30__spirv_SubgroupBlockReadINTELIDv2_hET_PU3AS1Kh(ptr addrspace(1) noundef nonnull [[IN_PTR]]) #[[ATTR4]] -// CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(4), ptr [[OUT]], align 8, !tbaa [[TBAA69:![0-9]+]] -// CHECK-NEXT: store <2 x i8> [[CALL6]], ptr addrspace(4) [[TMP6]], align 1 +// CHECK-NEXT: [[CALL4:%.*]] = tail call spir_func noundef <2 x i8> @_Z30__spirv_SubgroupBlockReadINTELIDv2_hET_PU3AS1Kh(ptr addrspace(1) noundef nonnull [[IN_PTR]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(4), ptr [[OUT]], align 8, !tbaa [[TBAA68:![0-9]+]] +// CHECK-NEXT: store <2 x i8> [[CALL4]], ptr addrspace(4) [[TMP6]], align 1 // CHECK-NEXT: br label [[CLEANUP]] // CHECK: cleanup: // CHECK-NEXT: ret void @@ -510,8 +510,8 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< sycl::sub_group, plain_global_ptr, short, 4, opt_striped>( sycl::sub_group, plain_global_ptr, span, opt_striped); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1ssLm4ENS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_SK_NS0_4spanISL_XT2_EEET3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.11") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.19") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META17]] !sycl_fixed_targets [[META7]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1ssLm4ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEET3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.5") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.11") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[CMP_I:%.*]] = icmp ne ptr addrspace(1) [[IN_PTR]], null // CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP_I]]) @@ -520,34 +520,34 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< // CHECK-NEXT: [[CMP1_I_NOT:%.*]] = icmp eq i64 [[REM_I]], 0 // CHECK-NEXT: br i1 [[CMP1_I_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] // CHECK: if.then: -// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[OUT]], align 8, !tbaa [[TBAA12]] +// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[OUT]], align 8, !tbaa [[TBAA11]] // CHECK-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(4) // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META71:![0-9]+]] -// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META74:![0-9]+]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META70:![0-9]+]] +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA7]], !noalias [[META73:![0-9]+]] // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[IF_THEN]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] -// CHECK-NEXT: [[CMP_I18:%.*]] = icmp ult i32 [[I_0_I]], 4 -// CHECK-NEXT: br i1 [[CMP_I18]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1SSLM4ENS3_14NEW_PROPERTIES10PROPERTIESINS8_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME1EEENS3_19FULL_GROUP_PROPERTYENS3_6DETAIL14NAIVE_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESO_SM_NS0_4SPANISN_XT2_EEET3__EXIT:%.*]] +// CHECK-NEXT: [[CMP_I14:%.*]] = icmp ult i32 [[I_0_I]], 4 +// CHECK-NEXT: br i1 [[CMP_I14]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1SSLM4ENS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSA_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSA_INS3_14FULL_GROUP_KEYEJEEENSA_INS3_6DETAIL9NAIVE_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEET3__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 // CHECK-NEXT: [[MUL_I_I:%.*]] = mul i32 [[TMP4]], [[I_0_I]] // CHECK-NEXT: [[ADD_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I]] // CHECK-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[ADD_I_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[IN_PTR]], i64 [[IDXPROM_I]] -// CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr addrspace(1) [[ARRAYIDX_I]], align 2, !tbaa [[TBAA21]] +// CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr addrspace(1) [[ARRAYIDX_I]], align 2, !tbaa [[TBAA20]] // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(4) [[TMP2]], i64 [[CONV_I]] -// CHECK-NEXT: store i16 [[TMP5]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA21]] +// CHECK-NEXT: store i16 [[TMP5]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA20]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP77:![0-9]+]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1ssLm4ENS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEET3_.exit: +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP76:![0-9]+]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1ssLm4ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEENSA_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEET3_.exit: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] // CHECK-NEXT: br label [[CLEANUP:%.*]] // CHECK: if.end: -// CHECK-NEXT: [[CALL6:%.*]] = tail call spir_func noundef <4 x i16> @_Z30__spirv_SubgroupBlockReadINTELIDv4_tET_PU3AS1Kt(ptr addrspace(1) noundef nonnull [[IN_PTR]]) #[[ATTR4]] -// CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(4), ptr [[OUT]], align 8, !tbaa [[TBAA25]] -// CHECK-NEXT: store <4 x i16> [[CALL6]], ptr addrspace(4) [[TMP6]], align 2 +// CHECK-NEXT: [[CALL4:%.*]] = tail call spir_func noundef <4 x i16> @_Z30__spirv_SubgroupBlockReadINTELIDv4_tET_PU3AS1Kt(ptr addrspace(1) noundef nonnull [[IN_PTR]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(4), ptr [[OUT]], align 8, !tbaa [[TBAA24]] +// CHECK-NEXT: store <4 x i16> [[CALL4]], ptr addrspace(4) [[TMP6]], align 2 // CHECK-NEXT: br label [[CLEANUP]] // CHECK: cleanup: // CHECK-NEXT: ret void @@ -556,31 +556,31 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< sycl::sub_group, plain_global_ptr, int, 3, opt_striped>( sycl::sub_group, plain_global_ptr, span, opt_striped); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm3ENS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_SK_NS0_4spanISL_XT2_EEET3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.12") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.19") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META17]] !sycl_fixed_targets [[META7]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm3ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEET3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.6") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.11") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT]], align 8, !tbaa [[TBAA12]] +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT]], align 8, !tbaa [[TBAA11]] // CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META78:![0-9]+]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META81:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META77:![0-9]+]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA7]], !noalias [[META80:![0-9]+]] // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] // CHECK-NEXT: [[CMP_I:%.*]] = icmp ult i32 [[I_0_I]], 3 -// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1IILM3ENS3_14NEW_PROPERTIES10PROPERTIESINS8_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME1EEENS3_19FULL_GROUP_PROPERTYENS3_6DETAIL14NAIVE_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESO_SM_NS0_4SPANISN_XT2_EEET3__EXIT:%.*]] +// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1IILM3ENS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSA_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSA_INS3_14FULL_GROUP_KEYEJEEENSA_INS3_6DETAIL9NAIVE_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEET3__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 // CHECK-NEXT: [[MUL_I_I:%.*]] = mul i32 [[TMP3]], [[I_0_I]] // CHECK-NEXT: [[ADD_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I]] // CHECK-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[ADD_I_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[IN_PTR]], i64 [[IDXPROM_I]] -// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I]] -// CHECK-NEXT: store i32 [[TMP4]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: store i32 [[TMP4]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP84:![0-9]+]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm3ENS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEET3_.exit: +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP83:![0-9]+]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm3ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEENSA_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEET3_.exit: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] // CHECK-NEXT: ret void @@ -588,31 +588,31 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< sycl::sub_group, plain_global_ptr, int, 16, opt_striped>( sycl::sub_group, plain_global_ptr, span, opt_striped); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm16ENS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_SK_NS0_4spanISL_XT2_EEET3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.24") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.19") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META17]] !sycl_fixed_targets [[META7]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm16ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEET3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.16") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.11") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT]], align 8, !tbaa [[TBAA12]] +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT]], align 8, !tbaa [[TBAA11]] // CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META85:![0-9]+]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META88:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META84:![0-9]+]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA7]], !noalias [[META87:![0-9]+]] // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] // CHECK-NEXT: [[CMP_I:%.*]] = icmp ult i32 [[I_0_I]], 16 -// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1IILM16ENS3_14NEW_PROPERTIES10PROPERTIESINS8_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME1EEENS3_19FULL_GROUP_PROPERTYENS3_6DETAIL14NAIVE_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESO_SM_NS0_4SPANISN_XT2_EEET3__EXIT:%.*]] +// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1IILM16ENS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSA_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSA_INS3_14FULL_GROUP_KEYEJEEENSA_INS3_6DETAIL9NAIVE_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEET3__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 // CHECK-NEXT: [[MUL_I_I:%.*]] = mul i32 [[TMP3]], [[I_0_I]] // CHECK-NEXT: [[ADD_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I]] // CHECK-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[ADD_I_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[IN_PTR]], i64 [[IDXPROM_I]] -// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I]] -// CHECK-NEXT: store i32 [[TMP4]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: store i32 [[TMP4]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP91:![0-9]+]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm16ENS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEET3_.exit: +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP90:![0-9]+]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm16ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEENSA_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEET3_.exit: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] // CHECK-NEXT: ret void @@ -620,30 +620,30 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_load< sycl::sub_group, plain_global_ptr, int, 11, opt_striped>( sycl::sub_group, plain_global_ptr, span, opt_striped); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm11ENS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_SK_NS0_4spanISL_XT2_EEET3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.25") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.19") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META17]] !sycl_fixed_targets [[META7]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm11ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEET3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(1) noundef [[IN_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::span.17") align 8 [[OUT:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.11") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT]], align 8, !tbaa [[TBAA12]] +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT]], align 8, !tbaa [[TBAA11]] // CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META92:![0-9]+]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META95:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META91:![0-9]+]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA7]], !noalias [[META94:![0-9]+]] // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] // CHECK-NEXT: [[CMP_I:%.*]] = icmp ult i32 [[I_0_I]], 11 -// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1IILM11ENS3_14NEW_PROPERTIES10PROPERTIESINS8_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME1EEENS3_19FULL_GROUP_PROPERTYENS3_6DETAIL14NAIVE_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESO_SM_NS0_4SPANISN_XT2_EEET3__EXIT:%.*]] +// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1IILM11ENS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSA_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSA_INS3_14FULL_GROUP_KEYEJEEENSA_INS3_6DETAIL9NAIVE_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEET3__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 // CHECK-NEXT: [[MUL_I_I:%.*]] = mul i32 [[TMP3]], [[I_0_I]] // CHECK-NEXT: [[ADD_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I]] // CHECK-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[ADD_I_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[IN_PTR]], i64 [[IDXPROM_I]] -// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I]] -// CHECK-NEXT: store i32 [[TMP4]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: store i32 [[TMP4]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP98:![0-9]+]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm11ENS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEET3_.exit: +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP97:![0-9]+]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm11ENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEENSA_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEET3_.exit: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR4]] // CHECK-NEXT: ret void diff --git a/sycl/test/check_device_code/group_store.cpp b/sycl/test/check_device_code/group_store.cpp index 7647530f6446a..6533a5d40f9f3 100644 --- a/sycl/test/check_device_code/group_store.cpp +++ b/sycl/test/check_device_code/group_store.cpp @@ -16,22 +16,22 @@ namespace oneapi_exp = sycl::ext::oneapi::experimental; using namespace sycl::ext::oneapi::experimental; using full_group_blocked = - decltype(new_properties::properties(full_group, data_placement_blocked)); + decltype(properties(full_group, data_placement_blocked)); -using naive_blocked = decltype(new_properties::properties( - oneapi_exp::detail::naive, data_placement_blocked)); +using naive_blocked = + decltype(properties(oneapi_exp::detail::naive, data_placement_blocked)); -using opt_blocked = decltype(new_properties::properties( - full_group, contiguous_memory, data_placement_blocked)); +using opt_blocked = + decltype(properties(full_group, contiguous_memory, data_placement_blocked)); using full_group_striped = - decltype(new_properties::properties(full_group, data_placement_striped)); + decltype(properties(full_group, data_placement_striped)); -using naive_striped = decltype(new_properties::properties( - oneapi_exp::detail::naive, data_placement_striped)); +using naive_striped = + decltype(properties(oneapi_exp::detail::naive, data_placement_striped)); -using opt_striped = decltype(new_properties::properties( - full_group, contiguous_memory, data_placement_striped)); +using opt_striped = + decltype(properties(full_group, contiguous_memory, data_placement_striped)); template using plain_global_ptr = typename sycl::detail::DecoratedType< @@ -41,15 +41,15 @@ using plain_global_ptr = typename sycl::detail::DecoratedType< template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< sycl::sub_group, int, plain_global_ptr, naive_blocked>( sycl::sub_group, const int &, plain_global_ptr, naive_blocked); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiPU3AS1iNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_RKSK_SL_T2_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(4) noundef align 4 dereferenceable(4) [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] comdat !srcloc [[META6:![0-9]+]] !sycl_fixed_targets [[META7:![0-9]+]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESN_RKSL_SM_T2_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(4) noundef align 4 dereferenceable(4) [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] comdat !srcloc [[META5:![0-9]+]] !sycl_fixed_targets [[META6:![0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5:[0-9]+]] // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 // CHECK-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[TMP0]] to i64 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT_PTR]], i64 [[IDXPROM_I]] -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[IN]], align 4, !tbaa [[TBAA8:![0-9]+]] -// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[IN]], align 4, !tbaa [[TBAA7:![0-9]+]] +// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] // CHECK-NEXT: ret void @@ -57,8 +57,8 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< sycl::sub_group, int, plain_global_ptr, opt_blocked>( sycl::sub_group, const int &, plain_global_ptr, opt_blocked); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiPU3AS1iNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_RKSK_SL_T2_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(4) noundef align 4 dereferenceable(4) [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.2") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META6]] !sycl_fixed_targets [[META7]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_RKSM_SN_T2_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(4) noundef align 4 dereferenceable(4) [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.0") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META5]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[CMP_I_I:%.*]] = icmp ne ptr addrspace(1) [[OUT_PTR]], null // CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I]]) @@ -71,23 +71,23 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 // CHECK-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[TMP1]] to i64 // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT_PTR]], i64 [[IDXPROM_I_I]] -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN]], align 4, !tbaa [[TBAA8]] -// CHECK-NEXT: store i32 [[TMP2]], ptr addrspace(1) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: store i32 [[TMP2]], ptr addrspace(1) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] -// CHECK-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEKILM1EPU3AS1INS3_14NEW_PROPERTIES10PROPERTIESINS9_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME0EEENS3_19FULL_GROUP_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESN_NS0_4SPANISL_XT1_EEESM_T3__EXIT:%.*]] +// CHECK-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEKILM1EPU3AS1INS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESP_NS0_4SPANISN_XT1_EEESO_T3__EXIT:%.*]] // CHECK: if.end.i: -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[IN]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[IN]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELIjEvPU3AS1jT_(ptr addrspace(1) noundef nonnull [[OUT_PTR]], i32 noundef [[TMP3]]) #[[ATTR5]] -// CHECK-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEKILM1EPU3AS1INS3_14NEW_PROPERTIES10PROPERTIESINS9_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME0EEENS3_19FULL_GROUP_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESN_NS0_4SPANISL_XT1_EEESM_T3__EXIT]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEKiLm1EPU3AS1iNS3_14new_properties10propertiesINS9_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESN_NS0_4spanISL_XT1_EEESM_T3_.exit: +// CHECK-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEKILM1EPU3AS1INS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESP_NS0_4SPANISN_XT1_EEESO_T3__EXIT]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEKiLm1EPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESP_NS0_4spanISN_XT1_EEESO_T3_.exit: // CHECK-NEXT: ret void // Check that contiguous_memory can be auto-detected. template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< sycl::sub_group, int, plain_global_ptr, full_group_blocked>( sycl::sub_group, const int &, plain_global_ptr, full_group_blocked); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiPU3AS1iNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESL_RKSJ_SK_T2_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(4) noundef align 4 dereferenceable(4) [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.8") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META6]] !sycl_fixed_targets [[META7]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_RKSK_SL_T2_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(4) noundef align 4 dereferenceable(4) [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.2") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META5]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[CMP_I_I:%.*]] = icmp ne ptr addrspace(1) [[OUT_PTR]], null // CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I]]) @@ -100,15 +100,15 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 // CHECK-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[TMP1]] to i64 // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT_PTR]], i64 [[IDXPROM_I_I]] -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN]], align 4, !tbaa [[TBAA8]] -// CHECK-NEXT: store i32 [[TMP2]], ptr addrspace(1) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: store i32 [[TMP2]], ptr addrspace(1) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] -// CHECK-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEKILM1EPU3AS1INS3_14NEW_PROPERTIES10PROPERTIESINS9_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME0EEENS3_19FULL_GROUP_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESM_NS0_4SPANISK_XT1_EEESL_T3__EXIT:%.*]] +// CHECK-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEKILM1EPU3AS1INS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_14FULL_GROUP_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESN_NS0_4SPANISL_XT1_EEESM_T3__EXIT:%.*]] // CHECK: if.end.i: -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[IN]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[IN]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELIjEvPU3AS1jT_(ptr addrspace(1) noundef nonnull [[OUT_PTR]], i32 noundef [[TMP3]]) #[[ATTR5]] -// CHECK-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEKILM1EPU3AS1INS3_14NEW_PROPERTIES10PROPERTIESINS9_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME0EEENS3_19FULL_GROUP_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESM_NS0_4SPANISK_XT1_EEESL_T3__EXIT]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEKiLm1EPU3AS1iNS3_14new_properties10propertiesINS9_6detail20properties_type_listIJNS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_NS0_4spanISK_XT1_EEESL_T3_.exit: +// CHECK-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEKILM1EPU3AS1INS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_14FULL_GROUP_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESN_NS0_4SPANISL_XT1_EEESM_T3__EXIT]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEKiLm1EPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESN_NS0_4spanISL_XT1_EEESM_T3_.exit: // CHECK-NEXT: ret void // SYCL 2020's accessor can't be statically known to be contiguous. @@ -118,19 +118,19 @@ using accessor_iter_t = accessor( sycl::sub_group, const int &, accessor_iter_t, full_group_blocked); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiNS0_6detail17accessor_iteratorIiLi1EEENS3_14new_properties10propertiesINS9_6detail20properties_type_listIJNS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_RKSK_SL_T2_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(4) noundef align 4 dereferenceable(4) [[IN:%.*]], ptr noundef byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.8") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META6]] !sycl_fixed_targets [[META7]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiNS0_6detail17accessor_iteratorIiLi1EEENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESN_RKSL_SM_T2_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(4) noundef align 4 dereferenceable(4) [[IN:%.*]], ptr noundef byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.2") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META5]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[AGG_TMP2_SROA_0_0_COPYLOAD:%.*]] = load ptr addrspace(4), ptr [[OUT_PTR]], align 8, !tbaa [[TBAA12:![0-9]+]] +// CHECK-NEXT: [[AGG_TMP2_SROA_0_0_COPYLOAD:%.*]] = load ptr addrspace(4), ptr [[OUT_PTR]], align 8, !tbaa [[TBAA11:![0-9]+]] // CHECK-NEXT: [[AGG_TMP2_SROA_2_0_OUT_PTR_ASCAST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[OUT_PTR]], i64 8 -// CHECK-NEXT: [[AGG_TMP2_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr [[AGG_TMP2_SROA_2_0_OUT_PTR_ASCAST_SROA_IDX]], align 8, !tbaa [[TBAA14:![0-9]+]] +// CHECK-NEXT: [[AGG_TMP2_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr [[AGG_TMP2_SROA_2_0_OUT_PTR_ASCAST_SROA_IDX]], align 8, !tbaa [[TBAA13:![0-9]+]] // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 // CHECK-NEXT: [[CONV5_I_I:%.*]] = sext i32 [[TMP0]] to i64 // CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr addrspace(4) [[AGG_TMP2_SROA_0_0_COPYLOAD]], i64 [[AGG_TMP2_SROA_2_0_COPYLOAD]] // CHECK-NEXT: [[ADD_PTR_I_I_I_I:%.*]] = getelementptr i32, ptr addrspace(4) [[TMP1]], i64 [[CONV5_I_I]] -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN]], align 4, !tbaa [[TBAA8]] -// CHECK-NEXT: store i32 [[TMP2]], ptr addrspace(4) [[ADD_PTR_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: store i32 [[TMP2]], ptr addrspace(4) [[ADD_PTR_I_I_I_I]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] // CHECK-NEXT: ret void @@ -140,20 +140,20 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< const int &, accessor_iter_t, opt_blocked); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiNS0_6detail17accessor_iteratorIiLi1EEENS3_14new_properties10propertiesINS9_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESN_RKSL_SM_T2_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(4) noundef align 4 dereferenceable(4) [[IN:%.*]], ptr noundef byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.2") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META6]] !sycl_fixed_targets [[META7]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiNS0_6detail17accessor_iteratorIiLi1EEENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T1_Esr6detailE18is_generic_group_vIT_EEvE4typeESP_RKSN_SO_T2_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr addrspace(4) noundef align 4 dereferenceable(4) [[IN:%.*]], ptr noundef byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.0") align 1 [[PROPERTIES:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META5]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[AGG_TMP2_SROA_0_0_COPYLOAD:%.*]] = load ptr addrspace(4), ptr [[OUT_PTR]], align 8, !tbaa [[TBAA12]] +// CHECK-NEXT: [[AGG_TMP2_SROA_0_0_COPYLOAD:%.*]] = load ptr addrspace(4), ptr [[OUT_PTR]], align 8, !tbaa [[TBAA11]] // CHECK-NEXT: [[AGG_TMP2_SROA_2_0_OUT_PTR_ASCAST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[OUT_PTR]], i64 8 -// CHECK-NEXT: [[AGG_TMP2_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr [[AGG_TMP2_SROA_2_0_OUT_PTR_ASCAST_SROA_IDX]], align 8, !tbaa [[TBAA14]] +// CHECK-NEXT: [[AGG_TMP2_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr [[AGG_TMP2_SROA_2_0_OUT_PTR_ASCAST_SROA_IDX]], align 8, !tbaa [[TBAA13]] // CHECK-NEXT: [[ADD_PTR_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[AGG_TMP2_SROA_0_0_COPYLOAD]], i64 [[AGG_TMP2_SROA_2_0_COPYLOAD]] // CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(4) [[ADD_PTR_I_I_I]], null // CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) // CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(4) [[ADD_PTR_I_I_I]] to i64 // CHECK-NEXT: [[REM_I_I_I:%.*]] = and i64 [[TMP0]], 15 // CHECK-NEXT: [[CMP1_I_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 -// CHECK-NEXT: br i1 [[CMP1_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL6DETAIL16GET_BLOCK_OP_PTRILI16ELM1ENS0_6DETAIL17ACCESSOR_ITERATORIILI1EEENS3_14NEW_PROPERTIES10PROPERTIESINS9_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME0EEENS3_19FULL_GROUP_PROPERTYEEEEEEEEDAT1_T2__EXIT_I:%.*]], label [[IF_THEN_I:%.*]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental6detail16get_block_op_ptrILi16ELm1ENS0_6detail17accessor_iteratorIiLi1EEENS3_14new_properties10propertiesINS9_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyEEEEEEEEDaT1_T2_.exit.i: +// CHECK-NEXT: br i1 [[CMP1_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL6DETAIL16GET_BLOCK_OP_PTRILI16ELM1ENS0_6DETAIL17ACCESSOR_ITERATORIILI1EEENS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEEEEEEEEDAT1_T2__EXIT_I:%.*]], label [[IF_THEN_I:%.*]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental6detail16get_block_op_ptrILi16ELm1ENS0_6detail17accessor_iteratorIiLi1EEENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEEEEEEEEDaT1_T2_.exit.i: // CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef ptr addrspace(1) @_Z41__spirv_GenericCastToPtrExplicit_ToGlobalPvi(ptr addrspace(4) noundef nonnull [[ADD_PTR_I_I_I]], i32 noundef 5) #[[ATTR6:[0-9]+]] // CHECK-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq ptr addrspace(1) [[CALL_I_I_I_I]], null // CHECK-NEXT: br i1 [[TOBOOL_NOT_I]], label [[IF_THEN_I]], label [[IF_END_I:%.*]] @@ -162,15 +162,15 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 // CHECK-NEXT: [[CONV5_I_I:%.*]] = sext i32 [[TMP1]] to i64 // CHECK-NEXT: [[ADD_PTR_I_I_I_I:%.*]] = getelementptr i32, ptr addrspace(4) [[ADD_PTR_I_I_I]], i64 [[CONV5_I_I]] -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN]], align 4, !tbaa [[TBAA8]] -// CHECK-NEXT: store i32 [[TMP2]], ptr addrspace(4) [[ADD_PTR_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN]], align 4, !tbaa [[TBAA7]] +// CHECK-NEXT: store i32 [[TMP2]], ptr addrspace(4) [[ADD_PTR_I_I_I_I]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] -// CHECK-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEKILM1ENS0_6DETAIL17ACCESSOR_ITERATORIILI1EEENS3_14NEW_PROPERTIES10PROPERTIESINSA_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME0EEENS3_19FULL_GROUP_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESO_NS0_4SPANISM_XT1_EEESN_T3__EXIT:%.*]] +// CHECK-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEKILM1ENS0_6DETAIL17ACCESSOR_ITERATORIILI1EEENS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSC_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSC_INS3_14FULL_GROUP_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESQ_NS0_4SPANISO_XT1_EEESP_T3__EXIT:%.*]] // CHECK: if.end.i: -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[IN]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[IN]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELIjEvPU3AS1jT_(ptr addrspace(1) noundef nonnull [[CALL_I_I_I_I]], i32 noundef [[TMP3]]) #[[ATTR5]] -// CHECK-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEKILM1ENS0_6DETAIL17ACCESSOR_ITERATORIILI1EEENS3_14NEW_PROPERTIES10PROPERTIESINSA_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME0EEENS3_19FULL_GROUP_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESO_NS0_4SPANISM_XT1_EEESN_T3__EXIT]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEKiLm1ENS0_6detail17accessor_iteratorIiLi1EEENS3_14new_properties10propertiesINSA_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_NS0_4spanISM_XT1_EEESN_T3_.exit: +// CHECK-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEKILM1ENS0_6DETAIL17ACCESSOR_ITERATORIILI1EEENS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSC_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSC_INS3_14FULL_GROUP_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESQ_NS0_4SPANISO_XT1_EEESP_T3__EXIT]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEKiLm1ENS0_6detail17accessor_iteratorIiLi1EEENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSC_INS3_21contiguous_memory_keyEJEEENSC_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESQ_NS0_4spanISO_XT1_EEESP_T3_.exit: // CHECK-NEXT: ret void // Four shorts in blocked data layout could be stored as a single 64-bit @@ -178,8 +178,8 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< sycl::sub_group, short, 4, plain_global_ptr, opt_blocked>( sycl::sub_group, span, plain_global_ptr, opt_blocked); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEsLm4EPU3AS1sNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_NS0_4spanISK_XT1_EEESL_T3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.10") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.2") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEsLm4EPU3AS1sNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_NS0_4spanISM_XT1_EEESN_T3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.4") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.0") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META15:![0-9]+]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VALUES:%.*]] = alloca [4 x i16], align 2 // CHECK-NEXT: [[CMP_I:%.*]] = icmp ne ptr addrspace(1) [[OUT_PTR]], null @@ -189,50 +189,50 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< // CHECK-NEXT: [[CMP1_I_NOT:%.*]] = icmp eq i64 [[REM_I]], 0 // CHECK-NEXT: br i1 [[CMP1_I_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] // CHECK: if.then: -// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA12]] +// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA11]] // CHECK-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(4) // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META17:![0-9]+]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META16:![0-9]+]] // CHECK-NEXT: [[MUL_I_I:%.*]] = shl i32 [[TMP3]], 2 // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[IF_THEN]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] -// CHECK-NEXT: [[CMP_I23:%.*]] = icmp ult i32 [[I_0_I]], 4 -// CHECK-NEXT: br i1 [[CMP_I23]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM4EPU3AS1SNS3_14NEW_PROPERTIES10PROPERTIESINS8_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME0EEENS3_19FULL_GROUP_PROPERTYENS3_6DETAIL14NAIVE_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESO_NS0_4SPANISM_XT1_EEESN_T3__EXIT:%.*]] +// CHECK-NEXT: [[CMP_I19:%.*]] = icmp ult i32 [[I_0_I]], 4 +// CHECK-NEXT: br i1 [[CMP_I19]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM4EPU3AS1SNS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSA_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSA_INS3_14FULL_GROUP_KEYEJEEENSA_INS3_6DETAIL9NAIVE_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_T3__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(4) [[TMP2]], i64 [[CONV_I]] -// CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA20:![0-9]+]] +// CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA19:![0-9]+]] // CHECK-NEXT: [[ADD_I_I:%.*]] = or disjoint i32 [[MUL_I_I]], [[I_0_I]] // CHECK-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[ADD_I_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[OUT_PTR]], i64 [[IDXPROM_I]] -// CHECK-NEXT: store i16 [[TMP4]], ptr addrspace(1) [[ARRAYIDX_I]], align 2, !tbaa [[TBAA20]] +// CHECK-NEXT: store i16 [[TMP4]], ptr addrspace(1) [[ARRAYIDX_I]], align 2, !tbaa [[TBAA19]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP22:![0-9]+]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEsLm4EPU3AS1sNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_NS0_4spanISM_XT1_EEESN_T3_.exit: +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEsLm4EPU3AS1sNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEENSA_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_T3_.exit: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] // CHECK-NEXT: br label [[CLEANUP:%.*]] // CHECK: if.end: // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VALUES]]) #[[ATTR7:[0-9]+]] -// CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(4), ptr [[IN]], align 8, !tbaa [[TBAA24:![0-9]+]] +// CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(4), ptr [[IN]], align 8, !tbaa [[TBAA23:![0-9]+]] // CHECK-NEXT: br label [[FOR_COND:%.*]] // CHECK: for.cond: // CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[IF_END]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] // CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[I_0]], 4 // CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] // CHECK: for.cond.cleanup: -// CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[VALUES]], align 2, !tbaa [[TBAA26:![0-9]+]] +// CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[VALUES]], align 2, !tbaa [[TBAA25:![0-9]+]] // CHECK-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELImEvPU3AS1mT_(ptr addrspace(1) noundef nonnull [[OUT_PTR]], i64 noundef [[TMP6]]) #[[ATTR5]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VALUES]]) #[[ATTR7]] // CHECK-NEXT: br label [[CLEANUP]] // CHECK: for.body: // CHECK-NEXT: [[CONV:%.*]] = zext nneg i32 [[I_0]] to i64 -// CHECK-NEXT: [[ARRAYIDX_I24:%.*]] = getelementptr inbounds i16, ptr addrspace(4) [[TMP5]], i64 [[CONV]] -// CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I24]], align 2, !tbaa [[TBAA20]] +// CHECK-NEXT: [[ARRAYIDX_I20:%.*]] = getelementptr inbounds i16, ptr addrspace(4) [[TMP5]], i64 [[CONV]] +// CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I20]], align 2, !tbaa [[TBAA19]] // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x i16], ptr [[VALUES]], i64 0, i64 [[CONV]] -// CHECK-NEXT: store i16 [[TMP7]], ptr [[ARRAYIDX]], align 2, !tbaa [[TBAA20]] +// CHECK-NEXT: store i16 [[TMP7]], ptr [[ARRAYIDX]], align 2, !tbaa [[TBAA19]] // CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1 -// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK: cleanup: // CHECK-NEXT: ret void @@ -241,8 +241,8 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< sycl::sub_group, const short, 4, plain_global_ptr, opt_blocked>( sycl::sub_group, span, plain_global_ptr, opt_blocked); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEKsLm4EPU3AS1sNS3_14new_properties10propertiesINS9_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESN_NS0_4spanISL_XT1_EEESM_T3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.11") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.2") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META7]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEKsLm4EPU3AS1sNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESP_NS0_4spanISN_XT1_EEESO_T3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.5") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.0") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META15]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VALUES:%.*]] = alloca [4 x i16], align 2 // CHECK-NEXT: [[CMP_I:%.*]] = icmp ne ptr addrspace(1) [[OUT_PTR]], null @@ -252,50 +252,50 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< // CHECK-NEXT: [[CMP1_I_NOT:%.*]] = icmp eq i64 [[REM_I]], 0 // CHECK-NEXT: br i1 [[CMP1_I_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] // CHECK: if.then: -// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA12]] +// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA11]] // CHECK-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(4) // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META28:![0-9]+]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META27:![0-9]+]] // CHECK-NEXT: [[MUL_I_I:%.*]] = shl i32 [[TMP3]], 2 // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[IF_THEN]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] -// CHECK-NEXT: [[CMP_I23:%.*]] = icmp ult i32 [[I_0_I]], 4 -// CHECK-NEXT: br i1 [[CMP_I23]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEKSLM4EPU3AS1SNS3_14NEW_PROPERTIES10PROPERTIESINS9_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME0EEENS3_19FULL_GROUP_PROPERTYENS3_6DETAIL14NAIVE_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESP_NS0_4SPANISN_XT1_EEESO_T3__EXIT:%.*]] +// CHECK-NEXT: [[CMP_I19:%.*]] = icmp ult i32 [[I_0_I]], 4 +// CHECK-NEXT: br i1 [[CMP_I19]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEKSLM4EPU3AS1SNS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS3_6DETAIL9NAIVE_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESS_NS0_4SPANISQ_XT1_EEESR_T3__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(4) [[TMP2]], i64 [[CONV_I]] -// CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA20]] +// CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA19]] // CHECK-NEXT: [[ADD_I_I:%.*]] = or disjoint i32 [[MUL_I_I]], [[I_0_I]] // CHECK-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[ADD_I_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[OUT_PTR]], i64 [[IDXPROM_I]] -// CHECK-NEXT: store i16 [[TMP4]], ptr addrspace(1) [[ARRAYIDX_I]], align 2, !tbaa [[TBAA20]] +// CHECK-NEXT: store i16 [[TMP4]], ptr addrspace(1) [[ARRAYIDX_I]], align 2, !tbaa [[TBAA19]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP31:![0-9]+]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEKsLm4EPU3AS1sNS3_14new_properties10propertiesINS9_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESP_NS0_4spanISN_XT1_EEESO_T3_.exit: +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEKsLm4EPU3AS1sNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESS_NS0_4spanISQ_XT1_EEESR_T3_.exit: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] // CHECK-NEXT: br label [[CLEANUP:%.*]] // CHECK: if.end: // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VALUES]]) #[[ATTR7]] -// CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(4), ptr [[IN]], align 8, !tbaa [[TBAA32:![0-9]+]] +// CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(4), ptr [[IN]], align 8, !tbaa [[TBAA31:![0-9]+]] // CHECK-NEXT: br label [[FOR_COND:%.*]] // CHECK: for.cond: // CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[IF_END]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] // CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[I_0]], 4 // CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] // CHECK: for.cond.cleanup: -// CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[VALUES]], align 2, !tbaa [[TBAA26]] +// CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[VALUES]], align 2, !tbaa [[TBAA25]] // CHECK-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELImEvPU3AS1mT_(ptr addrspace(1) noundef nonnull [[OUT_PTR]], i64 noundef [[TMP6]]) #[[ATTR5]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VALUES]]) #[[ATTR7]] // CHECK-NEXT: br label [[CLEANUP]] // CHECK: for.body: // CHECK-NEXT: [[CONV:%.*]] = zext nneg i32 [[I_0]] to i64 -// CHECK-NEXT: [[ARRAYIDX_I24:%.*]] = getelementptr inbounds i16, ptr addrspace(4) [[TMP5]], i64 [[CONV]] -// CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I24]], align 2, !tbaa [[TBAA20]] +// CHECK-NEXT: [[ARRAYIDX_I20:%.*]] = getelementptr inbounds i16, ptr addrspace(4) [[TMP5]], i64 [[CONV]] +// CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I20]], align 2, !tbaa [[TBAA19]] // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x i16], ptr [[VALUES]], i64 0, i64 [[CONV]] -// CHECK-NEXT: store i16 [[TMP7]], ptr [[ARRAYIDX]], align 2, !tbaa [[TBAA20]] +// CHECK-NEXT: store i16 [[TMP7]], ptr [[ARRAYIDX]], align 2, !tbaa [[TBAA19]] // CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1 -// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK: cleanup: // CHECK-NEXT: ret void @@ -303,30 +303,30 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< sycl::sub_group, int, 3, plain_global_ptr, opt_blocked>( sycl::sub_group, span, plain_global_ptr, opt_blocked); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm3EPU3AS1iNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_NS0_4spanISK_XT1_EEESL_T3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.12") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.2") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META7]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm3EPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_NS0_4spanISM_XT1_EEESN_T3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.6") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.0") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META15]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA12]] +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA11]] // CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META35:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META34:![0-9]+]] // CHECK-NEXT: [[MUL_I_I:%.*]] = mul i32 [[TMP2]], 3 // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] // CHECK-NEXT: [[CMP_I:%.*]] = icmp ult i32 [[I_0_I]], 3 -// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM3EPU3AS1INS3_14NEW_PROPERTIES10PROPERTIESINS8_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME0EEENS3_19FULL_GROUP_PROPERTYENS3_6DETAIL14NAIVE_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESO_NS0_4SPANISM_XT1_EEESN_T3__EXIT:%.*]] +// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM3EPU3AS1INS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSA_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSA_INS3_14FULL_GROUP_KEYEJEEENSA_INS3_6DETAIL9NAIVE_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_T3__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: [[ADD_I_I:%.*]] = add i32 [[MUL_I_I]], [[I_0_I]] // CHECK-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[ADD_I_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT_PTR]], i64 [[IDXPROM_I]] -// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP38:![0-9]+]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm3EPU3AS1iNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_NS0_4spanISM_XT1_EEESN_T3_.exit: +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP37:![0-9]+]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm3EPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEENSA_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_T3_.exit: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] // CHECK-NEXT: ret void @@ -335,30 +335,30 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< sycl::sub_group, int, 4, plain_global_ptr, opt_blocked>( sycl::sub_group, span, plain_global_ptr, opt_blocked); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm4EPU3AS1iNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_NS0_4spanISK_XT1_EEESL_T3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.13") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.2") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META7]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm4EPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_NS0_4spanISM_XT1_EEESN_T3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.7") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.0") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META15]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA12]] +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA11]] // CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META39:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META38:![0-9]+]] // CHECK-NEXT: [[MUL_I_I:%.*]] = shl i32 [[TMP2]], 2 // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] // CHECK-NEXT: [[CMP_I:%.*]] = icmp ult i32 [[I_0_I]], 4 -// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM4EPU3AS1INS3_14NEW_PROPERTIES10PROPERTIESINS8_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME0EEENS3_19FULL_GROUP_PROPERTYENS3_6DETAIL14NAIVE_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESO_NS0_4SPANISM_XT1_EEESN_T3__EXIT:%.*]] +// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM4EPU3AS1INS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSA_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSA_INS3_14FULL_GROUP_KEYEJEEENSA_INS3_6DETAIL9NAIVE_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_T3__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: [[ADD_I_I:%.*]] = or disjoint i32 [[MUL_I_I]], [[I_0_I]] // CHECK-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[ADD_I_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT_PTR]], i64 [[IDXPROM_I]] -// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP42:![0-9]+]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm4EPU3AS1iNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_NS0_4spanISM_XT1_EEESN_T3_.exit: +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP41:![0-9]+]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm4EPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEENSA_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_T3_.exit: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] // CHECK-NEXT: ret void @@ -366,30 +366,30 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< sycl::sub_group, int, 7, plain_global_ptr, opt_blocked>( sycl::sub_group, span, plain_global_ptr, opt_blocked); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm7EPU3AS1iNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_NS0_4spanISK_XT1_EEESL_T3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.14") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.2") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META7]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm7EPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_NS0_4spanISM_XT1_EEESN_T3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.8") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.0") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META15]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA12]] +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA11]] // CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META43:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META42:![0-9]+]] // CHECK-NEXT: [[MUL_I_I:%.*]] = mul i32 [[TMP2]], 7 // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] // CHECK-NEXT: [[CMP_I:%.*]] = icmp ult i32 [[I_0_I]], 7 -// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM7EPU3AS1INS3_14NEW_PROPERTIES10PROPERTIESINS8_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME0EEENS3_19FULL_GROUP_PROPERTYENS3_6DETAIL14NAIVE_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESO_NS0_4SPANISM_XT1_EEESN_T3__EXIT:%.*]] +// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM7EPU3AS1INS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSA_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSA_INS3_14FULL_GROUP_KEYEJEEENSA_INS3_6DETAIL9NAIVE_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_T3__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: [[ADD_I_I:%.*]] = add i32 [[MUL_I_I]], [[I_0_I]] // CHECK-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[ADD_I_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT_PTR]], i64 [[IDXPROM_I]] -// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP46:![0-9]+]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm7EPU3AS1iNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE0EEENS3_19full_group_propertyENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_NS0_4spanISM_XT1_EEESN_T3_.exit: +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP45:![0-9]+]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm7EPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEENSA_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_T3_.exit: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] // CHECK-NEXT: ret void @@ -400,13 +400,13 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< sycl::sub_group, int, 2, plain_global_ptr, naive_striped>( sycl::sub_group, span, plain_global_ptr, naive_striped); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2EPU3AS1iNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_NS0_4spanISK_XT1_EEESL_T3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.15") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.16") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META7]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2EPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESN_NS0_4spanISL_XT1_EEESM_T3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.9") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.10") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META15]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] -// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr [[IN]], align 8, !tbaa [[TBAA47:![0-9]+]] -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META49:![0-9]+]] -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META52:![0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr [[IN]], align 8, !tbaa [[TBAA46:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META48:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA7]], !noalias [[META51:![0-9]+]] // CHECK-NEXT: br label [[FOR_COND:%.*]] // CHECK: for.cond: // CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] @@ -418,21 +418,21 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< // CHECK: for.body: // CHECK-NEXT: [[CONV:%.*]] = zext nneg i32 [[I_0]] to i64 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 [[CONV]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: [[MUL_I:%.*]] = mul nuw nsw i32 [[TMP2]], [[I_0]] // CHECK-NEXT: [[ADD_I:%.*]] = add i32 [[TMP1]], [[MUL_I]] // CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD_I]] to i64 // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT_PTR]], i64 [[IDXPROM]] -// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[ARRAYIDX]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[ARRAYIDX]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1 -// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // Check that optimized implementation is selected. template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< sycl::sub_group, int, 2, plain_global_ptr, opt_striped>( sycl::sub_group, span, plain_global_ptr, opt_striped); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2EPU3AS1iNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_NS0_4spanISK_XT1_EEESL_T3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.15") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.19") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META7]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2EPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_NS0_4spanISM_XT1_EEESN_T3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.9") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.11") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META15]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VALUES:%.*]] = alloca [2 x i32], align 4 // CHECK-NEXT: [[CMP_I:%.*]] = icmp ne ptr addrspace(1) [[OUT_PTR]], null @@ -442,51 +442,51 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< // CHECK-NEXT: [[CMP1_I_NOT:%.*]] = icmp eq i64 [[REM_I]], 0 // CHECK-NEXT: br i1 [[CMP1_I_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] // CHECK: if.then: -// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA12]] +// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA11]] // CHECK-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(4) // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META56:![0-9]+]] -// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META59:![0-9]+]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META55:![0-9]+]] +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA7]], !noalias [[META58:![0-9]+]] // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[IF_THEN]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] -// CHECK-NEXT: [[CMP_I23:%.*]] = icmp ult i32 [[I_0_I]], 2 -// CHECK-NEXT: br i1 [[CMP_I23]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2EPU3AS1INS3_14NEW_PROPERTIES10PROPERTIESINS8_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME1EEENS3_19FULL_GROUP_PROPERTYENS3_6DETAIL14NAIVE_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESO_NS0_4SPANISM_XT1_EEESN_T3__EXIT:%.*]] +// CHECK-NEXT: [[CMP_I19:%.*]] = icmp ult i32 [[I_0_I]], 2 +// CHECK-NEXT: br i1 [[CMP_I19]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2EPU3AS1INS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSA_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSA_INS3_14FULL_GROUP_KEYEJEEENSA_INS3_6DETAIL9NAIVE_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_T3__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP2]], i64 [[CONV_I]] -// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: [[MUL_I_I:%.*]] = mul nuw nsw i32 [[TMP4]], [[I_0_I]] // CHECK-NEXT: [[ADD_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I]] // CHECK-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[ADD_I_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT_PTR]], i64 [[IDXPROM_I]] -// CHECK-NEXT: store i32 [[TMP5]], ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: store i32 [[TMP5]], ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP62:![0-9]+]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2EPU3AS1iNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_NS0_4spanISM_XT1_EEESN_T3_.exit: +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP61:![0-9]+]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2EPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEENSA_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_T3_.exit: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] // CHECK-NEXT: br label [[CLEANUP:%.*]] // CHECK: if.end: // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VALUES]]) #[[ATTR7]] -// CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(4), ptr [[IN]], align 8, !tbaa [[TBAA47]] +// CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(4), ptr [[IN]], align 8, !tbaa [[TBAA46]] // CHECK-NEXT: br label [[FOR_COND:%.*]] // CHECK: for.cond: // CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[IF_END]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] // CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[I_0]], 2 // CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] // CHECK: for.cond.cleanup: -// CHECK-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr [[VALUES]], align 4, !tbaa [[TBAA26]] +// CHECK-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr [[VALUES]], align 4, !tbaa [[TBAA25]] // CHECK-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELIDv2_jEvPU3AS1jT_(ptr addrspace(1) noundef nonnull [[OUT_PTR]], <2 x i32> noundef [[TMP7]]) #[[ATTR5]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VALUES]]) #[[ATTR7]] // CHECK-NEXT: br label [[CLEANUP]] // CHECK: for.body: // CHECK-NEXT: [[CONV:%.*]] = zext nneg i32 [[I_0]] to i64 -// CHECK-NEXT: [[ARRAYIDX_I24:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP6]], i64 [[CONV]] -// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I24]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: [[ARRAYIDX_I20:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP6]], i64 [[CONV]] +// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I20]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VALUES]], i64 0, i64 [[CONV]] -// CHECK-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1 -// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP63:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK: cleanup: // CHECK-NEXT: ret void @@ -494,8 +494,8 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< sycl::sub_group, int, 2, plain_global_ptr, full_group_striped>( sycl::sub_group, span, plain_global_ptr, full_group_striped); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2EPU3AS1iNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESL_NS0_4spanISJ_XT1_EEESK_T3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.15") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.21") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META7]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2EPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_NS0_4spanISK_XT1_EEESL_T3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.9") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.13") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META15]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VALUES:%.*]] = alloca [2 x i32], align 4 // CHECK-NEXT: [[CMP_I:%.*]] = icmp ne ptr addrspace(1) [[OUT_PTR]], null @@ -505,51 +505,51 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< // CHECK-NEXT: [[CMP1_I_NOT:%.*]] = icmp eq i64 [[REM_I]], 0 // CHECK-NEXT: br i1 [[CMP1_I_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] // CHECK: if.then: -// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA12]] +// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA11]] // CHECK-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(4) // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META64:![0-9]+]] -// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META67:![0-9]+]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META63:![0-9]+]] +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA7]], !noalias [[META66:![0-9]+]] // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[IF_THEN]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] -// CHECK-NEXT: [[CMP_I23:%.*]] = icmp ult i32 [[I_0_I]], 2 -// CHECK-NEXT: br i1 [[CMP_I23]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2EPU3AS1INS3_14NEW_PROPERTIES10PROPERTIESINS8_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME1EEENS3_19FULL_GROUP_PROPERTYENS3_6DETAIL14NAIVE_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESN_NS0_4SPANISL_XT1_EEESM_T3__EXIT:%.*]] +// CHECK-NEXT: [[CMP_I19:%.*]] = icmp ult i32 [[I_0_I]], 2 +// CHECK-NEXT: br i1 [[CMP_I19]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2EPU3AS1INS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSA_INS3_14FULL_GROUP_KEYEJEEENSA_INS3_6DETAIL9NAIVE_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESP_NS0_4SPANISN_XT1_EEESO_T3__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP2]], i64 [[CONV_I]] -// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: [[MUL_I_I:%.*]] = mul nuw nsw i32 [[TMP4]], [[I_0_I]] // CHECK-NEXT: [[ADD_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I]] // CHECK-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[ADD_I_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT_PTR]], i64 [[IDXPROM_I]] -// CHECK-NEXT: store i32 [[TMP5]], ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: store i32 [[TMP5]], ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP70:![0-9]+]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2EPU3AS1iNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESN_NS0_4spanISL_XT1_EEESM_T3_.exit: +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP69:![0-9]+]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2EPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_14full_group_keyEJEEENSA_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESP_NS0_4spanISN_XT1_EEESO_T3_.exit: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] // CHECK-NEXT: br label [[CLEANUP:%.*]] // CHECK: if.end: // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VALUES]]) #[[ATTR7]] -// CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(4), ptr [[IN]], align 8, !tbaa [[TBAA47]] +// CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(4), ptr [[IN]], align 8, !tbaa [[TBAA46]] // CHECK-NEXT: br label [[FOR_COND:%.*]] // CHECK: for.cond: // CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[IF_END]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] // CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[I_0]], 2 // CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] // CHECK: for.cond.cleanup: -// CHECK-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr [[VALUES]], align 4, !tbaa [[TBAA26]] +// CHECK-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr [[VALUES]], align 4, !tbaa [[TBAA25]] // CHECK-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELIDv2_jEvPU3AS1jT_(ptr addrspace(1) noundef nonnull [[OUT_PTR]], <2 x i32> noundef [[TMP7]]) #[[ATTR5]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VALUES]]) #[[ATTR7]] // CHECK-NEXT: br label [[CLEANUP]] // CHECK: for.body: // CHECK-NEXT: [[CONV:%.*]] = zext nneg i32 [[I_0]] to i64 -// CHECK-NEXT: [[ARRAYIDX_I24:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP6]], i64 [[CONV]] -// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I24]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: [[ARRAYIDX_I20:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP6]], i64 [[CONV]] +// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I20]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VALUES]], i64 0, i64 [[CONV]] -// CHECK-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1 -// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP71:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP70:![0-9]+]] // CHECK: cleanup: // CHECK-NEXT: ret void @@ -560,35 +560,35 @@ using accessor_iter_t = accessor( sycl::sub_group, span, accessor_iter_t, full_group_striped); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2ENS0_6detail17accessor_iteratorIiLi1EEENS3_14new_properties10propertiesINS9_6detail20properties_type_listIJNS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_NS0_4spanISK_XT1_EEESL_T3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.15") align 8 [[IN:%.*]], ptr noundef byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.21") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META7]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2ENS0_6detail17accessor_iteratorIiLi1EEENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESN_NS0_4spanISL_XT1_EEESM_T3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.9") align 8 [[IN:%.*]], ptr noundef byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.13") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META15]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA12]] -// CHECK-NEXT: [[AGG_TMP4_SROA_0_0_COPYLOAD:%.*]] = load ptr addrspace(4), ptr [[OUT_PTR]], align 8, !tbaa [[TBAA12]] +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA11]] +// CHECK-NEXT: [[AGG_TMP4_SROA_0_0_COPYLOAD:%.*]] = load ptr addrspace(4), ptr [[OUT_PTR]], align 8, !tbaa [[TBAA11]] // CHECK-NEXT: [[AGG_TMP4_SROA_2_0_OUT_PTR_ASCAST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[OUT_PTR]], i64 8 -// CHECK-NEXT: [[AGG_TMP4_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr [[AGG_TMP4_SROA_2_0_OUT_PTR_ASCAST_SROA_IDX]], align 8, !tbaa [[TBAA14]] +// CHECK-NEXT: [[AGG_TMP4_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr [[AGG_TMP4_SROA_2_0_OUT_PTR_ASCAST_SROA_IDX]], align 8, !tbaa [[TBAA13]] // CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META72:![0-9]+]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META75:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META71:![0-9]+]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA7]], !noalias [[META74:![0-9]+]] // CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr addrspace(4) [[AGG_TMP4_SROA_0_0_COPYLOAD]], i64 [[AGG_TMP4_SROA_2_0_COPYLOAD]] // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] // CHECK-NEXT: [[CMP_I:%.*]] = icmp ult i32 [[I_0_I]], 2 -// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2ENS0_6DETAIL17ACCESSOR_ITERATORIILI1EEENS3_14NEW_PROPERTIES10PROPERTIESINS9_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME1EEENS3_19FULL_GROUP_PROPERTYENS3_6DETAIL14NAIVE_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESO_NS0_4SPANISM_XT1_EEESN_T3__EXIT:%.*]] +// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2ENS0_6DETAIL17ACCESSOR_ITERATORIILI1EEENS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS3_6DETAIL9NAIVE_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESQ_NS0_4SPANISO_XT1_EEESP_T3__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I]] -// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: [[MUL_I_I:%.*]] = mul nuw nsw i32 [[TMP3]], [[I_0_I]] // CHECK-NEXT: [[ADD_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I]] // CHECK-NEXT: [[CONV5_I:%.*]] = sext i32 [[ADD_I_I]] to i64 // CHECK-NEXT: [[ADD_PTR_I_I_I:%.*]] = getelementptr i32, ptr addrspace(4) [[TMP4]], i64 [[CONV5_I]] -// CHECK-NEXT: store i32 [[TMP5]], ptr addrspace(4) [[ADD_PTR_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: store i32 [[TMP5]], ptr addrspace(4) [[ADD_PTR_I_I_I]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP78:![0-9]+]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2ENS0_6detail17accessor_iteratorIiLi1EEENS3_14new_properties10propertiesINS9_6detail20properties_type_listIJNS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_NS0_4spanISM_XT1_EEESN_T3_.exit: +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP77:![0-9]+]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2ENS0_6detail17accessor_iteratorIiLi1EEENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_14full_group_keyEJEEENSB_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESQ_NS0_4spanISO_XT1_EEESP_T3_.exit: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] // CHECK-NEXT: ret void @@ -598,70 +598,70 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< span, accessor_iter_t, opt_striped); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2ENS0_6detail17accessor_iteratorIiLi1EEENS3_14new_properties10propertiesINS9_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESN_NS0_4spanISL_XT1_EEESM_T3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.15") align 8 [[IN:%.*]], ptr noundef byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.19") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META7]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2ENS0_6detail17accessor_iteratorIiLi1EEENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESP_NS0_4spanISN_XT1_EEESO_T3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.9") align 8 [[IN:%.*]], ptr noundef byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.11") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META15]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VALUES:%.*]] = alloca [2 x i32], align 4 -// CHECK-NEXT: [[AGG_TMP_SROA_0_0_COPYLOAD:%.*]] = load ptr addrspace(4), ptr [[OUT_PTR]], align 8, !tbaa [[TBAA12]] +// CHECK-NEXT: [[AGG_TMP_SROA_0_0_COPYLOAD:%.*]] = load ptr addrspace(4), ptr [[OUT_PTR]], align 8, !tbaa [[TBAA11]] // CHECK-NEXT: [[AGG_TMP_SROA_2_0_OUT_PTR_ASCAST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[OUT_PTR]], i64 8 -// CHECK-NEXT: [[AGG_TMP_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr [[AGG_TMP_SROA_2_0_OUT_PTR_ASCAST_SROA_IDX]], align 8, !tbaa [[TBAA14]] +// CHECK-NEXT: [[AGG_TMP_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr [[AGG_TMP_SROA_2_0_OUT_PTR_ASCAST_SROA_IDX]], align 8, !tbaa [[TBAA13]] // CHECK-NEXT: [[ADD_PTR_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[AGG_TMP_SROA_0_0_COPYLOAD]], i64 [[AGG_TMP_SROA_2_0_COPYLOAD]] // CHECK-NEXT: [[CMP_I_I:%.*]] = icmp ne ptr addrspace(4) [[ADD_PTR_I_I]], null // CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I]]) // CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(4) [[ADD_PTR_I_I]] to i64 // CHECK-NEXT: [[REM_I_I:%.*]] = and i64 [[TMP0]], 15 // CHECK-NEXT: [[CMP1_I_I:%.*]] = icmp eq i64 [[REM_I_I]], 0 -// CHECK-NEXT: br i1 [[CMP1_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL6DETAIL16GET_BLOCK_OP_PTRILI16ELM2ENS0_6DETAIL17ACCESSOR_ITERATORIILI1EEENS3_14NEW_PROPERTIES10PROPERTIESINS9_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME1EEENS3_19FULL_GROUP_PROPERTYEEEEEEEEDAT1_T2__EXIT:%.*]], label [[IF_THEN:%.*]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental6detail16get_block_op_ptrILi16ELm2ENS0_6detail17accessor_iteratorIiLi1EEENS3_14new_properties10propertiesINS9_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyEEEEEEEEDaT1_T2_.exit: +// CHECK-NEXT: br i1 [[CMP1_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL6DETAIL16GET_BLOCK_OP_PTRILI16ELM2ENS0_6DETAIL17ACCESSOR_ITERATORIILI1EEENS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEEEEEEEEDAT1_T2__EXIT:%.*]], label [[IF_THEN:%.*]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental6detail16get_block_op_ptrILi16ELm2ENS0_6detail17accessor_iteratorIiLi1EEENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEEEEEEEEDaT1_T2_.exit: // CHECK-NEXT: [[CALL_I_I_I:%.*]] = tail call spir_func noundef ptr addrspace(1) @_Z41__spirv_GenericCastToPtrExplicit_ToGlobalPvi(ptr addrspace(4) noundef nonnull [[ADD_PTR_I_I]], i32 noundef 5) #[[ATTR6]] // CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq ptr addrspace(1) [[CALL_I_I_I]], null // CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_THEN]], label [[IF_END:%.*]] // CHECK: if.then: -// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA12]] +// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA11]] // CHECK-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(4) // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META79:![0-9]+]] -// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META82:![0-9]+]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META78:![0-9]+]] +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA7]], !noalias [[META81:![0-9]+]] // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[IF_THEN]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] // CHECK-NEXT: [[CMP_I:%.*]] = icmp ult i32 [[I_0_I]], 2 -// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2ENS0_6DETAIL17ACCESSOR_ITERATORIILI1EEENS3_14NEW_PROPERTIES10PROPERTIESINS9_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME1EEENS3_19FULL_GROUP_PROPERTYENS3_6DETAIL14NAIVE_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESP_NS0_4SPANISN_XT1_EEESO_T3__EXIT:%.*]] +// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2ENS0_6DETAIL17ACCESSOR_ITERATORIILI1EEENS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS3_6DETAIL9NAIVE_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESS_NS0_4SPANISQ_XT1_EEESR_T3__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP2]], i64 [[CONV_I]] -// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: [[MUL_I_I:%.*]] = mul nuw nsw i32 [[TMP4]], [[I_0_I]] // CHECK-NEXT: [[ADD_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I]] // CHECK-NEXT: [[CONV5_I:%.*]] = sext i32 [[ADD_I_I]] to i64 // CHECK-NEXT: [[ADD_PTR_I_I_I:%.*]] = getelementptr i32, ptr addrspace(4) [[ADD_PTR_I_I]], i64 [[CONV5_I]] -// CHECK-NEXT: store i32 [[TMP5]], ptr addrspace(4) [[ADD_PTR_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: store i32 [[TMP5]], ptr addrspace(4) [[ADD_PTR_I_I_I]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP85:![0-9]+]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2ENS0_6detail17accessor_iteratorIiLi1EEENS3_14new_properties10propertiesINS9_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESP_NS0_4spanISN_XT1_EEESO_T3_.exit: +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP84:![0-9]+]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2ENS0_6detail17accessor_iteratorIiLi1EEENS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESS_NS0_4spanISQ_XT1_EEESR_T3_.exit: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] // CHECK-NEXT: br label [[CLEANUP:%.*]] // CHECK: if.end: // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VALUES]]) #[[ATTR7]] -// CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(4), ptr [[IN]], align 8, !tbaa [[TBAA47]] +// CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(4), ptr [[IN]], align 8, !tbaa [[TBAA46]] // CHECK-NEXT: br label [[FOR_COND:%.*]] // CHECK: for.cond: // CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[IF_END]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] // CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[I_0]], 2 // CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] // CHECK: for.cond.cleanup: -// CHECK-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr [[VALUES]], align 4, !tbaa [[TBAA26]] +// CHECK-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr [[VALUES]], align 4, !tbaa [[TBAA25]] // CHECK-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELIDv2_jEvPU3AS1jT_(ptr addrspace(1) noundef nonnull [[CALL_I_I_I]], <2 x i32> noundef [[TMP7]]) #[[ATTR5]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VALUES]]) #[[ATTR7]] // CHECK-NEXT: br label [[CLEANUP]] // CHECK: for.body: // CHECK-NEXT: [[CONV:%.*]] = zext nneg i32 [[I_0]] to i64 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP6]], i64 [[CONV]] -// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VALUES]], i64 0, i64 [[CONV]] -// CHECK-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1 -// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP86:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP85:![0-9]+]] // CHECK: cleanup: // CHECK-NEXT: ret void @@ -670,8 +670,8 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< sycl::sub_group, short, 4, plain_global_ptr, opt_striped>( sycl::sub_group, span, plain_global_ptr, opt_striped); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEsLm4EPU3AS1sNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_NS0_4spanISK_XT1_EEESL_T3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.10") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.19") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META7]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEsLm4EPU3AS1sNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_NS0_4spanISM_XT1_EEESN_T3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.4") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.11") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META15]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VALUES:%.*]] = alloca [4 x i16], align 2 // CHECK-NEXT: [[CMP_I:%.*]] = icmp ne ptr addrspace(1) [[OUT_PTR]], null @@ -681,51 +681,51 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< // CHECK-NEXT: [[CMP1_I_NOT:%.*]] = icmp eq i64 [[REM_I]], 0 // CHECK-NEXT: br i1 [[CMP1_I_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] // CHECK: if.then: -// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA12]] +// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA11]] // CHECK-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(4) // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META87:![0-9]+]] -// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META90:![0-9]+]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META86:![0-9]+]] +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA7]], !noalias [[META89:![0-9]+]] // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[IF_THEN]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] -// CHECK-NEXT: [[CMP_I23:%.*]] = icmp ult i32 [[I_0_I]], 4 -// CHECK-NEXT: br i1 [[CMP_I23]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM4EPU3AS1SNS3_14NEW_PROPERTIES10PROPERTIESINS8_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME1EEENS3_19FULL_GROUP_PROPERTYENS3_6DETAIL14NAIVE_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESO_NS0_4SPANISM_XT1_EEESN_T3__EXIT:%.*]] +// CHECK-NEXT: [[CMP_I19:%.*]] = icmp ult i32 [[I_0_I]], 4 +// CHECK-NEXT: br i1 [[CMP_I19]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM4EPU3AS1SNS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSA_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSA_INS3_14FULL_GROUP_KEYEJEEENSA_INS3_6DETAIL9NAIVE_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_T3__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(4) [[TMP2]], i64 [[CONV_I]] -// CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA20]] +// CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA19]] // CHECK-NEXT: [[MUL_I_I:%.*]] = mul i32 [[TMP4]], [[I_0_I]] // CHECK-NEXT: [[ADD_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I]] // CHECK-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[ADD_I_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[OUT_PTR]], i64 [[IDXPROM_I]] -// CHECK-NEXT: store i16 [[TMP5]], ptr addrspace(1) [[ARRAYIDX_I]], align 2, !tbaa [[TBAA20]] +// CHECK-NEXT: store i16 [[TMP5]], ptr addrspace(1) [[ARRAYIDX_I]], align 2, !tbaa [[TBAA19]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP93:![0-9]+]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEsLm4EPU3AS1sNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_NS0_4spanISM_XT1_EEESN_T3_.exit: +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP92:![0-9]+]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEsLm4EPU3AS1sNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEENSA_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_T3_.exit: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] // CHECK-NEXT: br label [[CLEANUP:%.*]] // CHECK: if.end: // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VALUES]]) #[[ATTR7]] -// CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(4), ptr [[IN]], align 8, !tbaa [[TBAA24]] +// CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(4), ptr [[IN]], align 8, !tbaa [[TBAA23]] // CHECK-NEXT: br label [[FOR_COND:%.*]] // CHECK: for.cond: // CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[IF_END]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] // CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[I_0]], 4 // CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] // CHECK: for.cond.cleanup: -// CHECK-NEXT: [[TMP7:%.*]] = load <4 x i16>, ptr [[VALUES]], align 2, !tbaa [[TBAA26]] +// CHECK-NEXT: [[TMP7:%.*]] = load <4 x i16>, ptr [[VALUES]], align 2, !tbaa [[TBAA25]] // CHECK-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELIDv4_tEvPU3AS1tT_(ptr addrspace(1) noundef nonnull [[OUT_PTR]], <4 x i16> noundef [[TMP7]]) #[[ATTR5]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VALUES]]) #[[ATTR7]] // CHECK-NEXT: br label [[CLEANUP]] // CHECK: for.body: // CHECK-NEXT: [[CONV:%.*]] = zext nneg i32 [[I_0]] to i64 -// CHECK-NEXT: [[ARRAYIDX_I24:%.*]] = getelementptr inbounds i16, ptr addrspace(4) [[TMP6]], i64 [[CONV]] -// CHECK-NEXT: [[TMP8:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I24]], align 2, !tbaa [[TBAA20]] +// CHECK-NEXT: [[ARRAYIDX_I20:%.*]] = getelementptr inbounds i16, ptr addrspace(4) [[TMP6]], i64 [[CONV]] +// CHECK-NEXT: [[TMP8:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I20]], align 2, !tbaa [[TBAA19]] // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x i16], ptr [[VALUES]], i64 0, i64 [[CONV]] -// CHECK-NEXT: store i16 [[TMP8]], ptr [[ARRAYIDX]], align 2, !tbaa [[TBAA20]] +// CHECK-NEXT: store i16 [[TMP8]], ptr [[ARRAYIDX]], align 2, !tbaa [[TBAA19]] // CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1 -// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP94:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP93:![0-9]+]] // CHECK: cleanup: // CHECK-NEXT: ret void @@ -733,31 +733,31 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< sycl::sub_group, int, 3, plain_global_ptr, opt_striped>( sycl::sub_group, span, plain_global_ptr, opt_striped); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm3EPU3AS1iNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_NS0_4spanISK_XT1_EEESL_T3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.12") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.19") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META7]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm3EPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_NS0_4spanISM_XT1_EEESN_T3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.6") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.11") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META15]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA12]] +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA11]] // CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META95:![0-9]+]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META98:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META94:![0-9]+]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA7]], !noalias [[META97:![0-9]+]] // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] // CHECK-NEXT: [[CMP_I:%.*]] = icmp ult i32 [[I_0_I]], 3 -// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM3EPU3AS1INS3_14NEW_PROPERTIES10PROPERTIESINS8_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME1EEENS3_19FULL_GROUP_PROPERTYENS3_6DETAIL14NAIVE_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESO_NS0_4SPANISM_XT1_EEESN_T3__EXIT:%.*]] +// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM3EPU3AS1INS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSA_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSA_INS3_14FULL_GROUP_KEYEJEEENSA_INS3_6DETAIL9NAIVE_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_T3__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I]] -// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: [[MUL_I_I:%.*]] = mul i32 [[TMP3]], [[I_0_I]] // CHECK-NEXT: [[ADD_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I]] // CHECK-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[ADD_I_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT_PTR]], i64 [[IDXPROM_I]] -// CHECK-NEXT: store i32 [[TMP4]], ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: store i32 [[TMP4]], ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP101:![0-9]+]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm3EPU3AS1iNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_NS0_4spanISM_XT1_EEESN_T3_.exit: +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP100:![0-9]+]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm3EPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEENSA_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_T3_.exit: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] // CHECK-NEXT: ret void @@ -765,31 +765,31 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< sycl::sub_group, int, 16, plain_global_ptr, opt_striped>( sycl::sub_group, span, plain_global_ptr, opt_striped); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm16EPU3AS1iNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_NS0_4spanISK_XT1_EEESL_T3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.23") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.19") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META7]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm16EPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_NS0_4spanISM_XT1_EEESN_T3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.15") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.11") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META15]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA12]] +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA11]] // CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META102:![0-9]+]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META105:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META101:![0-9]+]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA7]], !noalias [[META104:![0-9]+]] // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] // CHECK-NEXT: [[CMP_I:%.*]] = icmp ult i32 [[I_0_I]], 16 -// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM16EPU3AS1INS3_14NEW_PROPERTIES10PROPERTIESINS8_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME1EEENS3_19FULL_GROUP_PROPERTYENS3_6DETAIL14NAIVE_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESO_NS0_4SPANISM_XT1_EEESN_T3__EXIT:%.*]] +// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM16EPU3AS1INS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSA_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSA_INS3_14FULL_GROUP_KEYEJEEENSA_INS3_6DETAIL9NAIVE_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_T3__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I]] -// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: [[MUL_I_I:%.*]] = mul i32 [[TMP3]], [[I_0_I]] // CHECK-NEXT: [[ADD_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I]] // CHECK-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[ADD_I_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT_PTR]], i64 [[IDXPROM_I]] -// CHECK-NEXT: store i32 [[TMP4]], ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: store i32 [[TMP4]], ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP108:![0-9]+]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm16EPU3AS1iNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_NS0_4spanISM_XT1_EEESN_T3_.exit: +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP107:![0-9]+]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm16EPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEENSA_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_T3_.exit: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] // CHECK-NEXT: ret void @@ -797,30 +797,30 @@ template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< template SYCL_EXTERNAL void sycl::ext::oneapi::experimental::group_store< sycl::sub_group, int, 11, plain_global_ptr, opt_striped>( sycl::sub_group, span, plain_global_ptr, opt_striped); -// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm11EPU3AS1iNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESM_NS0_4spanISK_XT1_EEESL_T3_( -// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.24") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::new_properties::properties.19") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META16]] !sycl_fixed_targets [[META7]] { +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm11EPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_NS0_4spanISM_XT1_EEESN_T3_( +// CHECK-SAME: ptr noundef byval(%"struct.sycl::_V1::sub_group") align 1 [[G:%.*]], ptr noundef byval(%"class.sycl::_V1::span.16") align 8 [[IN:%.*]], ptr addrspace(1) noundef [[OUT_PTR:%.*]], ptr noundef byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.11") align 1 [[PROPS:%.*]]) local_unnamed_addr #[[ATTR0]] comdat !srcloc [[META15]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA12]] +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[IN]], align 8, !tbaa [[TBAA11]] // CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META109:![0-9]+]] -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META112:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA7]], !noalias [[META108:![0-9]+]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA7]], !noalias [[META111:![0-9]+]] // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] // CHECK-NEXT: [[CMP_I:%.*]] = icmp ult i32 [[I_0_I]], 11 -// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM11EPU3AS1INS3_14NEW_PROPERTIES10PROPERTIESINS8_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_26CONTIGUOUS_MEMORY_PROPERTYENS3_23DATA_PLACEMENT_PROPERTYILNS3_19DATA_PLACEMENT_ENUME1EEENS3_19FULL_GROUP_PROPERTYENS3_6DETAIL14NAIVE_PROPERTYEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESO_NS0_4SPANISM_XT1_EEESN_T3__EXIT:%.*]] +// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM11EPU3AS1INS3_10PROPERTIESIST5TUPLEIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSA_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSA_INS3_14FULL_GROUP_KEYEJEEENSA_INS3_6DETAIL9NAIVE_KEYEJEEEEEEEEENST9ENABLE_IFIXAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_T3__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I]] -// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: [[MUL_I_I:%.*]] = mul i32 [[TMP3]], [[I_0_I]] // CHECK-NEXT: [[ADD_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I]] // CHECK-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[ADD_I_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT_PTR]], i64 [[IDXPROM_I]] -// CHECK-NEXT: store i32 [[TMP4]], ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: store i32 [[TMP4]], ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA7]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP115:![0-9]+]] -// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm11EPU3AS1iNS3_14new_properties10propertiesINS8_6detail20properties_type_listIJNS3_26contiguous_memory_propertyENS3_23data_placement_propertyILNS3_19data_placement_enumE1EEENS3_19full_group_propertyENS3_6detail14naive_propertyEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESO_NS0_4spanISM_XT1_EEESN_T3_.exit: +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP114:![0-9]+]] +// CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm11EPU3AS1iNS3_10propertiesISt5tupleIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEENSA_INS3_6detail9naive_keyEJEEEEEEEEENSt9enable_ifIXaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_T3_.exit: // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]] // CHECK-NEXT: ret void diff --git a/sycl/test/extensions/kernel_compiler_constraints.cpp b/sycl/test/extensions/kernel_compiler_constraints.cpp index a68ee6d96ecc3..d497c042060b9 100644 --- a/sycl/test/extensions/kernel_compiler_constraints.cpp +++ b/sycl/test/extensions/kernel_compiler_constraints.cpp @@ -8,20 +8,15 @@ // RUN: %clangxx -fsyntax-only -fsycl -Xclang -verify -Xclang -verify-ignore-unexpected=note %s -// kernel_bundles supporting the new bundle_state::ext_oneapi_source should NOT +// kernel_bundles sporting the new bundle_state::ext_oneapi_source should NOT // support several member functions. This test confirms that. #include -namespace syclex = sycl::ext::oneapi::experimental; - -struct some_property : syclex::new_properties::detail::property_base { - static constexpr std::string_view property_name{"::some_property"}; -}; - int main() { #ifdef SYCL_EXT_ONEAPI_KERNEL_COMPILER + namespace syclex = sycl::ext::oneapi::experimental; using source_kb = sycl::kernel_bundle; sycl::queue q; @@ -80,33 +75,34 @@ int main() { syclex::build(kbSrc); // expected-error@+1 {{no matching function for call to 'build'}} - syclex::build(kbSrc, syclex::new_properties::properties{some_property{}}); + syclex::build(kbSrc, + syclex::properties{syclex::usm_kind}); // OK - syclex::build(kbSrc, syclex::new_properties::properties{syclex::build_options{flags}, + syclex::build(kbSrc, syclex::properties{syclex::build_options{flags}, syclex::save_log{&log}}); // expected-error@+1 {{no matching function for call to 'build'}} - syclex::build(kbSrc, syclex::new_properties::properties{ + syclex::build(kbSrc, syclex::properties{ syclex::build_options{flags}, syclex::save_log{&log}, - some_property{}}); + syclex::usm_kind}); // OK syclex::build(kbSrc, devices); // expected-error@+1 {{no matching function for call to 'build'}} syclex::build(kbSrc, devices, - syclex::new_properties::properties{some_property{}}); + syclex::properties{syclex::usm_kind}); // OK syclex::build( kbSrc, devices, - syclex::new_properties::properties{syclex::build_options{flags}, syclex::save_log{&log}}); + syclex::properties{syclex::build_options{flags}, syclex::save_log{&log}}); // expected-error@+1 {{no matching function for call to 'build'}} syclex::build(kbSrc, devices, - syclex::new_properties::properties{syclex::build_options{flags}, - syclex::save_log{&log}, - some_property{}}); + syclex::properties{syclex::build_options{flags}, + syclex::save_log{&log}, + syclex::usm_kind}); #endif } From 93eeea0503d1ab3abf410c772c4766626b02d530 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Fri, 25 Oct 2024 14:17:40 -0700 Subject: [PATCH 22/32] Move to `new_properties.hpp` --- .../ext/oneapi/properties/new_properties.hpp | 395 ++++++++++++++++++ .../sycl/ext/oneapi/properties/properties.hpp | 388 +---------------- .../extensions/properties/new_properties.cpp | 2 +- .../properties/new_properties_llvm_ir.cpp | 2 +- .../properties/new_properties_negative.cpp | 10 +- .../new_properties_open_question.cpp | 2 +- .../include_deps/sycl_detail_core.hpp.cpp | 1 + 7 files changed, 405 insertions(+), 395 deletions(-) create mode 100644 sycl/include/sycl/ext/oneapi/properties/new_properties.hpp diff --git a/sycl/include/sycl/ext/oneapi/properties/new_properties.hpp b/sycl/include/sycl/ext/oneapi/properties/new_properties.hpp new file mode 100644 index 0000000000000..714e15a39be8e --- /dev/null +++ b/sycl/include/sycl/ext/oneapi/properties/new_properties.hpp @@ -0,0 +1,395 @@ +//==-------- new_properties.hpp --- SYCL extended property list ------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include +#include + +#include +#include + +namespace sycl { +inline namespace _V1 { +namespace ext::oneapi::experimental { +namespace new_properties { +namespace detail { +template struct properties_type_list; + +// Is used to implement `is_property_v`. +struct property_key_tag_base {}; +} // namespace detail + +template +class __SYCL_EBO properties; + +template struct is_property_list : std::false_type {}; +template +struct is_property_list> : std::true_type {}; +template +inline constexpr bool is_property_list_v = is_property_list::value; + +template +inline constexpr bool is_property_v = + std::is_base_of_v && + !is_property_list_v; + +namespace detail { + +#if __has_builtin(__type_pack_element) +template +using nth_type_t = __type_pack_element; +#else +template struct nth_type { + using type = typename nth_type::type; +}; + +template struct nth_type<0, T, Ts...> { + using type = T; +}; + +template +using nth_type_t = typename nth_type::type; +#endif + +template struct properties_sorter; + +// Specialization to avoid zero-size array creation. +template <> struct properties_sorter> { + using type = properties_type_list<>; +}; + +template +struct properties_sorter, + property_tys...> { + static constexpr auto sorted_indices = []() constexpr { + int idx = 0; + int N = sizeof...(property_tys); + // std::sort isn't constexpr until C++20. Also, it's possible there will be + // a compiler builtin to sort types, in which case we should start using it. + std::array to_sort{std::pair{property_tys::property_name, idx++}...}; + auto swap_pair = [](auto &x, auto &y) constexpr { + auto tmp_first = x.first; + auto tmp_second = x.second; + x.first = y.first; + x.second = y.second; + y.first = tmp_first; + y.second = tmp_second; + }; + for (int i = 0; i < N; ++i) + for (int j = i; j < N; ++j) + if (to_sort[j].first < to_sort[i].first) + swap_pair(to_sort[i], to_sort[j]); + + std::array sorted_indices{}; + for (int i = 0; i < N; ++i) + sorted_indices[i] = to_sort[i].second; + + return sorted_indices; + }(); + + using type = properties_type_list< + nth_type_t...>; +}; + +// We support incomplete property_key_t, so need to wrap it. +template +struct property_key_tag : property_key_tag_base {}; + +// NOTE: each property_t subclass must provide +// +// static constexpr std::string_view +// property_name{" +struct property_base : property_key_tag { +protected: + using key_t = property_key_t; + constexpr property_t get_property_impl(property_key_tag) const { + // In fact, `static_cast` below works just fine with clang/msvc but not with + // gcc, see https://godbolt.org/z/MY6849jGh for a reduced test. However, we + // need to support all ,so special case for compile-time properties (when + // `is_empty_v` is true). + if constexpr (std::is_empty_v) { + return property_t{}; + } else { + return *static_cast(this); + } + } + + // For key_t access in error reporting specialization. + template + friend class __SYCL_EBO new_properties::properties; + +public: + static constexpr const char *ir_attribute_name = ""; + static constexpr std::nullptr_t ir_attribute_value = nullptr; + + // this_property_t is to disable ADL - properties{property{}} is inherited + // from property. + + template + friend constexpr std::enable_if_t< + std::is_same_v && + is_property_v, + decltype(properties{std::declval(), + std::declval()})> + operator+(const this_property_t &lhs, const other_property_t &rhs) { + return properties{lhs, rhs}; + } + + template + friend constexpr std::enable_if_t< + std::is_same_v, + properties>> + operator+(const this_property_t &lhs) { + return properties>{lhs}; + } +}; + +template +inline constexpr bool property_names_are_unique = []() constexpr { + if constexpr (sizeof...(property_tys) == 0) { + return true; + } else { + const std::array names = {property_tys::property_name...}; + auto N = names.size(); + for (int i = 0; i < N; ++i) + for (int j = i + 1; j < N; ++j) + if (names[i] == names[j]) + return false; + + return true; + } +}(); + +template +inline constexpr bool properties_are_sorted = []() constexpr { + if constexpr (sizeof...(property_tys) == 0) { + return true; + } else { + const std::array sort_names = {property_tys::property_name...}; + // std::is_sorted isn't constexpr until C++20. + // + // Sorting is an implementation detail while uniqueness of the + // property_name's is an API restriction. This internal check actually + // combines both conditions as we expect that user error is handled before + // the internal `properties_are_sorted` assert is checked. + for (std::size_t idx = 1; idx < sort_names.size(); ++idx) + if (sort_names[idx - 1] >= sort_names[idx]) + return false; + return true; + } +}(); +} // namespace detail + +// Empty property list. +template <> class __SYCL_EBO properties, void> { +public: + template static constexpr bool has_property() { return false; } + + // TODO: How does this work without qualified name? + template + friend constexpr std::enable_if_t< + is_property_v, + properties>> + operator+(const properties &, const other_property_t &rhs) { + return properties{rhs}; + } +}; + +// Base implementation to provide nice user error in case of mis-use. Without it +// an error "base class '' specified more than once as a direct base +// class" is reported prior to static_assert's error. +template +class __SYCL_EBO properties< + detail::properties_type_list, + std::enable_if_t>> { + static_assert((is_property_v && ...)); + + // This is a separate specialization to report an error, we can afford doing + // extra work to provide nice error message without sacrificing compile time + // on non-exceptional path. Let's find *a* pair of properties that failed the + // check. Note that there might be multiple duplicate names, we're only + // reporting one instance. Once user addresses that, the next pair will be + // reported. + static constexpr auto conflict = []() constexpr { + const std::array keys = {property_tys::property_name...}; + auto N = keys.size(); + for (int i = 0; i < N; ++i) + for (int j = i + 1; j < N; ++j) + if (keys[i] == keys[j]) + return std::pair{i, j}; + }(); + using first_type = detail::nth_type_t; + using second_type = detail::nth_type_t; + static_assert( + !std::is_same_v, + "Duplicate property!"); + static_assert(first_type::property_name != second_type::property_name, + "Property name collision between different property keys!"); + static_assert((is_property_v && ...)); +}; + +// NOTE: Meta-function to implement CTAD rules isn't allowed to return +// `properties` and it's impossible to return a pack as well. As +// such, we're forced to have an extra level of `detail::properties_type_list` +// for the purpose of providing CTAD rules. +template +class __SYCL_EBO properties< + detail::properties_type_list, + std::enable_if_t>> + : private property_tys... { + static_assert((is_property_v && ...)); + static_assert(detail::properties_are_sorted, + "Properties must be sorted!"); + using property_tys::get_property_impl...; + + template friend class __SYCL_EBO properties; + +public: + template && ...)) && + sizeof...(unsorted_property_tys) == sizeof...(property_tys)>> + constexpr properties(unsorted_property_tys... props) + : unsorted_property_tys(props)... {} + + // TODO: not sure if that is needed if we'd have operator| or operator+. + // TODO: sizeof... check. + template < + typename... other_property_list_tys, typename... other_property_tys, + typename = std::enable_if_t<((is_property_v && ...))>> + constexpr properties( + properties> + other_properties, + other_property_tys... props) + : other_property_list_tys( + static_cast(other_properties))..., + other_property_tys(props)... {} + + // TODO: Do we need this? If so, is separate CTAD needed? + // template + // properties(unsorted_property_tys &&...props) + // : unsorted_property_tys(std::forward(props))... + // {} + + template static constexpr bool has_property() { + return std::is_base_of_v, + properties>; + } + + // Two methods below do the following (pseudocode): + // + // template + // using ret_t = decltype(this->get_property_impl(key_tag{})); + // static constexpr auto get_property() requires(is_empty_v) { + // return ret_t{}; + // } + // constexpr auto get_property() const requires(!is_empty_v) { + // return get_property_impl(key_tag{}); + // } + template + static constexpr auto get_property() -> std::enable_if_t< + std::is_empty_v().get_property_impl( + detail::property_key_tag{}))>, + decltype(std::declval().get_property_impl( + detail::property_key_tag{}))> { + return decltype(std::declval().get_property_impl( + detail::property_key_tag{})){}; + } + + template + constexpr auto get_property() const -> std::enable_if_t< + !std::is_empty_v().get_property_impl( + detail::property_key_tag{}))>, + decltype(std::declval().get_property_impl( + detail::property_key_tag{}))> { + return get_property_impl(detail::property_key_tag{}); + } + + // TODO: Do we need separate `static` overload if we decide to keep this + // interface? + template + constexpr auto + get_property_or_default_to(default_property_t default_property) { + if constexpr (has_property()) + return get_property(); + else + return default_property; + } + + // TODO: Use more effective insert sort for single-property insertion. + + // Need to use qualified type to force CTAD instead of using *current* + // properties instantiation. + template + friend constexpr std::enable_if_t< + is_property_v, + decltype(ext::oneapi::experimental::new_properties::properties{ + std::declval()..., std::declval()})> + operator+(const properties &lhs, const other_property_t &rhs) { + return ext::oneapi::experimental::new_properties::properties{ + static_cast(lhs)..., rhs}; + } + + template + friend constexpr auto + operator+(const properties &lhs, + const ext::oneapi::experimental::new_properties::properties< + detail::properties_type_list> &rhs) { + return ext::oneapi::experimental::new_properties::properties{ + static_cast(lhs)..., + static_cast(rhs)...}; + } +}; + +template && ...))>> +properties(unsorted_property_tys...) + -> properties, + unsorted_property_tys...>::type>; + +template < + typename... other_property_list_tys, typename... other_property_tys, + typename = std::enable_if_t<((is_property_v && ...))>> +properties(properties>, + other_property_tys...) + -> properties, + other_property_list_tys..., other_property_tys...>::type>; + +using empty_properties_t = decltype(properties{}); + +template +struct all_properties_in : std::false_type {}; +template +struct all_properties_in< + properties>, + allowed_property_keys...> + : std::bool_constant<((sycl::detail::check_type_in_v< + property_tys, allowed_property_keys...> && + ...))> {}; + +template +inline constexpr bool all_properties_in_v = + all_properties_in, + allowed_property_keys...>::value; +} // namespace new_properties +} // namespace ext::oneapi::experimental +} // namespace _V1 +} // namespace sycl diff --git a/sycl/include/sycl/ext/oneapi/properties/properties.hpp b/sycl/include/sycl/ext/oneapi/properties/properties.hpp index 414b55dddec76..6281a0afa6683 100644 --- a/sycl/include/sycl/ext/oneapi/properties/properties.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/properties.hpp @@ -8,16 +8,8 @@ #pragma once -#include -#include -#include -#include - -#include -#include - -// For old properties: #include +#include #include // for IsRuntimePr... #include // for Sorted, Mer... #include // for property_value @@ -26,384 +18,6 @@ #include // for enable_if_t #include // for tuple -namespace sycl { -inline namespace _V1 { -namespace ext::oneapi::experimental { -namespace new_properties { -namespace detail { -template struct properties_type_list; - -// Is used to implement `is_property_v`. -struct property_key_tag_base {}; -} // namespace detail - -template -class __SYCL_EBO properties; - -template struct is_property_list : std::false_type {}; -template -struct is_property_list> : std::true_type {}; -template -inline constexpr bool is_property_list_v = is_property_list::value; - -template -inline constexpr bool is_property_v = - std::is_base_of_v && - !is_property_list_v; - -namespace detail { - -#if __has_builtin(__type_pack_element) -template -using nth_type_t = __type_pack_element; -#else -template struct nth_type { - using type = typename nth_type::type; -}; - -template struct nth_type<0, T, Ts...> { - using type = T; -}; - -template -using nth_type_t = typename nth_type::type; -#endif - -template struct properties_sorter; - -// Specialization to avoid zero-size array creation. -template <> struct properties_sorter> { - using type = properties_type_list<>; -}; - -template -struct properties_sorter, - property_tys...> { - static constexpr auto sorted_indices = []() constexpr { - int idx = 0; - int N = sizeof...(property_tys); - // std::sort isn't constexpr until C++20. Also, it's possible there will be - // a compiler builtin to sort types, in which case we should start using it. - std::array to_sort{std::pair{property_tys::property_name, idx++}...}; - auto swap_pair = [](auto &x, auto &y) constexpr { - auto tmp_first = x.first; - auto tmp_second = x.second; - x.first = y.first; - x.second = y.second; - y.first = tmp_first; - y.second = tmp_second; - }; - for (int i = 0; i < N; ++i) - for (int j = i; j < N; ++j) - if (to_sort[j].first < to_sort[i].first) - swap_pair(to_sort[i], to_sort[j]); - - std::array sorted_indices{}; - for (int i = 0; i < N; ++i) - sorted_indices[i] = to_sort[i].second; - - return sorted_indices; - }(); - - using type = properties_type_list< - nth_type_t...>; -}; - -// We support incomplete property_key_t, so need to wrap it. -template -struct property_key_tag : property_key_tag_base {}; - -// NOTE: each property_t subclass must provide -// -// static constexpr std::string_view -// property_name{" -struct property_base : property_key_tag { -protected: - using key_t = property_key_t; - constexpr property_t get_property_impl(property_key_tag) const { - // In fact, `static_cast` below works just fine with clang/msvc but not with - // gcc, see https://godbolt.org/z/MY6849jGh for a reduced test. However, we - // need to support all ,so special case for compile-time properties (when - // `is_empty_v` is true). - if constexpr (std::is_empty_v) { - return property_t{}; - } else { - return *static_cast(this); - } - } - - // For key_t access in error reporting specialization. - template - friend class __SYCL_EBO new_properties::properties; - -public: - static constexpr const char *ir_attribute_name = ""; - static constexpr std::nullptr_t ir_attribute_value = nullptr; - - // this_property_t is to disable ADL - properties{property{}} is inherited - // from property. - - template - friend constexpr std::enable_if_t< - std::is_same_v && - is_property_v, - decltype(properties{std::declval(), - std::declval()})> - operator+(const this_property_t &lhs, const other_property_t &rhs) { - return properties{lhs, rhs}; - } - - template - friend constexpr std::enable_if_t< - std::is_same_v, - properties>> - operator+(const this_property_t &lhs) { - return properties>{lhs}; - } -}; - -template -inline constexpr bool property_names_are_unique = []() constexpr { - if constexpr (sizeof...(property_tys) == 0) { - return true; - } else { - const std::array names = {property_tys::property_name...}; - auto N = names.size(); - for (int i = 0; i < N; ++i) - for (int j = i + 1; j < N; ++j) - if (names[i] == names[j]) - return false; - - return true; - } -}(); - -template -inline constexpr bool properties_are_sorted = []() constexpr { - if constexpr (sizeof...(property_tys) == 0) { - return true; - } else { - const std::array sort_names = {property_tys::property_name...}; - // std::is_sorted isn't constexpr until C++20. - // - // Sorting is an implementation detail while uniqueness of the - // property_name's is an API restriction. This internal check actually - // combines both conditions as we expect that user error is handled before - // the internal `properties_are_sorted` assert is checked. - for (std::size_t idx = 1; idx < sort_names.size(); ++idx) - if (sort_names[idx - 1] >= sort_names[idx]) - return false; - return true; - } -}(); -} // namespace detail - -// Empty property list. -template <> class __SYCL_EBO properties, void> { -public: - template static constexpr bool has_property() { return false; } - - // TODO: How does this work without qualified name? - template - friend constexpr std::enable_if_t< - is_property_v, - properties>> - operator+(const properties &, const other_property_t &rhs) { - return properties{rhs}; - } -}; - -// Base implementation to provide nice user error in case of mis-use. Without it -// an error "base class '' specified more than once as a direct base -// class" is reported prior to static_assert's error. -template -class __SYCL_EBO properties< - detail::properties_type_list, - std::enable_if_t>> { - static_assert((is_property_v && ...)); - - // This is a separate specialization to report an error, we can afford doing - // extra work to provide nice error message without sacrificing compile time - // on non-exceptional path. Let's find *a* pair of properties that failed the - // check. Note that there might be multiple duplicate names, we're only - // reporting one instance. Once user addresses that, the next pair will be - // reported. - static constexpr auto conflict = []() constexpr { - const std::array keys = {property_tys::property_name...}; - auto N = keys.size(); - for (int i = 0; i < N; ++i) - for (int j = i + 1; j < N; ++j) - if (keys[i] == keys[j]) - return std::pair{i, j}; - }(); - using first_type = detail::nth_type_t; - using second_type = detail::nth_type_t; - static_assert( - !std::is_same_v, - "Duplicate property!"); - static_assert(first_type::property_name != second_type::property_name, - "Property name collision between different property keys!"); - static_assert((is_property_v && ...)); -}; - -// NOTE: Meta-function to implement CTAD rules isn't allowed to return -// `properties` and it's impossible to return a pack as well. As -// such, we're forced to have an extra level of `detail::properties_type_list` -// for the purpose of providing CTAD rules. -template -class __SYCL_EBO properties< - detail::properties_type_list, - std::enable_if_t>> - : private property_tys... { - static_assert((is_property_v && ...)); - static_assert(detail::properties_are_sorted, - "Properties must be sorted!"); - using property_tys::get_property_impl...; - - template friend class __SYCL_EBO properties; - -public: - template && ...)) && - sizeof...(unsorted_property_tys) == sizeof...(property_tys)>> - constexpr properties(unsorted_property_tys... props) - : unsorted_property_tys(props)... {} - - // TODO: not sure if that is needed if we'd have operator| or operator+. - // TODO: sizeof... check. - template < - typename... other_property_list_tys, typename... other_property_tys, - typename = std::enable_if_t<((is_property_v && ...))>> - constexpr properties( - properties> - other_properties, - other_property_tys... props) - : other_property_list_tys( - static_cast(other_properties))..., - other_property_tys(props)... {} - - // TODO: Do we need this? If so, is separate CTAD needed? - // template - // properties(unsorted_property_tys &&...props) - // : unsorted_property_tys(std::forward(props))... - // {} - - template static constexpr bool has_property() { - return std::is_base_of_v, - properties>; - } - - // Two methods below do the following (pseudocode): - // - // template - // using ret_t = decltype(this->get_property_impl(key_tag{})); - // static constexpr auto get_property() requires(is_empty_v) { - // return ret_t{}; - // } - // constexpr auto get_property() const requires(!is_empty_v) { - // return get_property_impl(key_tag{}); - // } - template - static constexpr auto get_property() -> std::enable_if_t< - std::is_empty_v().get_property_impl( - detail::property_key_tag{}))>, - decltype(std::declval().get_property_impl( - detail::property_key_tag{}))> { - return decltype(std::declval().get_property_impl( - detail::property_key_tag{})){}; - } - - template - constexpr auto get_property() const -> std::enable_if_t< - !std::is_empty_v().get_property_impl( - detail::property_key_tag{}))>, - decltype(std::declval().get_property_impl( - detail::property_key_tag{}))> { - return get_property_impl(detail::property_key_tag{}); - } - - // TODO: Do we need separate `static` overload if we decide to keep this - // interface? - template - constexpr auto - get_property_or_default_to(default_property_t default_property) { - if constexpr (has_property()) - return get_property(); - else - return default_property; - } - - // TODO: Use more effective insert sort for single-property insertion. - - // Need to use qualified type to force CTAD instead of using *current* - // properties instantiation. - template - friend constexpr std::enable_if_t< - is_property_v, - decltype(ext::oneapi::experimental::new_properties::properties{ - std::declval()..., std::declval()})> - operator+(const properties &lhs, const other_property_t &rhs) { - return ext::oneapi::experimental::new_properties::properties{ - static_cast(lhs)..., rhs}; - } - - template - friend constexpr auto - operator+(const properties &lhs, - const ext::oneapi::experimental::new_properties::properties< - detail::properties_type_list> &rhs) { - return ext::oneapi::experimental::new_properties::properties{ - static_cast(lhs)..., - static_cast(rhs)...}; - } -}; - -template && ...))>> -properties(unsorted_property_tys...) - -> properties, - unsorted_property_tys...>::type>; - -template < - typename... other_property_list_tys, typename... other_property_tys, - typename = std::enable_if_t<((is_property_v && ...))>> -properties(properties>, - other_property_tys...) - -> properties, - other_property_list_tys..., other_property_tys...>::type>; - -using empty_properties_t = decltype(properties{}); - -template -struct all_properties_in : std::false_type {}; -template -struct all_properties_in< - properties>, - allowed_property_keys...> - : std::bool_constant<((sycl::detail::check_type_in_v< - property_tys, allowed_property_keys...> && - ...))> {}; - -template -inline constexpr bool all_properties_in_v = - all_properties_in, - allowed_property_keys...>::value; -} // namespace new_properties -} // namespace ext::oneapi::experimental -} // namespace _V1 -} // namespace sycl - namespace sycl { inline namespace _V1 { namespace ext::oneapi::experimental { diff --git a/sycl/test/extensions/properties/new_properties.cpp b/sycl/test/extensions/properties/new_properties.cpp index 4814ba7d6e8ac..1ec2af62f2bcc 100644 --- a/sycl/test/extensions/properties/new_properties.cpp +++ b/sycl/test/extensions/properties/new_properties.cpp @@ -1,6 +1,6 @@ // RUN: %clangxx -fsycl -fsyntax-only %s -#include +#include using namespace sycl::ext::oneapi::experimental::new_properties; diff --git a/sycl/test/extensions/properties/new_properties_llvm_ir.cpp b/sycl/test/extensions/properties/new_properties_llvm_ir.cpp index 2143e011649e5..7a753e2bf6d64 100644 --- a/sycl/test/extensions/properties/new_properties_llvm_ir.cpp +++ b/sycl/test/extensions/properties/new_properties_llvm_ir.cpp @@ -3,7 +3,7 @@ // CHECK: @fg_int = linkonce_odr dso_local addrspace(1) global %struct.fake_device_global { i32 43 }, align 4 #[[ATTR:[0-9]*]] // CHECK: attributes #[[ATTR]] = { "llvm-ir-prop"="42" } -#include +#include using namespace sycl::ext::oneapi::experimental::new_properties; diff --git a/sycl/test/extensions/properties/new_properties_negative.cpp b/sycl/test/extensions/properties/new_properties_negative.cpp index eff82943520e1..62ebde37b3348 100644 --- a/sycl/test/extensions/properties/new_properties_negative.cpp +++ b/sycl/test/extensions/properties/new_properties_negative.cpp @@ -1,6 +1,6 @@ // RUN: %clangxx -fsycl -fsyntax-only %s -Xclang -verify -Xclang -verify-ignore-unexpected=note -#include +#include using namespace sycl::ext::oneapi::experimental::new_properties; @@ -45,18 +45,18 @@ struct prop : detail::property_base { } void test() { - // expected-error@sycl/ext/oneapi/properties/properties.hpp:* {{static assertion failed due to requirement '!std::is_same_v, property<1>>': Duplicate property!}} + // expected-error@sycl/ext/oneapi/properties/new_properties.hpp:* {{static assertion failed due to requirement '!std::is_same_v, property<1>>': Duplicate property!}} std::ignore = properties{property<1>{}, property<1>{}}; constexpr properties pl{property<1>{}, property<2>{}}; - // expected-error@sycl/ext/oneapi/properties/properties.hpp:* {{static assertion failed due to requirement '!std::is_same_v, property<2>>': Duplicate property!}} + // expected-error@sycl/ext/oneapi/properties/new_properties.hpp:* {{static assertion failed due to requirement '!std::is_same_v, property<2>>': Duplicate property!}} std::ignore = properties{pl, property<2>{}}; // Unfortunately, C++ front end doesn't use qualified name for "prop" below... - // expected-error@sycl/ext/oneapi/properties/properties.hpp:* {{static assertion failed due to requirement 'prop::property_name != prop::property_name': Property name collision between different property keys!}} + // expected-error@sycl/ext/oneapi/properties/new_properties.hpp:* {{static assertion failed due to requirement 'prop::property_name != prop::property_name': Property name collision between different property keys!}} std::ignore = properties{library_a::prop{}, library_b::prop{}}; - // expected-error@sycl/ext/oneapi/properties/properties.hpp:* {{static assertion failed due to requirement '!std::is_same_v': Duplicate property!}} + // expected-error@sycl/ext/oneapi/properties/new_properties.hpp:* {{static assertion failed due to requirement '!std::is_same_v': Duplicate property!}} std::ignore = properties{property_with_key<1>{}, property_with_key<2>{}}; } diff --git a/sycl/test/extensions/properties/new_properties_open_question.cpp b/sycl/test/extensions/properties/new_properties_open_question.cpp index 3447d6c84eeb6..ad1e27eef0e61 100644 --- a/sycl/test/extensions/properties/new_properties_open_question.cpp +++ b/sycl/test/extensions/properties/new_properties_open_question.cpp @@ -1,6 +1,6 @@ // RUN: %clangxx -fsycl -fsyntax-only %s -#include +#include using namespace sycl::ext::oneapi::experimental::new_properties; diff --git a/sycl/test/include_deps/sycl_detail_core.hpp.cpp b/sycl/test/include_deps/sycl_detail_core.hpp.cpp index d7141579c0f48..22126c64b1110 100644 --- a/sycl/test/include_deps/sycl_detail_core.hpp.cpp +++ b/sycl/test/include_deps/sycl_detail_core.hpp.cpp @@ -138,6 +138,7 @@ // CHECK-NEXT: ext/oneapi/properties/property_value.hpp // CHECK-NEXT: ext/oneapi/properties/property_utils.hpp // CHECK-NEXT: ext/oneapi/properties/properties.hpp +// CHECK-NEXT: ext/oneapi/properties/new_properties.hpp // CHECK-NEXT: ext/oneapi/experimental/graph.hpp // CHECK-NEXT: handler.hpp // CHECK-NEXT: detail/cl.h From 0c80e0e3c7041a3c49bbcd948851d23a399940a5 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Mon, 28 Oct 2024 09:51:10 -0700 Subject: [PATCH 23/32] Remove stuff: * Unary plus * "merge" ctor - binary operator+ is used instead * include of the new header in `properties.hpp` * simplify get_property_impl --- .../ext/oneapi/properties/new_properties.hpp | 47 +------------------ .../sycl/ext/oneapi/properties/properties.hpp | 2 +- .../extensions/properties/new_properties.cpp | 13 ++--- .../properties/new_properties_negative.cpp | 2 +- .../new_properties_open_question.cpp | 3 +- 5 files changed, 10 insertions(+), 57 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/properties/new_properties.hpp b/sycl/include/sycl/ext/oneapi/properties/new_properties.hpp index 714e15a39be8e..465963cd21f52 100644 --- a/sycl/include/sycl/ext/oneapi/properties/new_properties.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/new_properties.hpp @@ -117,15 +117,7 @@ struct property_base : property_key_tag { protected: using key_t = property_key_t; constexpr property_t get_property_impl(property_key_tag) const { - // In fact, `static_cast` below works just fine with clang/msvc but not with - // gcc, see https://godbolt.org/z/MY6849jGh for a reduced test. However, we - // need to support all ,so special case for compile-time properties (when - // `is_empty_v` is true). - if constexpr (std::is_empty_v) { - return property_t{}; - } else { - return *static_cast(this); - } + return *static_cast(this); } // For key_t access in error reporting specialization. @@ -148,14 +140,6 @@ struct property_base : property_key_tag { operator+(const this_property_t &lhs, const other_property_t &rhs) { return properties{lhs, rhs}; } - - template - friend constexpr std::enable_if_t< - std::is_same_v, - properties>> - operator+(const this_property_t &lhs) { - return properties>{lhs}; - } }; template @@ -266,25 +250,6 @@ class __SYCL_EBO properties< constexpr properties(unsorted_property_tys... props) : unsorted_property_tys(props)... {} - // TODO: not sure if that is needed if we'd have operator| or operator+. - // TODO: sizeof... check. - template < - typename... other_property_list_tys, typename... other_property_tys, - typename = std::enable_if_t<((is_property_v && ...))>> - constexpr properties( - properties> - other_properties, - other_property_tys... props) - : other_property_list_tys( - static_cast(other_properties))..., - other_property_tys(props)... {} - - // TODO: Do we need this? If so, is separate CTAD needed? - // template - // properties(unsorted_property_tys &&...props) - // : unsorted_property_tys(std::forward(props))... - // {} - template static constexpr bool has_property() { return std::is_base_of_v, properties>; @@ -363,16 +328,6 @@ properties(unsorted_property_tys...) std::make_integer_sequence, unsorted_property_tys...>::type>; -template < - typename... other_property_list_tys, typename... other_property_tys, - typename = std::enable_if_t<((is_property_v && ...))>> -properties(properties>, - other_property_tys...) - -> properties, - other_property_list_tys..., other_property_tys...>::type>; - using empty_properties_t = decltype(properties{}); template diff --git a/sycl/include/sycl/ext/oneapi/properties/properties.hpp b/sycl/include/sycl/ext/oneapi/properties/properties.hpp index 6281a0afa6683..1e195e6a521fd 100644 --- a/sycl/include/sycl/ext/oneapi/properties/properties.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/properties.hpp @@ -9,7 +9,7 @@ #pragma once #include -#include +// #include #include // for IsRuntimePr... #include // for Sorted, Mer... #include // for property_value diff --git a/sycl/test/extensions/properties/new_properties.cpp b/sycl/test/extensions/properties/new_properties.cpp index 1ec2af62f2bcc..0628621b45193 100644 --- a/sycl/test/extensions/properties/new_properties.cpp +++ b/sycl/test/extensions/properties/new_properties.cpp @@ -71,15 +71,15 @@ template void test(std::integer_sequence) { } } // namespace bench -namespace test_merge_ctor { +namespace test_operator_plus { template struct property : named_property_base> {}; constexpr properties pl1{property<1>{}, property<2>{}, property<3>{}}; -constexpr properties pl2{pl1, property<4>{}}; +constexpr properties pl2 = pl1 + properties{property<4>{}}; static_assert(!pl1.has_property>()); static_assert(pl2.has_property>()); static_assert(pl2.has_property>()); -} // namespace test_merge_ctor +} // namespace test_operator_plus namespace test_compile_prop_in_runtime_list { template @@ -138,11 +138,8 @@ static_assert( static_assert( std::is_same_v); -static_assert(std::is_same_v); -static_assert(std::is_same_v); - -static_assert(std::is_same_v); -static_assert(std::is_same_v); +static_assert(std::is_same_v); +static_assert(std::is_same_v); static_assert(std::is_same_v); } diff --git a/sycl/test/extensions/properties/new_properties_negative.cpp b/sycl/test/extensions/properties/new_properties_negative.cpp index 62ebde37b3348..f6e2004ba08fa 100644 --- a/sycl/test/extensions/properties/new_properties_negative.cpp +++ b/sycl/test/extensions/properties/new_properties_negative.cpp @@ -50,7 +50,7 @@ void test() { constexpr properties pl{property<1>{}, property<2>{}}; // expected-error@sycl/ext/oneapi/properties/new_properties.hpp:* {{static assertion failed due to requirement '!std::is_same_v, property<2>>': Duplicate property!}} - std::ignore = properties{pl, property<2>{}}; + std::ignore = pl + properties{property<2>{}}; // Unfortunately, C++ front end doesn't use qualified name for "prop" below... // expected-error@sycl/ext/oneapi/properties/new_properties.hpp:* {{static assertion failed due to requirement 'prop::property_name != prop::property_name': Property name collision between different property keys!}} diff --git a/sycl/test/extensions/properties/new_properties_open_question.cpp b/sycl/test/extensions/properties/new_properties_open_question.cpp index ad1e27eef0e61..f7ebfd3041dfb 100644 --- a/sycl/test/extensions/properties/new_properties_open_question.cpp +++ b/sycl/test/extensions/properties/new_properties_open_question.cpp @@ -74,7 +74,8 @@ inline constexpr other_prop_property other_prop{}; void bar() { // Unary `+` has a bit of hacky feeling... - foo(+prop<42>); + // foo(+prop<42>); + // More than one property in a property list looks very natural. // Alternatively, that can be `operator|` but it has no unary version. foo(prop<42> + other_prop); From a6f440ba9fd2624fcfd816602df2e87680483de2 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Mon, 28 Oct 2024 10:17:21 -0700 Subject: [PATCH 24/32] `detail::key` helper --- .../sycl/ext/oneapi/properties/new_properties.hpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/properties/new_properties.hpp b/sycl/include/sycl/ext/oneapi/properties/new_properties.hpp index 465963cd21f52..8681ac1713843 100644 --- a/sycl/include/sycl/ext/oneapi/properties/new_properties.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/new_properties.hpp @@ -176,6 +176,9 @@ inline constexpr bool properties_are_sorted = []() constexpr { return true; } }(); + +template +inline constexpr property_key_tag key{}; } // namespace detail // Empty property list. @@ -268,20 +271,20 @@ class __SYCL_EBO properties< template static constexpr auto get_property() -> std::enable_if_t< std::is_empty_v().get_property_impl( - detail::property_key_tag{}))>, + detail::key))>, decltype(std::declval().get_property_impl( - detail::property_key_tag{}))> { + detail::key))> { return decltype(std::declval().get_property_impl( - detail::property_key_tag{})){}; + detail::key)){}; } template constexpr auto get_property() const -> std::enable_if_t< !std::is_empty_v().get_property_impl( - detail::property_key_tag{}))>, + detail::key))>, decltype(std::declval().get_property_impl( - detail::property_key_tag{}))> { - return get_property_impl(detail::property_key_tag{}); + detail::key))> { + return get_property_impl(detail::key); } // TODO: Do we need separate `static` overload if we decide to keep this From 46dae6e295a722bfd99862aaaf7a2364f4a40f99 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Mon, 28 Oct 2024 10:19:34 -0700 Subject: [PATCH 25/32] Drop `get_property_or_default_to` for now --- .../sycl/ext/oneapi/properties/new_properties.hpp | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/properties/new_properties.hpp b/sycl/include/sycl/ext/oneapi/properties/new_properties.hpp index 8681ac1713843..fb9a23f9c2675 100644 --- a/sycl/include/sycl/ext/oneapi/properties/new_properties.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/new_properties.hpp @@ -287,17 +287,6 @@ class __SYCL_EBO properties< return get_property_impl(detail::key); } - // TODO: Do we need separate `static` overload if we decide to keep this - // interface? - template - constexpr auto - get_property_or_default_to(default_property_t default_property) { - if constexpr (has_property()) - return get_property(); - else - return default_property; - } - // TODO: Use more effective insert sort for single-property insertion. // Need to use qualified type to force CTAD instead of using *current* From 07ac818193b2b3b8e2c8092b64aa5d99c45f9651 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Mon, 28 Oct 2024 10:33:57 -0700 Subject: [PATCH 26/32] Support for implicit key --- .../ext/oneapi/properties/new_properties.hpp | 76 +++++++++++++------ .../extensions/properties/new_properties.cpp | 50 +++++++++++- 2 files changed, 100 insertions(+), 26 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/properties/new_properties.hpp b/sycl/include/sycl/ext/oneapi/properties/new_properties.hpp index fb9a23f9c2675..d77758742e66f 100644 --- a/sycl/include/sycl/ext/oneapi/properties/new_properties.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/new_properties.hpp @@ -142,6 +142,10 @@ struct property_base : property_key_tag { } }; +template