diff --git a/sycl/doc/extensions/proposed/sycl_ext_oneapi_reduction_properties.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_reduction_properties.asciidoc similarity index 85% rename from sycl/doc/extensions/proposed/sycl_ext_oneapi_reduction_properties.asciidoc rename to sycl/doc/extensions/experimental/sycl_ext_oneapi_reduction_properties.asciidoc index f025e3dc805cf..38e56117ee84f 100644 --- a/sycl/doc/extensions/proposed/sycl_ext_oneapi_reduction_properties.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_reduction_properties.asciidoc @@ -49,12 +49,12 @@ This extension also depends on the following other SYCL extensions: == Status -This is a proposed extension specification, intended to gather community -feedback. Interfaces defined in this specification may not be implemented yet -or may be in a preliminary state. The specification itself may also change in -incompatible ways before it is finalized. *Shipping software products should -not rely on APIs defined in this specification.* - +This is an experimental extension specification, intended to provide early +access to features and gather community feedback. Interfaces defined in this +specification are implemented in {dpcpp}, but they are not finalized and may +change incompatibly in future versions of {dpcpp} without prior notice. +*Shipping software products should not rely on APIs defined in this +specification.* == Overview @@ -87,7 +87,8 @@ implementation supports. |Description |1 -|Initial version of this extension. +|The APIs of this experimental extension are not versioned, so the + feature-test macro always has this value. |=== === `reduction` overload @@ -111,7 +112,7 @@ template __unspecified__ reduction(T* var, BinaryOperation combiner, PropertyList properties); -template +template __unspecified__ reduction(span vars, BinaryOperation combiner, PropertyList properties); @@ -124,7 +125,7 @@ template __unspecified__ reduction(T* var, const T& identity, BinaryOperation combiner, PropertyList properties); -template +template __unspecified__ reduction(span vars, const T& identity, BinaryOperation combiner, PropertyList properties); @@ -132,6 +133,17 @@ __unspecified__ reduction(span vars, const T& identity, } ---- +_Constraints_: Available only when `PropertyList` is an instance of +`sycl::ext::oneapi::experimental::properties` which contains no properties +other than those listed below in the section "Reduction properties". + +The `reduction` functions that take no `identity` parameter have the following +clause: + +_Mandates_: If `properties` contains the `initialize_to_identity` property, +then the identity of the `BinaryOperation` operation must be identifiable via +the `known_identity` trait class. + === Reduction properties New `reduction` properties are introduced to allow developers to constrain @@ -139,13 +151,9 @@ reduction algorithm selection based on desired behavior(s). Compile-time properties corresponding to existing runtime properties are also introduced to ensure that all information can be passed via a single property list. -If a reduction kernel is submitted to a device that cannot satisfy the -request for specific reduction behavior(s), the implementation must throw an -`exception` with the `errc::feature_not_supported` error code. - [source,c++] ---- -namespace sycl::ext::oneapi { +namespace sycl::ext::oneapi::experimental { struct deterministic_key { using value_t = property_value; @@ -192,7 +200,7 @@ use of atomic operations, etc. _{endnote}_] [source,c++] ---- -using syclex = sycl::ext::oneapi::experimental; +namespace syclex = sycl::ext::oneapi::experimental; float sum(sycl::queue q, float* input, size_t N) { @@ -205,10 +213,10 @@ float sum(sycl::queue q, float* input, size_t N) { h.parallel_for(N, reduction, [=](size_t i, auto& reducer) { reducer += input[i]; }); - } + }); } return result; - + } ... diff --git a/sycl/include/sycl/ext/oneapi/experimental/reduction_properties.hpp b/sycl/include/sycl/ext/oneapi/experimental/reduction_properties.hpp new file mode 100644 index 0000000000000..a96378c522f82 --- /dev/null +++ b/sycl/include/sycl/ext/oneapi/experimental/reduction_properties.hpp @@ -0,0 +1,143 @@ +//==------- properties.hpp - SYCL properties associated with reductions ----==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once +#define SYCL_EXT_ONEAPI_REDUCTION_PROPERTIES + +#include +#include +#include + +namespace sycl { +inline namespace _V1 { +namespace ext { +namespace oneapi { +namespace experimental { + +struct deterministic_key + : detail::compile_time_property_key { + using value_t = property_value; +}; +inline constexpr deterministic_key::value_t deterministic; + +struct initialize_to_identity_key + : detail::compile_time_property_key< + detail::PropKind::InitializeToIdentity> { + using value_t = property_value; +}; +inline constexpr initialize_to_identity_key::value_t initialize_to_identity; + +} // namespace experimental +} // namespace oneapi +} // namespace ext + +namespace detail { + +template +auto WrapOp(BinaryOperation combiner, PropertyList properties) { + if constexpr (properties.template has_property< + ext::oneapi::experimental::deterministic_key>()) { + return DeterministicOperatorWrapper(combiner); + } else { + return combiner; + } +} + +template +void CheckReductionIdentity(PropertyList properties) { + if constexpr (properties.template has_property< + ext::oneapi::experimental::initialize_to_identity_key>()) { + static_assert(has_known_identity_v, + "initialize_to_identity requires an identity value."); + } +} + +template +property_list GetReductionPropertyList(PropertyList properties) { + if constexpr (properties.template has_property< + ext::oneapi::experimental::initialize_to_identity_key>()) { + return sycl::property::reduction::initialize_to_identity{}; + } + return {}; +} + +template struct DeterministicOperatorWrapper { + + DeterministicOperatorWrapper(BinaryOperation BinOp = BinaryOperation()) + : BinOp(BinOp) {} + + template + std::invoke_result_t operator()(Args... args) { + return BinOp(std::forward(args)...); + } + + BinaryOperation BinOp; +}; + +template +struct IsDeterministicOperator> + : std::true_type {}; + +} // namespace detail + +template +auto reduction(BufferT vars, handler &cgh, BinaryOperation combiner, + PropertyList properties) { + detail::CheckReductionIdentity( + properties); + auto WrappedOp = detail::WrapOp(combiner, properties); + auto RuntimeProps = detail::GetReductionPropertyList(properties); + return reduction(vars, cgh, WrappedOp, RuntimeProps); +} + +template +auto reduction(T *var, BinaryOperation combiner, PropertyList properties) { + detail::CheckReductionIdentity(properties); + auto WrappedOp = detail::WrapOp(combiner, properties); + auto RuntimeProps = detail::GetReductionPropertyList(properties); + return reduction(var, WrappedOp, RuntimeProps); +} + +template +auto reduction(span vars, BinaryOperation combiner, + PropertyList properties) { + detail::CheckReductionIdentity(properties); + auto WrappedOp = detail::WrapOp(combiner, properties); + auto RuntimeProps = detail::GetReductionPropertyList(properties); + return reduction(vars, WrappedOp, RuntimeProps); +} + +template +auto reduction(BufferT vars, handler &cgh, + const typename BufferT::value_type &identity, + BinaryOperation combiner, PropertyList properties) { + auto WrappedOp = detail::WrapOp(combiner, properties); + auto RuntimeProps = detail::GetReductionPropertyList(properties); + return reduction(vars, cgh, identity, WrappedOp, RuntimeProps); +} + +template +auto reduction(T *var, const T &identity, BinaryOperation combiner, + PropertyList properties) { + auto WrappedOp = detail::WrapOp(combiner, properties); + auto RuntimeProps = detail::GetReductionPropertyList(properties); + return reduction(var, identity, WrappedOp, RuntimeProps); +} + +template +auto reduction(span vars, const T &identity, + BinaryOperation combiner, PropertyList properties) { + auto WrappedOp = detail::WrapOp(combiner, properties); + auto RuntimeProps = detail::GetReductionPropertyList(properties); + return reduction(vars, identity, WrappedOp, RuntimeProps); +} + +} // namespace _V1 +} // namespace sycl diff --git a/sycl/include/sycl/ext/oneapi/properties/property.hpp b/sycl/include/sycl/ext/oneapi/properties/property.hpp index 351bac8044d98..2a9ae7f5dbab3 100644 --- a/sycl/include/sycl/ext/oneapi/properties/property.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/property.hpp @@ -215,8 +215,10 @@ enum PropKind : uint32_t { MaxWorkGroupSize = 74, MaxLinearWorkGroupSize = 75, Prefetch = 76, + Deterministic = 77, + InitializeToIdentity = 78, // PropKindSize must always be the last value. - PropKindSize = 77, + PropKindSize = 79, }; struct property_key_base_tag {}; diff --git a/sycl/include/sycl/known_identity.hpp b/sycl/include/sycl/known_identity.hpp index 32575b94faccf..3aecad3188e49 100644 --- a/sycl/include/sycl/known_identity.hpp +++ b/sycl/include/sycl/known_identity.hpp @@ -25,54 +25,48 @@ namespace sycl { inline namespace _V1 { namespace detail { +// Forward declaration for deterministic reductions. +template struct DeterministicOperatorWrapper; + +template class... KnownOperation> +using IsKnownOp = std::bool_constant<( + (std::is_same_v> || + std::is_same_v> || + std::is_same_v>> || + std::is_same_v>>) || + ...)>; + template -using IsPlus = - std::bool_constant> || - std::is_same_v>>; +using IsPlus = IsKnownOp; template -using IsMultiplies = - std::bool_constant> || - std::is_same_v>>; +using IsMultiplies = IsKnownOp; template -using IsMinimum = - std::bool_constant> || - std::is_same_v>>; +using IsMinimum = IsKnownOp; template -using IsMaximum = - std::bool_constant> || - std::is_same_v>>; +using IsMaximum = IsKnownOp; template -using IsBitAND = - std::bool_constant> || - std::is_same_v>>; +using IsBitAND = IsKnownOp; template -using IsBitOR = - std::bool_constant> || - std::is_same_v>>; +using IsBitOR = IsKnownOp; template -using IsBitXOR = - std::bool_constant> || - std::is_same_v>>; +using IsBitXOR = IsKnownOp; template -using IsLogicalAND = std::bool_constant< - std::is_same_v> || - std::is_same_v> || - std::is_same_v> || - std::is_same_v>>; +using IsLogicalAND = + IsKnownOp; template using IsLogicalOR = - std::bool_constant> || - std::is_same_v> || - std::is_same_v> || - std::is_same_v>>; + IsKnownOp; // Use SFINAE so that the "true" branch could be implemented in // include/sycl/stl_wrappers/complex that would only be available if STL's diff --git a/sycl/include/sycl/reduction.hpp b/sycl/include/sycl/reduction.hpp index 6f4df586ef478..f84940d81d162 100644 --- a/sycl/include/sycl/reduction.hpp +++ b/sycl/include/sycl/reduction.hpp @@ -77,20 +77,24 @@ template struct IsDeterministicOperator : std::true_type {}; +#else +// Each operator declares whether determinism is required. +template struct IsDeterministicOperator : std::false_type {}; +#endif + // This type trait is used to detect if the atomic operation BinaryOperation // used with operands of the type T is available for using in reduction. // The order in which the atomic operations are performed may be arbitrary and // thus may cause different results from run to run even on the same elements -// and on same device. The macro SYCL_REDUCTION_DETERMINISTIC prohibits using -// atomic operations for reduction and helps to produce stable results. -// SYCL_REDUCTION_DETERMINISTIC is a short term solution, which perhaps become -// deprecated eventually and is replaced by a sycl property passed to reduction. +// and on same device. template using IsReduOptForFastAtomicFetch = -#ifdef SYCL_REDUCTION_DETERMINISTIC - std::bool_constant; -#else - std::bool_constant<((is_sgenfloat_v && sizeof(T) == 4) || + std::bool_constant::value && + ((is_sgenfloat_v && sizeof(T) == 4) || is_sgeninteger_v) && IsValidAtomicType::value && (IsPlus::value || @@ -99,44 +103,33 @@ using IsReduOptForFastAtomicFetch = IsBitOR::value || IsBitXOR::value || IsBitAND::value)>; -#endif // This type trait is used to detect if the atomic operation BinaryOperation // used with operands of the type T is available for using in reduction, in // addition to the cases covered by "IsReduOptForFastAtomicFetch", if the device // has the atomic64 aspect. This type trait should only be used if the device // has the atomic64 aspect. Note that this type trait is currently a subset of -// IsReduOptForFastReduce. The macro SYCL_REDUCTION_DETERMINISTIC prohibits -// using the reduce_over_group() algorithm to produce stable results across same -// type devices. +// IsReduOptForFastReduce. template using IsReduOptForAtomic64Op = -#ifdef SYCL_REDUCTION_DETERMINISTIC - std::bool_constant; -#else - std::bool_constant<(IsPlus::value || + std::bool_constant::value && + (IsPlus::value || IsMinimum::value || IsMaximum::value) && is_sgenfloat_v && sizeof(T) == 8>; -#endif // This type trait is used to detect if the group algorithm reduce() used with // operands of the type T and the operation BinaryOperation is available // for using in reduction. -// The macro SYCL_REDUCTION_DETERMINISTIC prohibits using the reduce() algorithm -// to produce stable results across same type devices. template using IsReduOptForFastReduce = -#ifdef SYCL_REDUCTION_DETERMINISTIC - std::bool_constant; -#else - std::bool_constant<((is_sgeninteger_v && + std::bool_constant::value && + ((is_sgeninteger_v && (sizeof(T) == 4 || sizeof(T) == 8)) || is_sgenfloat_v) && (IsPlus::value || IsMinimum::value || IsMaximum::value)>; -#endif // std::tuple seems to be a) too heavy and b) not copyable to device now // Thus sycl::detail::tuple is used instead. diff --git a/sycl/include/sycl/sycl.hpp b/sycl/include/sycl/sycl.hpp index 8c3c5c388cf4c..846c4fb175c38 100644 --- a/sycl/include/sycl/sycl.hpp +++ b/sycl/include/sycl/sycl.hpp @@ -98,6 +98,7 @@ #include #include #include +#include #include #include #include diff --git a/sycl/test-e2e/Reduction/reduction_deterministic.cpp b/sycl/test-e2e/Reduction/reduction_deterministic.cpp new file mode 100644 index 0000000000000..d4b319ddbf657 --- /dev/null +++ b/sycl/test-e2e/Reduction/reduction_deterministic.cpp @@ -0,0 +1,48 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +#include +#include +#include +#include + +#include +#include + +namespace syclex = sycl::ext::oneapi::experimental; + +float sum(sycl::queue q, float *array, size_t N) { + + sycl::buffer input_buf{array, N}; + sycl::buffer result_buf{1}; + + sycl::host_accessor{result_buf}[0] = 0; + + q.submit([&](sycl::handler &h) { + auto input = sycl::accessor(input_buf, h, sycl::read_only); + auto reduction = sycl::reduction(result_buf, h, sycl::plus<>(), + syclex::properties(syclex::deterministic)); + h.parallel_for(N, reduction, + [=](size_t i, auto &reducer) { reducer += input[i]; }); + }); + + return sycl::host_accessor{result_buf}[0]; +} + +int main(int argc, char *argv[]) { + + constexpr size_t N = 1024; + std::array array; + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution dist(0.0f, 1.0f); + std::generate(array.begin(), array.end(), [&]() { return dist(gen); }); + + sycl::queue q; + float x = sum(q, array.data(), N); + float y = sum(q, array.data(), N); + + // NB: determinism guarantees bitwise reproducible reductions for floats + assert(sycl::bit_cast(x) == sycl::bit_cast(y)); +} diff --git a/sycl/test-e2e/Reduction/reduction_initialize_to_identity.cpp b/sycl/test-e2e/Reduction/reduction_initialize_to_identity.cpp new file mode 100644 index 0000000000000..93225a21a8b00 --- /dev/null +++ b/sycl/test-e2e/Reduction/reduction_initialize_to_identity.cpp @@ -0,0 +1,42 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +#include +#include +#include +#include + +#include +#include + +namespace syclex = sycl::ext::oneapi::experimental; + +int sum(sycl::queue q, int *array, size_t N) { + + sycl::buffer input_buf{array, N}; + sycl::buffer result_buf{1}; + + sycl::host_accessor{result_buf}[0] = 42; + + q.submit([&](sycl::handler &h) { + auto input = sycl::accessor(input_buf, h, sycl::read_only); + auto reduction = + sycl::reduction(result_buf, h, sycl::plus<>(), + syclex::properties(syclex::initialize_to_identity)); + h.parallel_for(N, reduction, + [=](size_t i, auto &reducer) { reducer += input[i]; }); + }); + + return sycl::host_accessor{result_buf}[0]; +} + +int main(int argc, char *argv[]) { + + constexpr size_t N = 32; + std::array array; + std::iota(array.begin(), array.end(), 1); + + sycl::queue q; + int x = sum(q, array.data(), N); + assert(x == 528); +}