diff --git a/sycl/include/sycl/detail/cg_types.hpp b/sycl/include/sycl/detail/cg_types.hpp index 034d93382d202..3368e333ac94d 100644 --- a/sycl/include/sycl/detail/cg_types.hpp +++ b/sycl/include/sycl/detail/cg_types.hpp @@ -235,8 +235,66 @@ class HostKernel : public HostKernelBase { #endif }; +// the class keeps reference to a lambda allocated externally on stack +class HostKernelRefBase : public HostKernelBase { +public: + HostKernelRefBase() = default; + HostKernelRefBase(const HostKernelRefBase &) = delete; + HostKernelRefBase &operator=(const HostKernelRefBase &) = delete; + + virtual std::unique_ptr takeOrCopyOwnership() const = 0; +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES + // The kernels that are passed via HostKernelRefBase are instantiated along + // ctor call with GetInstantiateKernelOnHostPtr(). + void InstantiateKernelOnHost() override {} +#endif +}; + +// Primary template for movable objects. +template +class HostKernelRef : public HostKernelRefBase { + KernelType &&MKernel; + +public: + HostKernelRef(KernelType &&Kernel) : MKernel(std::move(Kernel)) {} + HostKernelRef(const KernelType &Kernel) = delete; + + virtual char *getPtr() override { return reinterpret_cast(&MKernel); } + virtual std::unique_ptr takeOrCopyOwnership() const override { + std::unique_ptr Kernel; + Kernel.reset( + new HostKernel(std::move(MKernel))); + return Kernel; + } + + ~HostKernelRef() noexcept override = default; +}; + +// Specialization for copyable objects. +template +class HostKernelRef + : public HostKernelRefBase { + const KernelType &MKernel; + +public: + HostKernelRef(const KernelType &Kernel) : MKernel(Kernel) {} + + virtual char *getPtr() override { + return const_cast(reinterpret_cast(&MKernel)); + } + virtual std::unique_ptr takeOrCopyOwnership() const override { + std::unique_ptr Kernel; + Kernel.reset(new HostKernel(MKernel)); + return Kernel; + } + + ~HostKernelRef() noexcept override = default; +}; + // This function is needed for host-side compilation to keep kernels -// instantitated. This is important for debuggers to be able to associate +// instantiated. This is important for debuggers to be able to associate // kernel code instructions with source code lines. template constexpr void *GetInstantiateKernelOnHostPtr() { diff --git a/sycl/include/sycl/khr/free_function_commands.hpp b/sycl/include/sycl/khr/free_function_commands.hpp index c0d2afc4115ad..31464ba588dfc 100644 --- a/sycl/include/sycl/khr/free_function_commands.hpp +++ b/sycl/include/sycl/khr/free_function_commands.hpp @@ -149,44 +149,48 @@ void launch_grouped(handler &h, range<3> r, range<3> size, } template -void launch_grouped(const queue &q, range<1> r, range<1> size, - const KernelType &k, +constexpr bool enable_kernel_function_overload = + !std::is_same_v, sycl::kernel>; + +template >> +void launch_grouped(const queue &q, range<1> r, range<1> size, KernelType &&k, const sycl::detail::code_location &codeLoc = sycl::detail::code_location::current()) { #ifdef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT - detail::submit_kernel_direct(q, - ext::oneapi::experimental::empty_properties_t{}, - nd_range<1>(r, size), k); + detail::submit_kernel_direct( + q, ext::oneapi::experimental::empty_properties_t{}, nd_range<1>(r, size), + std::forward(k)); #else submit( q, [&](handler &h) { launch_grouped(h, r, size, k); }, codeLoc); #endif } -template -void launch_grouped(const queue &q, range<2> r, range<2> size, - const KernelType &k, +template >> +void launch_grouped(const queue &q, range<2> r, range<2> size, KernelType &&k, const sycl::detail::code_location &codeLoc = sycl::detail::code_location::current()) { #ifdef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT - detail::submit_kernel_direct(q, - ext::oneapi::experimental::empty_properties_t{}, - nd_range<2>(r, size), k); + detail::submit_kernel_direct( + q, ext::oneapi::experimental::empty_properties_t{}, nd_range<2>(r, size), + std::forward(k)); #else submit( q, [&](handler &h) { launch_grouped(h, r, size, k); }, codeLoc); #endif } -template -void launch_grouped(const queue &q, range<3> r, range<3> size, - const KernelType &k, +template >> +void launch_grouped(const queue &q, range<3> r, range<3> size, KernelType &&k, const sycl::detail::code_location &codeLoc = sycl::detail::code_location::current()) { #ifdef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT - detail::submit_kernel_direct(q, - ext::oneapi::experimental::empty_properties_t{}, - nd_range<3>(r, size), k); + detail::submit_kernel_direct( + q, ext::oneapi::experimental::empty_properties_t{}, nd_range<3>(r, size), + std::forward(k)); #else submit( q, [&](handler &h) { launch_grouped(h, r, size, k); }, diff --git a/sycl/include/sycl/queue.hpp b/sycl/include/sycl/queue.hpp index c610632e4c673..69911bec229fc 100644 --- a/sycl/include/sycl/queue.hpp +++ b/sycl/include/sycl/queue.hpp @@ -65,14 +65,14 @@ auto get_native(const SyclObjectT &Obj) template event __SYCL_EXPORT submit_kernel_direct_with_event_impl( const queue &Queue, const nd_range &Range, - std::shared_ptr &HostKernel, + detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, const detail::code_location &CodeLoc, bool IsTopCodeLoc); template void __SYCL_EXPORT submit_kernel_direct_without_event_impl( const queue &Queue, const nd_range &Range, - std::shared_ptr &HostKernel, + detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, const detail::code_location &CodeLoc, bool IsTopCodeLoc); @@ -157,10 +157,10 @@ class __SYCL_EXPORT SubmissionInfo { } // namespace v1 template + typename PropertiesT, typename KernelTypeUniversalRef, int Dims> auto submit_kernel_direct( const queue &Queue, PropertiesT Props, const nd_range &Range, - const KernelType &KernelFunc, + KernelTypeUniversalRef &&KernelFunc, const detail::code_location &CodeLoc = detail::code_location::current()) { // TODO Properties not supported yet (void)Props; @@ -170,6 +170,9 @@ auto submit_kernel_direct( "Setting properties not supported yet for no-CGH kernel submit."); detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); + using KernelType = + std::remove_const_t>; + using NameT = typename detail::get_kernel_name_t::name; using LambdaArgType = @@ -180,15 +183,23 @@ auto submit_kernel_direct( "must be either sycl::nd_item or be convertible from sycl::nd_item"); using TransformedArgType = sycl::nd_item; - std::shared_ptr HostKernel = std::make_shared< - detail::HostKernel>(KernelFunc); + detail::KernelWrapper::wrap(KernelFunc); + + HostKernelRef + HostKernel(std::forward(KernelFunc)); + + // Instantiating the kernel on the host improves debugging. + // Passing this pointer to another translation unit prevents optimization. +#ifndef NDEBUG + // TODO: call library to prevent dropping call due to optimization + (void) + detail::GetInstantiateKernelOnHostPtr(); +#endif detail::DeviceKernelInfo *DeviceKernelInfoPtr = &detail::getDeviceKernelInfo(); - detail::KernelWrapper::wrap(KernelFunc); - if constexpr (EventNeeded) { return submit_kernel_direct_with_event_impl( Queue, Range, HostKernel, DeviceKernelInfoPtr, diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 79769d8819000..c6f1374e2b4de 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -421,15 +421,17 @@ queue_impl::submit_impl(const detail::type_erased_cgfo_ty &CGF, } detail::EventImplPtr queue_impl::submit_kernel_direct_impl( - const NDRDescT &NDRDesc, - std::shared_ptr &HostKernel, + const NDRDescT &NDRDesc, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, bool CallerNeedsEvent, const detail::code_location &CodeLoc, bool IsTopCodeLoc) { KernelData KData; + std::shared_ptr HostKernelPtr = + HostKernel.takeOrCopyOwnership(); + KData.setDeviceKernelInfoPtr(DeviceKernelInfo); - KData.setKernelFunc(HostKernel->getPtr()); + KData.setKernelFunc(HostKernelPtr->getPtr()); KData.setNDRDesc(NDRDesc); auto SubmitKernelFunc = @@ -441,7 +443,7 @@ detail::EventImplPtr queue_impl::submit_kernel_direct_impl( KData.extractArgsAndReqsFromLambda(); CommandGroup.reset(new detail::CGExecKernel( - KData.getNDRDesc(), HostKernel, + KData.getNDRDesc(), std::move(HostKernelPtr), nullptr, // Kernel nullptr, // KernelBundle std::move(CGData), std::move(KData).getArgs(), diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index c3d6748695423..0f6348a3a1444 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -361,8 +361,7 @@ class queue_impl : public std::enable_shared_from_this { template event submit_kernel_direct_with_event( - const nd_range &Range, - std::shared_ptr &HostKernel, + const nd_range &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, const detail::code_location &CodeLoc, bool IsTopCodeLoc) { detail::EventImplPtr EventImpl = @@ -373,8 +372,7 @@ class queue_impl : public std::enable_shared_from_this { template void submit_kernel_direct_without_event( - const nd_range &Range, - std::shared_ptr &HostKernel, + const nd_range &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, const detail::code_location &CodeLoc, bool IsTopCodeLoc) { submit_kernel_direct_impl(NDRDescT{Range}, HostKernel, DeviceKernelInfo, @@ -905,8 +903,7 @@ class queue_impl : public std::enable_shared_from_this { /// /// \return a SYCL event representing submitted command group or nullptr. detail::EventImplPtr submit_kernel_direct_impl( - const NDRDescT &NDRDesc, - std::shared_ptr &HostKernel, + const NDRDescT &NDRDesc, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, bool CallerNeedsEvent, const detail::code_location &CodeLoc, bool IsTopCodeLoc); diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index 429efc16053cc..f34da47852266 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -474,7 +474,7 @@ const property_list &queue::getPropList() const { return impl->getPropList(); } template event submit_kernel_direct_with_event_impl( const queue &Queue, const nd_range &Range, - std::shared_ptr &HostKernel, + detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, const detail::code_location &CodeLoc, bool IsTopCodeLoc) { return getSyclObjImpl(Queue)->submit_kernel_direct_with_event( @@ -483,26 +483,26 @@ event submit_kernel_direct_with_event_impl( template event __SYCL_EXPORT submit_kernel_direct_with_event_impl<1>( const queue &Queue, const nd_range<1> &Range, - std::shared_ptr &HostKernel, + detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, const detail::code_location &CodeLoc, bool IsTopCodeLoc); template event __SYCL_EXPORT submit_kernel_direct_with_event_impl<2>( const queue &Queue, const nd_range<2> &Range, - std::shared_ptr &HostKernel, + detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, const detail::code_location &CodeLoc, bool IsTopCodeLoc); template event __SYCL_EXPORT submit_kernel_direct_with_event_impl<3>( const queue &Queue, const nd_range<3> &Range, - std::shared_ptr &HostKernel, + detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, const detail::code_location &CodeLoc, bool IsTopCodeLoc); template void submit_kernel_direct_without_event_impl( const queue &Queue, const nd_range &Range, - std::shared_ptr &HostKernel, + detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, const detail::code_location &CodeLoc, bool IsTopCodeLoc) { getSyclObjImpl(Queue)->submit_kernel_direct_without_event( @@ -511,19 +511,19 @@ void submit_kernel_direct_without_event_impl( template void __SYCL_EXPORT submit_kernel_direct_without_event_impl<1>( const queue &Queue, const nd_range<1> &Range, - std::shared_ptr &HostKernel, + detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, const detail::code_location &CodeLoc, bool IsTopCodeLoc); template void __SYCL_EXPORT submit_kernel_direct_without_event_impl<2>( const queue &Queue, const nd_range<2> &Range, - std::shared_ptr &HostKernel, + detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, const detail::code_location &CodeLoc, bool IsTopCodeLoc); template void __SYCL_EXPORT submit_kernel_direct_without_event_impl<3>( const queue &Queue, const nd_range<3> &Range, - std::shared_ptr &HostKernel, + detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, const detail::code_location &CodeLoc, bool IsTopCodeLoc); diff --git a/sycl/test/abi/layout_host_kernel_ref.cpp b/sycl/test/abi/layout_host_kernel_ref.cpp new file mode 100644 index 0000000000000..8a2a5b7e6a39a --- /dev/null +++ b/sycl/test/abi/layout_host_kernel_ref.cpp @@ -0,0 +1,15 @@ +// RUN: %clangxx -fsycl -c -fno-color-diagnostics -Xclang -fdump-record-layouts %s -o %t.out | FileCheck %s +// REQUIRES: linux +// UNSUPPORTED: libcxx + +// clang-format off + +#include + +void foo(sycl::detail::HostKernelRefBase *) {} + +// CHECK: 0 | class sycl::detail::HostKernelRefBase +// CHECK-NEXT: 0 | class sycl::detail::HostKernelBase (primary base) +// CHECK-NEXT: 0 | (HostKernelBase vtable pointer) +// CHECK-NEXT: | [sizeof=8, dsize=8, align=8, +// CHECK-NEXT: | nvsize=8, nvalign=8] diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index ec2a21ef34424..f9a2adb393f4b 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -2985,12 +2985,12 @@ _ZN4sycl3_V121__isgreaterequal_implEdd _ZN4sycl3_V121__isgreaterequal_implEff _ZN4sycl3_V122accelerator_selector_vERKNS0_6deviceE _ZN4sycl3_V128verifyUSMAllocatorPropertiesERKNS0_13property_listE -_ZN4sycl3_V136submit_kernel_direct_with_event_implILi1EEENS0_5eventERKNS0_5queueERKNS0_8nd_rangeIXT_EEERSt10shared_ptrINS0_6detail14HostKernelBaseEEPNSB_16DeviceKernelInfoERKNSB_13code_locationEb -_ZN4sycl3_V136submit_kernel_direct_with_event_implILi2EEENS0_5eventERKNS0_5queueERKNS0_8nd_rangeIXT_EEERSt10shared_ptrINS0_6detail14HostKernelBaseEEPNSB_16DeviceKernelInfoERKNSB_13code_locationEb -_ZN4sycl3_V136submit_kernel_direct_with_event_implILi3EEENS0_5eventERKNS0_5queueERKNS0_8nd_rangeIXT_EEERSt10shared_ptrINS0_6detail14HostKernelBaseEEPNSB_16DeviceKernelInfoERKNSB_13code_locationEb -_ZN4sycl3_V139submit_kernel_direct_without_event_implILi1EEEvRKNS0_5queueERKNS0_8nd_rangeIXT_EEERSt10shared_ptrINS0_6detail14HostKernelBaseEEPNSA_16DeviceKernelInfoERKNSA_13code_locationEb -_ZN4sycl3_V139submit_kernel_direct_without_event_implILi2EEEvRKNS0_5queueERKNS0_8nd_rangeIXT_EEERSt10shared_ptrINS0_6detail14HostKernelBaseEEPNSA_16DeviceKernelInfoERKNSA_13code_locationEb -_ZN4sycl3_V139submit_kernel_direct_without_event_implILi3EEEvRKNS0_5queueERKNS0_8nd_rangeIXT_EEERSt10shared_ptrINS0_6detail14HostKernelBaseEEPNSA_16DeviceKernelInfoERKNSA_13code_locationEb +_ZN4sycl3_V139submit_kernel_direct_without_event_implILi1EEEvRKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNS9_16DeviceKernelInfoERKNS9_13code_locationEb +_ZN4sycl3_V136submit_kernel_direct_with_event_implILi1EEENS0_5eventERKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNSA_16DeviceKernelInfoERKNSA_13code_locationEb +_ZN4sycl3_V136submit_kernel_direct_with_event_implILi3EEENS0_5eventERKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNSA_16DeviceKernelInfoERKNSA_13code_locationEb +_ZN4sycl3_V139submit_kernel_direct_without_event_implILi2EEEvRKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNS9_16DeviceKernelInfoERKNS9_13code_locationEb +_ZN4sycl3_V139submit_kernel_direct_without_event_implILi3EEEvRKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNS9_16DeviceKernelInfoERKNS9_13code_locationEb +_ZN4sycl3_V136submit_kernel_direct_with_event_implILi2EEENS0_5eventERKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNSA_16DeviceKernelInfoERKNSA_13code_locationEb _ZN4sycl3_V13ext5intel12experimental9pipe_base13get_pipe_nameB5cxx11EPKv _ZN4sycl3_V13ext5intel12experimental9pipe_base17wait_non_blockingERKNS0_5eventE _ZN4sycl3_V13ext5intel12experimental9pipe_base18get_pipe_name_implEPKv diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 8eed8b8dba437..7506f4fec3f22 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -286,12 +286,12 @@ ??$is_image_handle_supported@Usampled_image_handle@experimental@oneapi@ext@_V1@sycl@@@experimental@oneapi@ext@_V1@sycl@@YA_NAEBUimage_descriptor@01234@W4image_memory_handle_type@01234@AEBVqueue@34@@Z ??$is_image_handle_supported@Uunsampled_image_handle@experimental@oneapi@ext@_V1@sycl@@@experimental@oneapi@ext@_V1@sycl@@YA_NAEBUimage_descriptor@01234@W4image_memory_handle_type@01234@AEBVdevice@34@AEBVcontext@34@@Z ??$is_image_handle_supported@Uunsampled_image_handle@experimental@oneapi@ext@_V1@sycl@@@experimental@oneapi@ext@_V1@sycl@@YA_NAEBUimage_descriptor@01234@W4image_memory_handle_type@01234@AEBVqueue@34@@Z -??$submit_kernel_direct_with_event_impl@$00@_V1@sycl@@YA?AVevent@01@AEBVqueue@01@AEBV?$nd_range@$00@01@AEAV?$shared_ptr@VHostKernelBase@detail@_V1@sycl@@@std@@PEAVDeviceKernelInfo@detail@01@AEBUcode_location@801@_N@Z -??$submit_kernel_direct_with_event_impl@$01@_V1@sycl@@YA?AVevent@01@AEBVqueue@01@AEBV?$nd_range@$01@01@AEAV?$shared_ptr@VHostKernelBase@detail@_V1@sycl@@@std@@PEAVDeviceKernelInfo@detail@01@AEBUcode_location@801@_N@Z -??$submit_kernel_direct_with_event_impl@$02@_V1@sycl@@YA?AVevent@01@AEBVqueue@01@AEBV?$nd_range@$02@01@AEAV?$shared_ptr@VHostKernelBase@detail@_V1@sycl@@@std@@PEAVDeviceKernelInfo@detail@01@AEBUcode_location@801@_N@Z -??$submit_kernel_direct_without_event_impl@$00@_V1@sycl@@YAXAEBVqueue@01@AEBV?$nd_range@$00@01@AEAV?$shared_ptr@VHostKernelBase@detail@_V1@sycl@@@std@@PEAVDeviceKernelInfo@detail@01@AEBUcode_location@701@_N@Z -??$submit_kernel_direct_without_event_impl@$01@_V1@sycl@@YAXAEBVqueue@01@AEBV?$nd_range@$01@01@AEAV?$shared_ptr@VHostKernelBase@detail@_V1@sycl@@@std@@PEAVDeviceKernelInfo@detail@01@AEBUcode_location@701@_N@Z -??$submit_kernel_direct_without_event_impl@$02@_V1@sycl@@YAXAEBVqueue@01@AEBV?$nd_range@$02@01@AEAV?$shared_ptr@VHostKernelBase@detail@_V1@sycl@@@std@@PEAVDeviceKernelInfo@detail@01@AEBUcode_location@701@_N@Z +??$submit_kernel_direct_with_event_impl@$00@_V1@sycl@@YA?AVevent@01@AEBVqueue@01@AEBV?$nd_range@$00@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@601@AEBUcode_location@601@_N@Z +??$submit_kernel_direct_with_event_impl@$01@_V1@sycl@@YA?AVevent@01@AEBVqueue@01@AEBV?$nd_range@$01@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@601@AEBUcode_location@601@_N@Z +??$submit_kernel_direct_without_event_impl@$02@_V1@sycl@@YAXAEBVqueue@01@AEBV?$nd_range@$02@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@501@AEBUcode_location@501@_N@Z +??$submit_kernel_direct_without_event_impl@$00@_V1@sycl@@YAXAEBVqueue@01@AEBV?$nd_range@$00@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@501@AEBUcode_location@501@_N@Z +??$submit_kernel_direct_without_event_impl@$01@_V1@sycl@@YAXAEBVqueue@01@AEBV?$nd_range@$01@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@501@AEBUcode_location@501@_N@Z +??$submit_kernel_direct_with_event_impl@$02@_V1@sycl@@YA?AVevent@01@AEBVqueue@01@AEBV?$nd_range@$02@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@601@AEBUcode_location@601@_N@Z ??$update_nd_range@$00@node@experimental@oneapi@ext@_V1@sycl@@QEAAXV?$nd_range@$00@45@@Z ??$update_nd_range@$01@node@experimental@oneapi@ext@_V1@sycl@@QEAAXV?$nd_range@$01@45@@Z ??$update_nd_range@$02@node@experimental@oneapi@ext@_V1@sycl@@QEAAXV?$nd_range@$02@45@@Z diff --git a/sycl/unittests/Extensions/FreeFunctionCommands/FreeFunctionCommandsEvents.cpp b/sycl/unittests/Extensions/FreeFunctionCommands/FreeFunctionCommandsEvents.cpp index 1f6a73df055c2..360bdca27e73f 100644 --- a/sycl/unittests/Extensions/FreeFunctionCommands/FreeFunctionCommandsEvents.cpp +++ b/sycl/unittests/Extensions/FreeFunctionCommands/FreeFunctionCommandsEvents.cpp @@ -21,6 +21,22 @@ class TestFunctor { void operator()(sycl::item<1>) const {} void operator()(sycl::nd_item<1> Item) const {} }; + +class TestMoveFunctor { +public: + static int MoveCtorCalls; + + TestMoveFunctor() = default; + TestMoveFunctor(const TestMoveFunctor &) = default; + TestMoveFunctor(TestMoveFunctor &&) { ++MoveCtorCalls; } + void operator()() const {} + void operator()(sycl::item<1>) const {} + void operator()(sycl::nd_item<1> Item) const {} + void operator()(sycl::nd_item<3> Item) const {} +}; + +int TestMoveFunctor::MoveCtorCalls = 0; + namespace sycl { inline namespace _V1 { namespace detail { @@ -28,20 +44,28 @@ template <> struct KernelInfo : public unittest::MockKernelInfoBase { static constexpr const char *getName() { return "TestFunctor"; } static constexpr int64_t getKernelSize() { return sizeof(TestFunctor); } - static constexpr const char *getFileName() { return "TestFunctor.hpp"; } +}; + +template <> +struct KernelInfo : public unittest::MockKernelInfoBase { + static constexpr const char *getName() { return "TestMoveFunctor"; } + static constexpr int64_t getKernelSize() { return sizeof(TestMoveFunctor); } + static constexpr const char *getFileName() { return "TestMoveFunctor.hpp"; } static constexpr const char *getFunctionName() { - return "TestFunctorFunctionName"; + return "TestMoveFunctorFunctionName"; } static constexpr unsigned getLineNumber() { return 13; } static constexpr unsigned getColumnNumber() { return 8; } }; + } // namespace detail } // namespace _V1 } // namespace sycl -static sycl::unittest::MockDeviceImage Img = - sycl::unittest::generateDefaultImage({"TestFunctor"}); -static sycl::unittest::MockDeviceImageArray<1> ImgArray{&Img}; +static sycl::unittest::MockDeviceImage Imgs[2] = { + sycl::unittest::generateDefaultImage({"TestFunctor"}), + sycl::unittest::generateDefaultImage({"TestMoveFunctor"})}; +static sycl::unittest::MockDeviceImageArray<2> ImgArray{Imgs}; namespace { @@ -203,6 +227,25 @@ TEST_F(FreeFunctionCommandsEventsTests, LaunchGroupedShortcutNoEvent) { ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); } +#if __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT +TEST_F(FreeFunctionCommandsEventsTests, + LaunchGroupedShortcutMoveKernelNoEvent) { + mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", + &redefined_urEnqueueKernelLaunch); + + TestMoveFunctor::MoveCtorCalls = 0; + TestMoveFunctor MoveOnly; + sycl::khr::launch_grouped(Queue, sycl::range<1>{32}, sycl::range<1>{32}, + std::move(MoveOnly)); + // Move ctor for TestMoveFunctor is called during move construction of + // HostKernel. Copy ctor is called by InstantiateKernelOnHost, can't delete + // it. + ASSERT_EQ(TestMoveFunctor::MoveCtorCalls, 1); + + ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); +} +#endif + TEST_F(FreeFunctionCommandsEventsTests, SubmitLaunchGroupedKernelNoEvent) { mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", &redefined_urEnqueueKernelLaunch);