diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index ea8964bb727de..e22a7385e22b9 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -215,7 +215,7 @@ void event_impl::setQueue(queue_impl &Queue) { void event_impl::setSubmittedQueue(std::weak_ptr SubmittedQueue) { MSubmittedQueue = std::move(SubmittedQueue); if (MHostProfilingInfo) { - if (auto QueuePtr = MSubmittedQueue.lock()) { + if (std::shared_ptr QueuePtr = MSubmittedQueue.lock()) { device_impl &Device = QueuePtr->getDeviceImpl(); MHostProfilingInfo->setDevice(&Device); } @@ -248,7 +248,7 @@ void *event_impl::instrumentationProlog(std::string &Name, int32_t StreamID, // queue is available with the wait events. We check to see if the // TraceEvent is available in the Queue object. void *TraceEvent = nullptr; - if (QueueImplPtr Queue = MQueue.lock()) { + if (std::shared_ptr Queue = MQueue.lock()) { TraceEvent = Queue->getTraceEvent(); WaitEvent = (TraceEvent ? static_cast(TraceEvent) : GSYCLGraphEvent); @@ -317,7 +317,7 @@ void event_impl::wait_and_throw( std::shared_ptr Self) { wait(Self); - if (QueueImplPtr SubmittedQueue = MSubmittedQueue.lock()) + if (std::shared_ptr SubmittedQueue = MSubmittedQueue.lock()) SubmittedQueue->throw_asynchronous(); } @@ -462,7 +462,7 @@ event_impl::get_backend_info() const { "the info::platform::version info descriptor can " "only be queried with an OpenCL backend"); } - if (QueueImplPtr Queue = MQueue.lock()) { + if (std::shared_ptr Queue = MQueue.lock()) { return Queue->getDeviceImpl() .get_platform() .get_info(); @@ -485,7 +485,7 @@ event_impl::get_backend_info() const { "the info::device::version info descriptor can only " "be queried with an OpenCL backend"); } - if (QueueImplPtr Queue = MQueue.lock()) { + if (std::shared_ptr Queue = MQueue.lock()) { return Queue->getDeviceImpl().get_info(); } return ""; // If the queue has been released, no device will be associated so @@ -552,21 +552,21 @@ std::vector event_impl::getWaitList() { return Result; } -void event_impl::flushIfNeeded(const QueueImplPtr &UserQueue) { +void event_impl::flushIfNeeded(queue_impl *UserQueue) { // Some events might not have a native handle underneath even at this point, // e.g. those produced by memset with 0 size (no UR call is made). auto Handle = this->getHandle(); if (MIsFlushed || !Handle) return; - QueueImplPtr Queue = MQueue.lock(); + std::shared_ptr Queue = MQueue.lock(); // If the queue has been released, all of the commands have already been // implicitly flushed by urQueueRelease. if (!Queue) { MIsFlushed = true; return; } - if (Queue == UserQueue) + if (Queue.get() == UserQueue) return; // Check if the task for this event has already been submitted. @@ -604,9 +604,9 @@ void event_impl::setSubmissionTime() { if (!MIsProfilingEnabled && !MProfilingTagEvent) return; - std::weak_ptr Queue = isHost() ? MSubmittedQueue : MQueue; - if (QueueImplPtr QueuePtr = Queue.lock()) { - device_impl &Device = QueuePtr->getDeviceImpl(); + if (std::shared_ptr Queue = + isHost() ? MSubmittedQueue.lock() : MQueue.lock()) { + device_impl &Device = Queue->getDeviceImpl(); MSubmitTime = getTimestamp(&Device); } } diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index ca36300394d04..245e218ff4112 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -31,7 +31,6 @@ class Adapter; class context_impl; using ContextImplPtr = std::shared_ptr; class queue_impl; -using QueueImplPtr = std::shared_ptr; class event_impl; using EventImplPtr = std::shared_ptr; @@ -242,7 +241,7 @@ class event_impl : public std::enable_shared_from_this { /// Performs a flush on the queue associated with this event if the user queue /// is different and the task associated with this event hasn't been submitted /// to the device yet. - void flushIfNeeded(const QueueImplPtr &UserQueue); + void flushIfNeeded(queue_impl *UserQueue); /// Cleans dependencies of this event_impl. void cleanupDependencyEvents(); @@ -262,7 +261,9 @@ class event_impl : public std::enable_shared_from_this { /// /// @return shared_ptr to MWorkerQueue, please be aware it can be empty /// pointer - QueueImplPtr getWorkerQueue() { return MWorkerQueue.lock(); }; + std::shared_ptr getWorkerQueue() { + return MWorkerQueue.lock(); + }; /// Sets worker queue for command. /// @@ -289,7 +290,9 @@ class event_impl : public std::enable_shared_from_this { /// @return Submission time for command associated with this event uint64_t getSubmissionTime(); - QueueImplPtr getSubmittedQueue() const { return MSubmittedQueue.lock(); }; + std::shared_ptr getSubmittedQueue() const { + return MSubmittedQueue.lock(); + }; /// Checks if this event is complete. /// diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 946ad402de46a..94b98877d6399 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -323,13 +323,6 @@ bool Command::isFusable() const { } #endif // __INTEL_PREVIEW_BREAKING_CHANGES -static void flushCrossQueueDeps(const std::vector &EventImpls, - const QueueImplPtr &Queue) { - for (auto &EventImpl : EventImpls) { - EventImpl->flushIfNeeded(Queue); - } -} - namespace { struct EnqueueNativeCommandData { @@ -553,7 +546,7 @@ void Command::waitForEvents(QueueImplPtr Queue, } } else { std::vector RawEvents = getUrEvents(EventImpls); - flushCrossQueueDeps(EventImpls, MWorkerQueue); + flushCrossQueueDeps(EventImpls); const AdapterPtr &Adapter = Queue->getAdapter(); Adapter->call( @@ -1397,7 +1390,7 @@ ur_result_t MapMemObject::enqueueImp() { waitForPreparedHostEvents(); std::vector EventImpls = MPreparedDepsEvents; std::vector RawEvents = getUrEvents(EventImpls); - flushCrossQueueDeps(EventImpls, MWorkerQueue); + flushCrossQueueDeps(EventImpls); ur_event_handle_t UREvent = nullptr; if (auto Result = callMemOpHelperRet( @@ -1480,7 +1473,7 @@ ur_result_t UnMapMemObject::enqueueImp() { waitForPreparedHostEvents(); std::vector EventImpls = MPreparedDepsEvents; std::vector RawEvents = getUrEvents(EventImpls); - flushCrossQueueDeps(EventImpls, MWorkerQueue); + flushCrossQueueDeps(EventImpls); ur_event_handle_t UREvent = nullptr; if (auto Result = @@ -1590,7 +1583,7 @@ ur_result_t MemCpyCommand::enqueueImp() { ur_event_handle_t UREvent = nullptr; auto RawEvents = getUrEvents(EventImpls); - flushCrossQueueDeps(EventImpls, MWorkerQueue); + flushCrossQueueDeps(EventImpls); if (auto Result = callMemOpHelper( MemoryManager::copy, MSrcAllocaCmd->getSYCLMemObj(), @@ -1751,7 +1744,7 @@ ur_result_t MemCpyCommandHost::enqueueImp() { return UR_RESULT_SUCCESS; } - flushCrossQueueDeps(EventImpls, MWorkerQueue); + flushCrossQueueDeps(EventImpls); if (auto Result = callMemOpHelper( MemoryManager::copy, MSrcAllocaCmd->getSYCLMemObj(), @@ -2850,7 +2843,7 @@ ur_result_t ExecCGCommand::enqueueImpCommandBuffer() { // submissions of the command buffer itself will not receive dependencies on // them, e.g. initial copies from host to device std::vector EventImpls = MPreparedDepsEvents; - flushCrossQueueDeps(EventImpls, MWorkerQueue); + flushCrossQueueDeps(EventImpls); std::vector RawEvents = getUrEvents(EventImpls); if (!RawEvents.empty()) { MQueue->getAdapter()->call(RawEvents.size(), @@ -3130,7 +3123,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { waitForPreparedHostEvents(); std::vector EventImpls = MPreparedDepsEvents; auto RawEvents = getUrEvents(EventImpls); - flushCrossQueueDeps(EventImpls, MWorkerQueue); + flushCrossQueueDeps(EventImpls); // We can omit creating a UR event and create a "discarded" event if the // command has been explicitly marked as not needing an event, e.g. if the diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index b9bc793f7a2a5..c21c8bf240255 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -272,6 +272,12 @@ class Command { void waitForPreparedHostEvents() const; + void flushCrossQueueDeps(const std::vector &EventImpls) { + for (auto &EventImpl : EventImpls) { + EventImpl->flushIfNeeded(MWorkerQueue.get()); + } + } + /// Perform glueing of events from different contexts /// \param DepEvent event this commands should depend on /// \param Dep optional DepDesc to perform connection of events properly