From 2cc3a1291f4f7c63d592a5a830d7caef0e9762cc Mon Sep 17 00:00:00 2001 From: James Brodman Date: Wed, 9 Apr 2025 16:32:22 -0400 Subject: [PATCH 1/8] Remove the legacy implementation of emulating OoO queues with multiple queues. Signed-off-by: James Brodman --- sycl/source/detail/queue_impl.cpp | 71 ++-------- sycl/source/detail/queue_impl.hpp | 70 ++-------- .../Extensions/CommandGraph/InOrderQueue.cpp | 3 +- sycl/unittests/queue/CMakeLists.txt | 1 - sycl/unittests/queue/EventClear.cpp | 124 ------------------ sycl/unittests/queue/Wait.cpp | 12 -- 6 files changed, 19 insertions(+), 262 deletions(-) delete mode 100644 sycl/unittests/queue/EventClear.cpp diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 6df1e1ea4bf67..0117edc772688 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -60,7 +60,7 @@ template <> uint32_t queue_impl::get_info() const { ur_result_t result = UR_RESULT_SUCCESS; getAdapter()->call( - MQueues[0], UR_QUEUE_INFO_REFERENCE_COUNT, sizeof(result), &result, + MQueue, UR_QUEUE_INFO_REFERENCE_COUNT, sizeof(result), &result, nullptr); return result; } @@ -305,56 +305,13 @@ sycl::detail::optional queue_impl::getLastEvent() { void queue_impl::addEvent(const event &Event) { const EventImplPtr &EImpl = getSyclObjImpl(Event); assert(EImpl && "Event implementation is missing"); - auto *Cmd = static_cast(EImpl->getCommand()); - if (!Cmd) { - // if there is no command on the event, we cannot track it with MEventsWeak - // as that will leave it with no owner. Track in MEventsShared only if we're - // unable to call urQueueFinish during wait. - if (MEmulateOOO) - addSharedEvent(Event); - } - // As long as the queue supports urQueueFinish we only need to store events - // for undiscarded, unenqueued commands and host tasks. - else if (MEmulateOOO || - (EImpl->getHandle() == nullptr && !EImpl->isDiscarded())) { + if (EImpl->getHandle() == nullptr && !EImpl->isDiscarded()) { std::weak_ptr EventWeakPtr{EImpl}; std::lock_guard Lock{MMutex}; MEventsWeak.push_back(std::move(EventWeakPtr)); } } -/// addSharedEvent - queue_impl tracks events with weak pointers -/// but some events have no other owner. In this case, -/// addSharedEvent will have the queue track the events via a shared pointer. -void queue_impl::addSharedEvent(const event &Event) { - assert(MEmulateOOO); - std::lock_guard Lock(MMutex); - // Events stored in MEventsShared are not released anywhere else aside from - // calls to queue::wait/wait_and_throw, which a user application might not - // make, and ~queue_impl(). If the number of events grows large enough, - // there's a good chance that most of them are already completed and ownership - // of them can be released. - const size_t EventThreshold = 128; - if (MEventsShared.size() >= EventThreshold) { - // Generally, the vector is ordered so that the oldest events are in the - // front and the newer events are in the end. So, search to find the first - // event that isn't yet complete. All the events prior to that can be - // erased. This could leave some few events further on that have completed - // not yet erased, but that is OK. This cleanup doesn't have to be perfect. - // This also keeps the algorithm linear rather than quadratic because it - // doesn't continually recheck things towards the back of the list that - // really haven't had time to complete. - MEventsShared.erase( - MEventsShared.begin(), - std::find_if( - MEventsShared.begin(), MEventsShared.end(), [](const event &E) { - return E.get_info() != - info::event_command_status::complete; - })); - } - MEventsShared.push_back(Event); -} - event queue_impl::submit_impl(const detail::type_erased_cgfo_ty &CGF, const std::shared_ptr &Self, const std::shared_ptr &PrimaryQueue, @@ -490,9 +447,7 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr &Self, : MExtGraphDeps.LastEventPtr; EventToStoreIn = EventImpl; } - // Track only if we won't be able to handle it with urQueueFinish. - if (MEmulateOOO) - addSharedEvent(ResEvent); + return discard_or_return(ResEvent); } } @@ -612,11 +567,9 @@ void queue_impl::wait(const detail::code_location &CodeLoc) { } std::vector> WeakEvents; - std::vector SharedEvents; { std::lock_guard Lock(MMutex); WeakEvents.swap(MEventsWeak); - SharedEvents.swap(MEventsShared); { std::lock_guard RequestLock(MMissedCleanupRequestsMtx); @@ -630,27 +583,19 @@ void queue_impl::wait(const detail::code_location &CodeLoc) { // directly. Otherwise, only wait for unenqueued or host task events, starting // from the latest submitted task in order to minimize total amount of calls, // then handle the rest with urQueueFinish. - const bool SupportsPiFinish = !MEmulateOOO; for (auto EventImplWeakPtrIt = WeakEvents.rbegin(); EventImplWeakPtrIt != WeakEvents.rend(); ++EventImplWeakPtrIt) { if (std::shared_ptr EventImplSharedPtr = EventImplWeakPtrIt->lock()) { // A nullptr UR event indicates that urQueueFinish will not cover it, // either because it's a host task event or an unenqueued one. - if (!SupportsPiFinish || nullptr == EventImplSharedPtr->getHandle()) { + if (nullptr == EventImplSharedPtr->getHandle()) { EventImplSharedPtr->wait(EventImplSharedPtr); } } } - if (SupportsPiFinish) { - const AdapterPtr &Adapter = getAdapter(); - Adapter->call(getHandleRef()); - assert(SharedEvents.empty() && "Queues that support calling piQueueFinish " - "shouldn't have shared events"); - } else { - for (event &Event : SharedEvents) - Event.wait(); - } + const AdapterPtr &Adapter = getAdapter(); + Adapter->call(getHandleRef()); std::vector StreamsServiceEvents; { @@ -730,7 +675,7 @@ ur_native_handle_t queue_impl::getNative(int32_t &NativeHandleDesc) const { nullptr, nullptr}; UrNativeDesc.pNativeData = &NativeHandleDesc; - Adapter->call(MQueues[0], &UrNativeDesc, + Adapter->call(MQueue, &UrNativeDesc, &Handle); if (getContextImplPtr()->getBackend() == backend::opencl) __SYCL_OCL_CALL(clRetainCommandQueue, ur::cast(Handle)); @@ -759,7 +704,7 @@ bool queue_impl::ext_oneapi_empty() const { // Check the status of the backend queue if this is not a host queue. ur_bool_t IsReady = false; getAdapter()->call( - MQueues[0], UR_QUEUE_INFO_EMPTY, sizeof(IsReady), &IsReady, nullptr); + MQueue, UR_QUEUE_INFO_EMPTY, sizeof(IsReady), &IsReady, nullptr); if (!IsReady) return false; diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 2c5d13a1e3831..826a7fb8e7dd4 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -166,7 +166,7 @@ class queue_impl { } const QueueOrder QOrder = MIsInorder ? QueueOrder::Ordered : QueueOrder::OOO; - MQueues.push_back(createQueue(QOrder)); + MQueue = createQueue(QOrder); // This section is the second part of the instrumentation that uses the // tracepoint information and notifies @@ -191,13 +191,13 @@ class queue_impl { "discard_events and enable_profiling."); } - MQueues.push_back(UrQueue); + MQueue = UrQueue; ur_device_handle_t DeviceUr{}; const AdapterPtr &Adapter = getAdapter(); // TODO catch an exception and put it to list of asynchronous exceptions Adapter->call( - MQueues[0], UR_QUEUE_INFO_DEVICE, sizeof(DeviceUr), &DeviceUr, nullptr); + MQueue, UR_QUEUE_INFO_DEVICE, sizeof(DeviceUr), &DeviceUr, nullptr); MDevice = MContext->findMatchingDeviceImpl(DeviceUr); if (MDevice == nullptr) { throw sycl::exception( @@ -264,7 +264,7 @@ class queue_impl { destructorNotification(); #endif throw_asynchronous(); - getAdapter()->call(MQueues[0]); + getAdapter()->call(MQueue); } catch (std::exception &e) { __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~queue_impl", e); } @@ -274,7 +274,7 @@ class queue_impl { cl_command_queue get() { ur_native_handle_t nativeHandle = 0; - getAdapter()->call(MQueues[0], nullptr, + getAdapter()->call(MQueue, nullptr, &nativeHandle); __SYCL_OCL_CALL(clRetainCommandQueue, ur::cast(nativeHandle)); return ur::cast(nativeHandle); @@ -322,9 +322,7 @@ class queue_impl { "flush cannot be called for a queue which is " "recording to a command graph."); } - for (const auto &queue : MQueues) { - getAdapter()->call(queue); - } + getAdapter()->call(MQueue); } /// Submits a command group function object to the queue, in order to be @@ -542,59 +540,15 @@ class queue_impl { } ur_result_t Error = Adapter->call_nocheck( Context, Device, &Properties, &Queue); - - // If creating out-of-order queue failed and this property is not - // supported (for example, on FPGA), it will return - // UR_RESULT_ERROR_INVALID_QUEUE_PROPERTIES and will try to create in-order - // queue. - if (!MEmulateOOO && Error == UR_RESULT_ERROR_INVALID_QUEUE_PROPERTIES) { - MEmulateOOO = true; - Queue = createQueue(QueueOrder::Ordered); - } else { - Adapter->checkUrResult(Error); - } + Adapter->checkUrResult(Error); return Queue; } - /// \return a raw UR handle for a free queue. The returned handle is not - /// retained. It is caller responsibility to make sure queue is still alive. - ur_queue_handle_t &getExclusiveUrQueueHandleRef() { - ur_queue_handle_t *PIQ = nullptr; - bool ReuseQueue = false; - { - std::lock_guard Lock(MMutex); - - // To achieve parallelism for FPGA with in order execution model with - // possibility of two kernels to share data with each other we shall - // create a queue for every kernel enqueued. - if (MQueues.size() < MaxNumQueues) { - MQueues.push_back({}); - PIQ = &MQueues.back(); - } else { - // If the limit of OpenCL queues is going to be exceeded - take the - // earliest used queue, wait until it finished and then reuse it. - PIQ = &MQueues[MNextQueueIdx]; - MNextQueueIdx = (MNextQueueIdx + 1) % MaxNumQueues; - ReuseQueue = true; - } - } - - if (!ReuseQueue) - *PIQ = createQueue(QueueOrder::Ordered); - else - getAdapter()->call(*PIQ); - - return *PIQ; - } - /// \return a raw UR queue handle. The returned handle is not retained. It /// is caller responsibility to make sure queue is still alive. ur_queue_handle_t &getHandleRef() { - if (!MEmulateOOO) - return MQueues[0]; - - return getExclusiveUrQueueHandleRef(); + return MQueue; } /// \return true if the queue was constructed with property specified by @@ -1000,13 +954,7 @@ class queue_impl { const property_list MPropList; /// List of queues created for FPGA device from a single SYCL queue. - std::vector MQueues; - /// Iterator through MQueues. - size_t MNextQueueIdx = 0; - - /// Indicates that a native out-of-order queue could not be created and we - /// need to emulate it with multiple native in-order queues. - bool MEmulateOOO = false; + ur_queue_handle_t MQueue; // Access should be guarded with MMutex struct DependencyTrackingItems { diff --git a/sycl/unittests/Extensions/CommandGraph/InOrderQueue.cpp b/sycl/unittests/Extensions/CommandGraph/InOrderQueue.cpp index f65018d63fb35..d2e6763b2a0ee 100644 --- a/sycl/unittests/Extensions/CommandGraph/InOrderQueue.cpp +++ b/sycl/unittests/Extensions/CommandGraph/InOrderQueue.cpp @@ -271,6 +271,7 @@ TEST_F(CommandGraphTest, InOrderQueueWithPreviousHostTask) { // Record in-order queue with three nodes. InOrderGraph.begin_recording(InOrderQueue); + #if 1 auto Node1Graph = InOrderQueue.submit( [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); @@ -305,7 +306,7 @@ TEST_F(CommandGraphTest, InOrderQueueWithPreviousHostTask) { ASSERT_EQ(PtrNode2->MSuccessors.front().lock(), PtrNode3); ASSERT_EQ(PtrNode3->MPredecessors.size(), 1lu); ASSERT_EQ(PtrNode3->MPredecessors.front().lock(), PtrNode2); - +#endif InOrderGraph.end_recording(InOrderQueue); auto EventLast = InOrderQueue.submit( diff --git a/sycl/unittests/queue/CMakeLists.txt b/sycl/unittests/queue/CMakeLists.txt index 844e2509337fe..1045a1865cb29 100644 --- a/sycl/unittests/queue/CMakeLists.txt +++ b/sycl/unittests/queue/CMakeLists.txt @@ -1,6 +1,5 @@ add_sycl_unittest(QueueTests OBJECT DeviceCheck.cpp - EventClear.cpp Hash.cpp USM.cpp Wait.cpp diff --git a/sycl/unittests/queue/EventClear.cpp b/sycl/unittests/queue/EventClear.cpp deleted file mode 100644 index 2000235b1f15d..0000000000000 --- a/sycl/unittests/queue/EventClear.cpp +++ /dev/null @@ -1,124 +0,0 @@ -//==------------------ EventClear.cpp --- queue unit tests -----------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include -#include -#include -#include - -using namespace sycl; - -struct TestCtx { - TestCtx(context &Ctx) : Ctx{Ctx} {}; - - context &Ctx; - int NEventsWaitedFor = 0; - int EventReferenceCount = 0; -}; - -std::unique_ptr TestContext; - -const int ExpectedEventThreshold = 128; - -ur_result_t redefinedQueueCreate(void *pParams) { - auto params = *static_cast(pParams); - assert(*params.ppProperties); - // Use in-order queues to force storing events for calling wait on them, - // rather than calling urQueueFinish. - if ((*params.ppProperties)->flags & - UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE) { - return UR_RESULT_ERROR_INVALID_QUEUE_PROPERTIES; - } - return UR_RESULT_SUCCESS; -} - -ur_result_t redefinedEventsWait(void *) { - ++TestContext->NEventsWaitedFor; - return UR_RESULT_SUCCESS; -} - -ur_result_t redefinedEventGetInfoAfter(void *pParams) { - auto params = *static_cast(pParams); - EXPECT_EQ(*params.ppropName, UR_EVENT_INFO_COMMAND_EXECUTION_STATUS) - << "Unexpected event info requested"; - // Report first half of events as complete. - // Report second half of events as running. - // This is important, because removal algorithm assumes that - // events are likely to be removed oldest first, and stops removing - // at the first non-completed event. - static int Counter = 0; - auto *Result = reinterpret_cast(*params.ppPropValue); - *Result = (Counter < (ExpectedEventThreshold / 2)) ? UR_EVENT_STATUS_COMPLETE - : UR_EVENT_STATUS_RUNNING; - Counter++; - return UR_RESULT_SUCCESS; -} - -ur_result_t redefinedEventRetain(void *) { - ++TestContext->EventReferenceCount; - return UR_RESULT_SUCCESS; -} - -ur_result_t redefinedEventRelease(void *) { - --TestContext->EventReferenceCount; - return UR_RESULT_SUCCESS; -} - -void prepareUrMock(unittest::UrMock<> &Mock) { - mock::getCallbacks().set_before_callback("urQueueCreate", - &redefinedQueueCreate); - mock::getCallbacks().set_before_callback("urEventWait", &redefinedEventsWait); - mock::getCallbacks().set_after_callback("urEventGetInfo", - &redefinedEventGetInfoAfter); - mock::getCallbacks().set_before_callback("urEventRetain", - &redefinedEventRetain); - mock::getCallbacks().set_before_callback("urEventRelease", - &redefinedEventRelease); -} - -// Check that the USM events are cleared from the queue upon call to wait(), -// so that they are not waited for multiple times. -TEST(QueueEventClear, ClearOnQueueWait) { - sycl::unittest::UrMock<> Mock; - sycl::platform Plt = sycl::platform(); - prepareUrMock(Mock); - - context Ctx{Plt.get_devices()[0]}; - TestContext.reset(new TestCtx(Ctx)); - queue Q{Ctx, default_selector()}; - - unsigned char *HostAlloc = (unsigned char *)malloc_host(1, Ctx); - TestContext->EventReferenceCount = 1; - Q.memset(HostAlloc, 42, 1); - Q.wait(); - ASSERT_EQ(TestContext->NEventsWaitedFor, 1); - ASSERT_EQ(TestContext->EventReferenceCount, 0); - Q.wait(); - ASSERT_EQ(TestContext->NEventsWaitedFor, 1); -} - -// Check that shared events are cleaned up from the queue once their number -// exceeds a threshold. -TEST(QueueEventClear, CleanupOnThreshold) { - sycl::unittest::UrMock<> Mock; - sycl::platform Plt = sycl::platform(); - prepareUrMock(Mock); - - context Ctx{Plt.get_devices()[0]}; - TestContext.reset(new TestCtx(Ctx)); - queue Q{Ctx, default_selector()}; - - unsigned char *HostAlloc = (unsigned char *)malloc_host(1, Ctx); - TestContext->EventReferenceCount = ExpectedEventThreshold; - for (size_t I = 0; I < ExpectedEventThreshold; ++I) { - Q.memset(HostAlloc, 42, 1).wait(); - } - // Half of the events (those reported as completed) should be released. - Q.memset(HostAlloc, 42, 1); - ASSERT_EQ(TestContext->EventReferenceCount, ExpectedEventThreshold / 2); -} diff --git a/sycl/unittests/queue/Wait.cpp b/sycl/unittests/queue/Wait.cpp index 023f07a0a5284..95e32a2ff8a43 100644 --- a/sycl/unittests/queue/Wait.cpp +++ b/sycl/unittests/queue/Wait.cpp @@ -138,18 +138,6 @@ TEST(QueueWait, QueueWaitTest) { ASSERT_EQ(TestContext.NEventsWaitedFor, 1); ASSERT_TRUE(TestContext.UrQueueFinishCalled); } - - // Test behaviour for emulating an OOO queue with multiple in-order ones. - TestContext = {}; - TestContext.SupportOOO = false; - Q = queue{Ctx, default_selector()}; - Q.memset(HostAlloc, 42, 1); - // The event is kept alive in this case to call wait. - ASSERT_EQ(TestContext.EventReferenceCount, 1); - Q.wait(); - ASSERT_EQ(TestContext.EventReferenceCount, 0); - ASSERT_EQ(TestContext.NEventsWaitedFor, 1); - ASSERT_FALSE(TestContext.UrQueueFinishCalled); } } // namespace From 27f025d4b323aa456bdadd5a67130737831eb20b Mon Sep 17 00:00:00 2001 From: James Brodman Date: Wed, 9 Apr 2025 16:55:29 -0400 Subject: [PATCH 2/8] Nuke some more vestigial bits Signed-off-by: James Brodman --- sycl/source/detail/queue_impl.cpp | 8 +------- sycl/source/detail/queue_impl.hpp | 1 - 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 0117edc772688..32d11ca2425fb 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -555,10 +555,9 @@ void queue_impl::wait(const detail::code_location &CodeLoc) { // Additionally, we can clean up the event lists that we would have // otherwise cleared. - if (!MEventsWeak.empty() || !MEventsShared.empty()) { + if (!MEventsWeak.empty()) { std::lock_guard Lock(MMutex); MEventsWeak.clear(); - MEventsShared.clear(); } if (!MStreamsServiceEvents.empty()) { std::lock_guard Lock(MStreamsServiceEventsMutex); @@ -711,11 +710,6 @@ bool queue_impl::ext_oneapi_empty() const { // We may have events like host tasks which are not submitted to the backend // queue so we need to get their status separately. std::lock_guard Lock(MMutex); - for (event Event : MEventsShared) - if (Event.get_info() != - info::event_command_status::complete) - return false; - for (auto EventImplWeakPtrIt = MEventsWeak.begin(); EventImplWeakPtrIt != MEventsWeak.end(); ++EventImplWeakPtrIt) if (std::shared_ptr EventImplSharedPtr = diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 826a7fb8e7dd4..acae2ecf94217 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -948,7 +948,6 @@ class queue_impl { /// Events without data dependencies (such as USM) need an owner, /// additionally, USM operations are not added to the scheduler command graph, /// queue is the only owner on the runtime side. - std::vector MEventsShared; exception_list MExceptions; const async_handler MAsyncHandler; const property_list MPropList; From 437fa5486b6f5b69c77449cb8d90b520ab283efa Mon Sep 17 00:00:00 2001 From: James Brodman Date: Wed, 9 Apr 2025 17:14:30 -0400 Subject: [PATCH 3/8] Format Signed-off-by: James Brodman --- sycl/source/detail/queue_impl.cpp | 3 +-- sycl/source/detail/queue_impl.hpp | 4 +--- sycl/unittests/Extensions/CommandGraph/InOrderQueue.cpp | 2 +- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 32d11ca2425fb..48098f165ff30 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -60,8 +60,7 @@ template <> uint32_t queue_impl::get_info() const { ur_result_t result = UR_RESULT_SUCCESS; getAdapter()->call( - MQueue, UR_QUEUE_INFO_REFERENCE_COUNT, sizeof(result), &result, - nullptr); + MQueue, UR_QUEUE_INFO_REFERENCE_COUNT, sizeof(result), &result, nullptr); return result; } diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index acae2ecf94217..83efde5dac411 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -547,9 +547,7 @@ class queue_impl { /// \return a raw UR queue handle. The returned handle is not retained. It /// is caller responsibility to make sure queue is still alive. - ur_queue_handle_t &getHandleRef() { - return MQueue; - } + ur_queue_handle_t &getHandleRef() { return MQueue; } /// \return true if the queue was constructed with property specified by /// PropertyT. diff --git a/sycl/unittests/Extensions/CommandGraph/InOrderQueue.cpp b/sycl/unittests/Extensions/CommandGraph/InOrderQueue.cpp index d2e6763b2a0ee..56854d656fe9e 100644 --- a/sycl/unittests/Extensions/CommandGraph/InOrderQueue.cpp +++ b/sycl/unittests/Extensions/CommandGraph/InOrderQueue.cpp @@ -271,7 +271,7 @@ TEST_F(CommandGraphTest, InOrderQueueWithPreviousHostTask) { // Record in-order queue with three nodes. InOrderGraph.begin_recording(InOrderQueue); - #if 1 +#if 1 auto Node1Graph = InOrderQueue.submit( [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); From f97b10eda051f63d4676f71c08cf3a0fa4bcab81 Mon Sep 17 00:00:00 2001 From: James Brodman Date: Thu, 10 Apr 2025 11:52:08 -0400 Subject: [PATCH 4/8] Update event wait check to skip Graph events Signed-off-by: James Brodman --- sycl/source/detail/queue_impl.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 48098f165ff30..6df412e3e6202 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -587,7 +587,8 @@ void queue_impl::wait(const detail::code_location &CodeLoc) { EventImplWeakPtrIt->lock()) { // A nullptr UR event indicates that urQueueFinish will not cover it, // either because it's a host task event or an unenqueued one. - if (nullptr == EventImplSharedPtr->getHandle()) { + if (!EventImplSharedPtr->hasCommandGraph() && + nullptr == EventImplSharedPtr->getHandle()) { EventImplSharedPtr->wait(EventImplSharedPtr); } } From 53ecd5c12bbe3ffdd7bb5400d52edaab059873e1 Mon Sep 17 00:00:00 2001 From: James Brodman Date: Thu, 10 Apr 2025 14:27:09 -0400 Subject: [PATCH 5/8] Add back a missing if Signed-off-by: James Brodman --- sycl/source/detail/queue_impl.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index aab20673de2bf..dae4192c4372e 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -301,7 +301,9 @@ sycl::detail::optional queue_impl::getLastEvent() { void queue_impl::addEvent(const event &Event) { const EventImplPtr &EImpl = getSyclObjImpl(Event); assert(EImpl && "Event implementation is missing"); - if (EImpl->getHandle() == nullptr && !EImpl->isDiscarded()) { + auto *Cmd = static_cast(EImpl->getCommand()); + if (Cmd != nullptr && EImpl->getHandle() == nullptr && + !EImpl->isDiscarded()) { std::weak_ptr EventWeakPtr{EImpl}; std::lock_guard Lock{MMutex}; MEventsWeak.push_back(std::move(EventWeakPtr)); From 220bcc26e7b1e6a1c48524df0282a4b120f7d2ee Mon Sep 17 00:00:00 2001 From: James Brodman Date: Fri, 11 Apr 2025 10:43:19 -0400 Subject: [PATCH 6/8] Remove Graph workaround as unnecessary. Signed-off-by: James Brodman --- sycl/source/detail/queue_impl.cpp | 3 +-- sycl/unittests/Extensions/CommandGraph/InOrderQueue.cpp | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index dae4192c4372e..58f4b0631cdb8 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -586,8 +586,7 @@ void queue_impl::wait(const detail::code_location &CodeLoc) { EventImplWeakPtrIt->lock()) { // A nullptr UR event indicates that urQueueFinish will not cover it, // either because it's a host task event or an unenqueued one. - if (!EventImplSharedPtr->hasCommandGraph() && - nullptr == EventImplSharedPtr->getHandle()) { + if (nullptr == EventImplSharedPtr->getHandle()) { EventImplSharedPtr->wait(EventImplSharedPtr); } } diff --git a/sycl/unittests/Extensions/CommandGraph/InOrderQueue.cpp b/sycl/unittests/Extensions/CommandGraph/InOrderQueue.cpp index 56854d656fe9e..93699e49c3d34 100644 --- a/sycl/unittests/Extensions/CommandGraph/InOrderQueue.cpp +++ b/sycl/unittests/Extensions/CommandGraph/InOrderQueue.cpp @@ -271,7 +271,7 @@ TEST_F(CommandGraphTest, InOrderQueueWithPreviousHostTask) { // Record in-order queue with three nodes. InOrderGraph.begin_recording(InOrderQueue); -#if 1 + auto Node1Graph = InOrderQueue.submit( [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); @@ -306,7 +306,7 @@ TEST_F(CommandGraphTest, InOrderQueueWithPreviousHostTask) { ASSERT_EQ(PtrNode2->MSuccessors.front().lock(), PtrNode3); ASSERT_EQ(PtrNode3->MPredecessors.size(), 1lu); ASSERT_EQ(PtrNode3->MPredecessors.front().lock(), PtrNode2); -#endif + InOrderGraph.end_recording(InOrderQueue); auto EventLast = InOrderQueue.submit( From 2afdfb1d1c7445359c6ff22412cb986a70b7242b Mon Sep 17 00:00:00 2001 From: James Brodman Date: Fri, 11 Apr 2025 10:44:32 -0400 Subject: [PATCH 7/8] Old Formatting Signed-off-by: James Brodman --- sycl/unittests/Extensions/CommandGraph/InOrderQueue.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/sycl/unittests/Extensions/CommandGraph/InOrderQueue.cpp b/sycl/unittests/Extensions/CommandGraph/InOrderQueue.cpp index 93699e49c3d34..f65018d63fb35 100644 --- a/sycl/unittests/Extensions/CommandGraph/InOrderQueue.cpp +++ b/sycl/unittests/Extensions/CommandGraph/InOrderQueue.cpp @@ -271,7 +271,6 @@ TEST_F(CommandGraphTest, InOrderQueueWithPreviousHostTask) { // Record in-order queue with three nodes. InOrderGraph.begin_recording(InOrderQueue); - auto Node1Graph = InOrderQueue.submit( [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); From f54f8692ae13e158e62db79c9f63ecab86122668 Mon Sep 17 00:00:00 2001 From: James Brodman Date: Mon, 14 Apr 2025 11:37:51 -0400 Subject: [PATCH 8/8] Simplify UR call Signed-off-by: James Brodman --- sycl/source/detail/queue_impl.hpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 2fa896cda203a..81e5fe3ec9526 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -538,9 +538,8 @@ class queue_impl { .get_index(); Properties.pNext = &IndexProperties; } - ur_result_t Error = Adapter->call_nocheck( - Context, Device, &Properties, &Queue); - Adapter->checkUrResult(Error); + Adapter->call(Context, Device, &Properties, + &Queue); return Queue; }