Skip to content

[SYCL] optimize graph recording on in-order queue #18637

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: sycl
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 14 additions & 18 deletions sycl/source/detail/queue_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -349,25 +349,22 @@ event queue_impl::submit_impl(const detail::type_erased_cgfo_ty &CGF,

HandlerImpl->MEventMode = SubmitInfo.EventMode();

auto isInOrderGraphRecordOperation = !MGraph.expired() && isInOrder();
auto isHostTask = Type == CGType::CodeplayHostTask;

// TODO: this shouldn't be needed but without this
// the legacy adapter doesn't synchronize the operations properly
// when non-immediate command lists are used.
auto isGraphSubmission = Type == CGType::ExecCommandBuffer;

auto requiresPostProcess = SubmitInfo.PostProcessorFunc() || Streams.size();
auto noLastEventPath = !isHostTask && !isGraphSubmission &&
MNoEventMode.load(std::memory_order_relaxed) &&
!requiresPostProcess;
auto noLastEventPath =
isInOrderGraphRecordOperation ||
(!isHostTask && MNoEventMode.load(std::memory_order_relaxed) &&
!requiresPostProcess);

if (noLastEventPath) {
std::unique_lock<std::mutex> Lock(MMutex);

// Check if we are still in no event mode. There could
// have been a concurrent submit.
if (MNoEventMode.load(std::memory_order_relaxed)) {
return finalizeHandlerInOrderNoEventsUnlocked(Handler);
if (isInOrderGraphRecordOperation ||
MNoEventMode.load(std::memory_order_relaxed)) {
return finalizeHandlerNoEventsUnlocked(Handler);
}
}

Expand All @@ -382,8 +379,8 @@ event queue_impl::submit_impl(const detail::type_erased_cgfo_ty &CGF,
} else {
std::unique_lock<std::mutex> Lock(MMutex);

if (!isGraphSubmission && trySwitchingToNoEventsMode()) {
Event = finalizeHandlerInOrderNoEventsUnlocked(Handler);
if (trySwitchingToNoEventsMode()) {
Event = finalizeHandlerNoEventsUnlocked(Handler);
} else {
Event = finalizeHandlerInOrderWithDepsUnlocked(Handler);
}
Expand Down Expand Up @@ -502,9 +499,7 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr<queue_impl> &Self,

if (isInOrder() &&
(!isNoEventsMode || MContext->getBackend() == backend::opencl)) {
auto &EventToStoreIn = MGraph.expired() ? MDefaultGraphDeps.LastEventPtr
: MExtGraphDeps.LastEventPtr;
EventToStoreIn = EventImpl;
MDefaultGraphDeps.LastEventPtr = EventImpl;
}

return ResEvent;
Expand Down Expand Up @@ -760,11 +755,12 @@ bool queue_impl::queue_empty() const {
if (MEmpty)
return true;

if (MDefaultGraphDeps.LastEventPtr &&
!MDefaultGraphDeps.LastEventPtr->isDiscarded())
if (MDefaultGraphDeps.LastEventPtr) {
assert(!MDefaultGraphDeps.LastEventPtr->isDiscarded());
return MDefaultGraphDeps.LastEventPtr
->get_info<info::event::command_execution_status>() ==
info::event_command_status::complete;
}
}

// Check the status of the backend queue if this is not a host queue.
Expand Down
61 changes: 21 additions & 40 deletions sycl/source/detail/queue_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -598,12 +598,6 @@ class queue_impl {
std::lock_guard<std::mutex> Lock(MMutex);
MGraph = Graph;
MExtGraphDeps.reset();

if (Graph) {
MNoEventMode = false;
} else {
trySwitchingToNoEventsMode();
}
}

std::shared_ptr<ext::oneapi::experimental::detail::graph_impl>
Expand Down Expand Up @@ -705,10 +699,11 @@ class queue_impl {
if (MNoEventMode.load(std::memory_order_relaxed))
return true;

if (!MGraph.expired() || !isInOrder())
if (!isInOrder())
return false;

if (MDefaultGraphDeps.LastEventPtr != nullptr &&
// Graphs track events internally
if (MGraph.expired() && MDefaultGraphDeps.LastEventPtr != nullptr &&
!Scheduler::CheckEventReadiness(MContext,
MDefaultGraphDeps.LastEventPtr))
return false;
Expand All @@ -719,12 +714,10 @@ class queue_impl {
}

template <typename HandlerType = handler>
event finalizeHandlerInOrderNoEventsUnlocked(HandlerType &Handler) {
assert(isInOrder());
assert(MGraph.expired());
event finalizeHandlerNoEventsUnlocked(HandlerType &Handler) {
assert(MDefaultGraphDeps.LastEventPtr == nullptr ||
MContext->getBackend() == backend::opencl);
assert(MNoEventMode);
MContext->getBackend() == backend::opencl || !MGraph.expired());
assert(MNoEventMode || !MGraph.expired());

MEmpty = false;

Expand All @@ -745,18 +738,17 @@ class queue_impl {
template <typename HandlerType = handler>
event finalizeHandlerInOrderHostTaskUnlocked(HandlerType &Handler) {
assert(isInOrder());
assert(MGraph.expired());
assert(Handler.getType() == CGType::CodeplayHostTask);

auto &EventToBuildDeps = MGraph.expired() ? MDefaultGraphDeps.LastEventPtr
: MExtGraphDeps.LastEventPtr;

if (EventToBuildDeps && Handler.getType() != CGType::AsyncAlloc) {
if (MDefaultGraphDeps.LastEventPtr &&
Handler.getType() != CGType::AsyncAlloc) {
// We are not in no-event mode, so we can use the last event.
// depends_on after an async alloc is explicitly disallowed. Async alloc
// handles in order queue dependencies preemptively, so we skip them.
// Note: This could be improved by moving the handling of dependencies
// to before calling the CGF.
Handler.depends_on(EventToBuildDeps);
Handler.depends_on(MDefaultGraphDeps.LastEventPtr);
} else if (MNoEventMode) {
// There might be some operations submitted to the queue
// but the LastEventPtr is not set. If we are to run a host_task,
Expand All @@ -770,34 +762,28 @@ class queue_impl {
synchronizeWithExternalEvent(Handler);

auto Event = Handler.finalize();
EventToBuildDeps = getSyclObjImpl(Event);
assert(!EventToBuildDeps->isDiscarded());
MDefaultGraphDeps.LastEventPtr = getSyclObjImpl(Event);
assert(!MDefaultGraphDeps.LastEventPtr->isDiscarded());
return Event;
}

template <typename HandlerType = handler>
event finalizeHandlerInOrderWithDepsUnlocked(HandlerType &Handler) {
assert(isInOrder());
assert(MGraph.expired());

// this is handled by finalizeHandlerInOrderHostTask
assert(Handler.getType() != CGType::CodeplayHostTask);

if (Handler.getType() == CGType::ExecCommandBuffer && MNoEventMode) {
// TODO: this shouldn't be needed but without this
// the legacy adapter doesn't synchronize the operations properly
// when non-immediate command lists are used.
Handler.depends_on(insertHelperBarrier(Handler));
}

auto &EventToBuildDeps = MGraph.expired() ? MDefaultGraphDeps.LastEventPtr
: MExtGraphDeps.LastEventPtr;

// depends_on after an async alloc is explicitly disallowed. Async alloc
// handles in order queue dependencies preemptively, so we skip them.
// Note: This could be improved by moving the handling of dependencies
// to before calling the CGF.
if (EventToBuildDeps && Handler.getType() != CGType::AsyncAlloc) {
if (MDefaultGraphDeps.LastEventPtr &&
Handler.getType() != CGType::AsyncAlloc) {
// If we have last event, this means we are no longer in no-event mode.
assert(!MNoEventMode);
Handler.depends_on(EventToBuildDeps);
Handler.depends_on(MDefaultGraphDeps.LastEventPtr);
}

MEmpty = false;
Expand All @@ -807,15 +793,10 @@ class queue_impl {
auto EventRet = Handler.finalize();

if (getSyclObjImpl(EventRet)->isDiscarded()) {
EventToBuildDeps = nullptr;
MDefaultGraphDeps.LastEventPtr = nullptr;
} else {
MNoEventMode = false;
EventToBuildDeps = getSyclObjImpl(EventRet);

// TODO: if the event is NOP we should be able to discard it as well.
// However, NOP events are used to describe ordering for graph operations
// Once https://github.com/intel/llvm/issues/18330 is fixed, we can
// start relying on command buffer in-order property instead.
assert(!getSyclObjImpl(EventRet)->isNOP());
MDefaultGraphDeps.LastEventPtr = getSyclObjImpl(EventRet);
}

return EventRet;
Expand Down
2 changes: 1 addition & 1 deletion sycl/unittests/Extensions/CommandGraph/InOrderQueue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -313,11 +313,11 @@ TEST_F(CommandGraphTest, InOrderQueueWithPreviousHostTask) {

auto EventLastImpl = sycl::detail::getSyclObjImpl(EventLast);
auto WaitList = EventLastImpl->getWaitList();
Lock.unlock();
// Previous task is a host task. Explicit dependency is needed to enforce the
// execution order.
ASSERT_EQ(WaitList.size(), 1lu);
ASSERT_EQ(WaitList[0], EventInitialImpl);
Lock.unlock();
InOrderQueue.wait();
}

Expand Down
Loading