Skip to content

Commit 8e70e5c

Browse files
Modified the adapters such that it is valid to call release on CB while it is executing
1 parent 4037e92 commit 8e70e5c

File tree

10 files changed

+48
-25
lines changed

10 files changed

+48
-25
lines changed

sycl/source/detail/graph_impl.cpp

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -977,11 +977,6 @@ exec_graph_impl::~exec_graph_impl() {
977977
const sycl::detail::AdapterPtr &Adapter =
978978
sycl::detail::getSyclObjImpl(MContext)->getAdapter();
979979
MSchedule.clear();
980-
// We need to wait on all command buffer executions before we can release
981-
// them.
982-
for (auto &Event : MExecutionEvents) {
983-
Event->wait(Event);
984-
}
985980

986981
// Clean up any graph-owned allocations that were allocated
987982
MGraphImpl->getMemPool().deallocateAndUnmapAll();

unified-runtime/scripts/core/EXP-COMMAND-BUFFER.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -563,3 +563,4 @@ Contributors
563563
* Maxime France-Pillois `[email protected] <[email protected]>`_
564564
565565
566+

unified-runtime/scripts/core/exp-command-buffer.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -310,7 +310,7 @@ returns:
310310
- $X_RESULT_ERROR_OUT_OF_HOST_MEMORY
311311
--- #--------------------------------------------------------------------------
312312
type: function
313-
desc: "Decrement the command-buffer object's reference count and delete the command-buffer object if the reference count becomes zero."
313+
desc: "Decrement the command-buffer object's reference count and delete the command-buffer object if the reference count becomes zero. It will try synchronizing the command-buffer, hence it is legal to call it while command-buffer is still executing."
314314
class: $xCommandBuffer
315315
name: ReleaseExp
316316
params:

unified-runtime/source/adapters/cuda/command_buffer.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,9 @@ urCommandBufferRetainExp(ur_exp_command_buffer_handle_t hCommandBuffer) {
388388
UR_APIEXPORT ur_result_t UR_APICALL
389389
urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer) {
390390
if (hCommandBuffer->decrementReferenceCount() == 0) {
391+
if (hCommandBuffer->CurrentExecution) {
392+
UR_CHECK_ERROR(hCommandBuffer->CurrentExecution->wait());
393+
}
391394
// Ref count has reached zero, release of created commands
392395
for (auto &Command : hCommandBuffer->CommandHandles) {
393396
commandHandleDestroy(Command);
@@ -1175,6 +1178,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueCommandBufferExp(
11751178
if (phEvent) {
11761179
UR_CHECK_ERROR(RetImplEvent->record());
11771180
*phEvent = RetImplEvent.release();
1181+
hCommandBuffer->CurrentExecution = *phEvent;
11781182
}
11791183
return UR_RESULT_SUCCESS;
11801184
} catch (ur_result_t Err) {

unified-runtime/source/adapters/cuda/command_buffer.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,8 @@ struct ur_exp_command_buffer_handle_t_ : ur::cuda::handle_base {
192192
// Atomic variable counting the number of reference to this command_buffer
193193
// using std::atomic prevents data race when incrementing/decrementing.
194194
std::atomic_uint32_t RefCount;
195+
// The event of current graph execution.
196+
ur_event_handle_t CurrentExecution = nullptr;
195197

196198
// Ordered map of sync_points to ur_events, so that we can find the last
197199
// node added to an in-order command-buffer.

unified-runtime/source/adapters/hip/command_buffer.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,9 @@ urCommandBufferRetainExp(ur_exp_command_buffer_handle_t hCommandBuffer) {
273273
UR_APIEXPORT ur_result_t UR_APICALL
274274
urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer) {
275275
if (hCommandBuffer->decrementReferenceCount() == 0) {
276+
if (hCommandBuffer->CurrentExecution) {
277+
UR_CHECK_ERROR(hCommandBuffer->CurrentExecution->wait());
278+
}
276279
delete hCommandBuffer;
277280
}
278281
return UR_RESULT_SUCCESS;
@@ -811,6 +814,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueCommandBufferExp(
811814
if (phEvent) {
812815
UR_CHECK_ERROR(RetImplEvent->record());
813816
*phEvent = RetImplEvent.release();
817+
hCommandBuffer->CurrentExecution = *phEvent;
814818
}
815819
} catch (ur_result_t Err) {
816820
return Err;

unified-runtime/source/adapters/hip/command_buffer.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,8 @@ struct ur_exp_command_buffer_handle_t_ : ur::hip::handle_base {
128128
// Atomic variable counting the number of reference to this command_buffer
129129
// using std::atomic prevents data race when incrementing/decrementing.
130130
std::atomic_uint32_t RefCount;
131+
// The event of current graph execution.
132+
ur_event_handle_t CurrentExecution = nullptr;
131133

132134
// Ordered map of sync_points to ur_events
133135
std::map<ur_exp_command_buffer_sync_point_t, hipGraphNode_t> SyncPoints;

unified-runtime/source/adapters/level_zero/command_buffer.cpp

Lines changed: 20 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -671,6 +671,25 @@ ur_result_t createMainCommandList(ur_context_handle_t Context,
671671
return UR_RESULT_SUCCESS;
672672
}
673673

674+
/**
675+
* Waits for any ongoing executions of the command-buffer to finish.
676+
* @param CommandBuffer The command-buffer to wait for.
677+
* @return UR_RESULT_SUCCESS or an error code on failure
678+
*/
679+
ur_result_t
680+
waitForOngoingExecution(ur_exp_command_buffer_handle_t CommandBuffer) {
681+
682+
if (ur_event_handle_t &CurrentSubmissionEvent =
683+
CommandBuffer->CurrentSubmissionEvent) {
684+
ZE2UR_CALL(zeEventHostSynchronize,
685+
(CurrentSubmissionEvent->ZeEvent, UINT64_MAX));
686+
UR_CALL(urEventReleaseInternal(CurrentSubmissionEvent));
687+
CurrentSubmissionEvent = nullptr;
688+
}
689+
690+
return UR_RESULT_SUCCESS;
691+
}
692+
674693
/**
675694
* Checks whether the command-buffer can be constructed using in order
676695
* command-lists.
@@ -830,6 +849,7 @@ urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t CommandBuffer) {
830849
if (!CommandBuffer->RefCount.decrementAndTest())
831850
return UR_RESULT_SUCCESS;
832851

852+
UR_CALL(waitForOngoingExecution(CommandBuffer));
833853
CommandBuffer->cleanupCommandBufferResources();
834854
delete CommandBuffer;
835855
return UR_RESULT_SUCCESS;
@@ -1442,25 +1462,6 @@ ur_result_t getZeCommandQueue(ur_queue_handle_t Queue, bool UseCopyEngine,
14421462
return UR_RESULT_SUCCESS;
14431463
}
14441464

1445-
/**
1446-
* Waits for any ongoing executions of the command-buffer to finish.
1447-
* @param CommandBuffer The command-buffer to wait for.
1448-
* @return UR_RESULT_SUCCESS or an error code on failure
1449-
*/
1450-
ur_result_t
1451-
waitForOngoingExecution(ur_exp_command_buffer_handle_t CommandBuffer) {
1452-
1453-
if (ur_event_handle_t &CurrentSubmissionEvent =
1454-
CommandBuffer->CurrentSubmissionEvent) {
1455-
ZE2UR_CALL(zeEventHostSynchronize,
1456-
(CurrentSubmissionEvent->ZeEvent, UINT64_MAX));
1457-
UR_CALL(urEventReleaseInternal(CurrentSubmissionEvent));
1458-
CurrentSubmissionEvent = nullptr;
1459-
}
1460-
1461-
return UR_RESULT_SUCCESS;
1462-
}
1463-
14641465
/**
14651466
* Waits for the all the dependencies of the command-buffer
14661467
* @param[in] CommandBuffer The command-buffer.

unified-runtime/source/adapters/opencl/command_buffer.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,10 @@ urCommandBufferRetainExp(ur_exp_command_buffer_handle_t hCommandBuffer) {
115115
UR_APIEXPORT ur_result_t UR_APICALL
116116
urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer) {
117117
if (hCommandBuffer->decrementReferenceCount() == 0) {
118+
if (hCommandBuffer->LastSubmission) {
119+
cl_int RetErr = clWaitForEvents(1, &(hCommandBuffer->LastSubmission));
120+
CL_RETURN_ON_FAILURE(RetErr);
121+
}
118122
delete hCommandBuffer;
119123
}
120124

unified-runtime/test/conformance/exp_command_buffer/enqueue.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,3 +132,13 @@ TEST_P(urEnqueueCommandBufferExpTest, SerializeOutofOrderQueue) {
132132
ASSERT_EQ(reference, Output[i]);
133133
}
134134
}
135+
136+
// Tests releasing command-buffer while it is still executing relying
137+
// on synchronization during urCommandBufferReleaseExp call.
138+
TEST_P(urEnqueueCommandBufferExpTest, EnqueueAndRelease) {
139+
ASSERT_SUCCESS(urEnqueueCommandBufferExp(out_of_order_queue, cmd_buf_handle,
140+
0, nullptr, nullptr));
141+
142+
// Release the command buffer
143+
ASSERT_SUCCESS(urCommandBufferReleaseExp(cmd_buf_handle));
144+
}

0 commit comments

Comments
 (0)