Skip to content

Commit d937ec7

Browse files
committed
Revert "[L0] Refactor Copy Engine Usage checks for Performance"
This reverts commit 781e576.
1 parent e3f7e33 commit d937ec7

File tree

3 files changed

+42
-31
lines changed

3 files changed

+42
-31
lines changed

unified-runtime/source/adapters/level_zero/command_buffer.cpp

Lines changed: 12 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1097,10 +1097,19 @@ ur_result_t urCommandBufferAppendUSMMemcpyExp(
10971097
ur_event_handle_t * /*Event*/,
10981098
ur_exp_command_buffer_command_handle_t * /*Command*/) {
10991099

1100+
bool PreferCopyEngine = !IsDevicePointer(CommandBuffer->Context, Src) ||
1101+
!IsDevicePointer(CommandBuffer->Context, Dst);
1102+
// For better performance, Copy Engines are not preferred given Shared
1103+
// pointers on DG2.
1104+
if (CommandBuffer->Device->isDG2() &&
1105+
(IsSharedPointer(CommandBuffer->Context, Src) ||
1106+
IsSharedPointer(CommandBuffer->Context, Dst))) {
1107+
PreferCopyEngine = false;
1108+
}
1109+
PreferCopyEngine |= UseCopyEngineForD2DCopy;
1110+
11001111
return enqueueCommandBufferMemCopyHelper(
1101-
UR_COMMAND_USM_MEMCPY, CommandBuffer, Dst, Src, Size,
1102-
PreferCopyEngineUsage(CommandBuffer->Device, CommandBuffer->Context, Src,
1103-
Dst),
1112+
UR_COMMAND_USM_MEMCPY, CommandBuffer, Dst, Src, Size, PreferCopyEngine,
11041113
NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint);
11051114
}
11061115

unified-runtime/source/adapters/level_zero/memory.cpp

Lines changed: 30 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -57,27 +57,6 @@ bool IsSharedPointer(ur_context_handle_t Context, const void *Ptr) {
5757
return (ZeMemoryAllocationProperties.type == ZE_MEMORY_TYPE_SHARED);
5858
}
5959

60-
// Helper Function to check if the Copy Engine should be preferred given the
61-
// types of memory used.
62-
bool PreferCopyEngineUsage(ur_device_handle_t Device,
63-
ur_context_handle_t Context, const void *Src,
64-
void *Dst) {
65-
bool PreferCopyEngine = false;
66-
// Given Integrated Devices, Copy Engines are not preferred for any Copy
67-
// operations.
68-
if (!Device->isIntegrated()) {
69-
// Given non D2D Copies, for better performance, Copy Engines are preferred
70-
// only if one has both the Main and Link Copy Engines.
71-
if (Device->hasLinkCopyEngine() && Device->hasMainCopyEngine() &&
72-
(!IsDevicePointer(Context, Src) || !IsDevicePointer(Context, Dst))) {
73-
PreferCopyEngine = true;
74-
}
75-
}
76-
// Temporary option added to use force engine for D2D copy
77-
PreferCopyEngine |= UseCopyEngineForD2DCopy;
78-
return PreferCopyEngine;
79-
}
80-
8160
// Shared by all memory read/write/copy PI interfaces.
8261
// PI interfaces must have queue's and destination buffer's mutexes locked for
8362
// exclusive use and source buffer's mutex locked for shared use on entry.
@@ -1259,10 +1238,23 @@ ur_result_t urEnqueueUSMMemcpy(
12591238
ur_event_handle_t *OutEvent) {
12601239
std::scoped_lock<ur_shared_mutex> lock(Queue->Mutex);
12611240

1241+
// Device to Device copies are found to execute slower on copy engine
1242+
// (versus compute engine).
1243+
bool PreferCopyEngine = !IsDevicePointer(Queue->Context, Src) ||
1244+
!IsDevicePointer(Queue->Context, Dst);
1245+
// For better performance, Copy Engines are not preferred given Shared
1246+
// pointers on DG2.
1247+
if (Queue->Device->isDG2() && (IsSharedPointer(Queue->Context, Src) ||
1248+
IsSharedPointer(Queue->Context, Dst))) {
1249+
PreferCopyEngine = false;
1250+
}
1251+
1252+
// Temporary option added to use copy engine for D2D copy
1253+
PreferCopyEngine |= UseCopyEngineForD2DCopy;
1254+
12621255
return enqueueMemCopyHelper( // TODO: do we need a new command type for this?
12631256
UR_COMMAND_MEM_BUFFER_COPY, Queue, Dst, Blocking, Size, Src,
1264-
NumEventsInWaitList, EventWaitList, OutEvent,
1265-
PreferCopyEngineUsage(Queue->Device, Queue->Context, Src, Dst));
1257+
NumEventsInWaitList, EventWaitList, OutEvent, PreferCopyEngine);
12661258
}
12671259

12681260
ur_result_t urEnqueueUSMPrefetch(
@@ -1462,13 +1454,26 @@ ur_result_t urEnqueueUSMMemcpy2D(
14621454

14631455
std::scoped_lock<ur_shared_mutex> lock(Queue->Mutex);
14641456

1457+
// Device to Device copies are found to execute slower on copy engine
1458+
// (versus compute engine).
1459+
bool PreferCopyEngine = !IsDevicePointer(Queue->Context, Src) ||
1460+
!IsDevicePointer(Queue->Context, Dst);
1461+
// For better performance, Copy Engines are not preferred given Shared
1462+
// pointers on DG2.
1463+
if (Queue->Device->isDG2() && (IsSharedPointer(Queue->Context, Src) ||
1464+
IsSharedPointer(Queue->Context, Dst))) {
1465+
PreferCopyEngine = false;
1466+
}
1467+
1468+
// Temporary option added to use copy engine for D2D copy
1469+
PreferCopyEngine |= UseCopyEngineForD2DCopy;
1470+
14651471
return enqueueMemCopyRectHelper( // TODO: do we need a new command type for
14661472
// this?
14671473
UR_COMMAND_MEM_BUFFER_COPY_RECT, Queue, Src, Dst, ZeroOffset, ZeroOffset,
14681474
Region, SrcPitch, DstPitch, 0, /*SrcSlicePitch=*/
14691475
0, /*DstSlicePitch=*/
1470-
Blocking, NumEventsInWaitList, EventWaitList, Event,
1471-
PreferCopyEngineUsage(Queue->Device, Queue->Context, Src, Dst));
1476+
Blocking, NumEventsInWaitList, EventWaitList, Event, PreferCopyEngine);
14721477
}
14731478

14741479
ur_result_t urMemImageCreate(

unified-runtime/source/adapters/level_zero/memory.hpp

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -29,9 +29,6 @@ struct ur_device_handle_t_;
2929

3030
bool IsDevicePointer(ur_context_handle_t Context, const void *Ptr);
3131
bool IsSharedPointer(ur_context_handle_t Context, const void *Ptr);
32-
bool PreferCopyEngineUsage(ur_device_handle_t Device,
33-
ur_context_handle_t Context, const void *Src,
34-
void *Dst);
3532

3633
// This is an experimental option to test performance of device to device copy
3734
// operations on copy engines (versus compute engine)

0 commit comments

Comments
 (0)