@@ -57,27 +57,6 @@ bool IsSharedPointer(ur_context_handle_t Context, const void *Ptr) {
57
57
return (ZeMemoryAllocationProperties.type == ZE_MEMORY_TYPE_SHARED);
58
58
}
59
59
60
- // Helper function: decides whether the Copy Engine should be preferred for a
61
- // copy from Src to Dst, based on the device and the types of memory used.
62
- bool PreferCopyEngineUsage (ur_device_handle_t Device,
63
- ur_context_handle_t Context, const void *Src,
64
- void *Dst) {
65
- bool PreferCopyEngine = false ;
66
- // On Integrated Devices, Copy Engines are not preferred for any Copy
67
- // operations, so only non-integrated devices are examined further.
68
- if (!Device->isIntegrated ()) {
69
- // For better performance, Copy Engines are preferred only for non D2D Copies
70
- // (Src or Dst is not a device pointer) on devices with both the Main and Link Copy Engines.
71
- if (Device->hasLinkCopyEngine () && Device->hasMainCopyEngine () &&
72
- (!IsDevicePointer (Context, Src) || !IsDevicePointer (Context, Dst))) {
73
- PreferCopyEngine = true ;
74
- }
75
- }
76
- // Temporary option: when UseCopyEngineForD2DCopy is set, the Copy Engine is preferred regardless of the checks above (this includes D2D copies).
77
- PreferCopyEngine |= UseCopyEngineForD2DCopy;
78
- return PreferCopyEngine;
79
- }
80
-
81
60
// Shared by all memory read/write/copy PI interfaces.
82
61
// PI interfaces must have queue's and destination buffer's mutexes locked for
83
62
// exclusive use and source buffer's mutex locked for shared use on entry.
@@ -1259,10 +1238,23 @@ ur_result_t urEnqueueUSMMemcpy(
1259
1238
ur_event_handle_t *OutEvent) {
1260
1239
std::scoped_lock<ur_shared_mutex> lock (Queue->Mutex );
1261
1240
1241
+ // Device to Device copies are found to execute slower on copy engine
1242
+ // (versus compute engine).
1243
+ bool PreferCopyEngine = !IsDevicePointer (Queue->Context , Src) ||
1244
+ !IsDevicePointer (Queue->Context , Dst);
1245
+ // For better performance, Copy Engines are not preferred given Shared
1246
+ // pointers on DG2.
1247
+ if (Queue->Device ->isDG2 () && (IsSharedPointer (Queue->Context , Src) ||
1248
+ IsSharedPointer (Queue->Context , Dst))) {
1249
+ PreferCopyEngine = false ;
1250
+ }
1251
+
1252
+ // Temporary option added to use copy engine for D2D copy
1253
+ PreferCopyEngine |= UseCopyEngineForD2DCopy;
1254
+
1262
1255
return enqueueMemCopyHelper ( // TODO: do we need a new command type for this?
1263
1256
UR_COMMAND_MEM_BUFFER_COPY, Queue, Dst, Blocking, Size , Src,
1264
- NumEventsInWaitList, EventWaitList, OutEvent,
1265
- PreferCopyEngineUsage (Queue->Device , Queue->Context , Src, Dst));
1257
+ NumEventsInWaitList, EventWaitList, OutEvent, PreferCopyEngine);
1266
1258
}
1267
1259
1268
1260
ur_result_t urEnqueueUSMPrefetch (
@@ -1462,13 +1454,26 @@ ur_result_t urEnqueueUSMMemcpy2D(
1462
1454
1463
1455
std::scoped_lock<ur_shared_mutex> lock (Queue->Mutex );
1464
1456
1457
+ // Device to Device copies are found to execute slower on copy engine
1458
+ // (versus compute engine).
1459
+ bool PreferCopyEngine = !IsDevicePointer (Queue->Context , Src) ||
1460
+ !IsDevicePointer (Queue->Context , Dst);
1461
+ // For better performance, Copy Engines are not preferred given Shared
1462
+ // pointers on DG2.
1463
+ if (Queue->Device ->isDG2 () && (IsSharedPointer (Queue->Context , Src) ||
1464
+ IsSharedPointer (Queue->Context , Dst))) {
1465
+ PreferCopyEngine = false ;
1466
+ }
1467
+
1468
+ // Temporary option added to use copy engine for D2D copy
1469
+ PreferCopyEngine |= UseCopyEngineForD2DCopy;
1470
+
1465
1471
return enqueueMemCopyRectHelper ( // TODO: do we need a new command type for
1466
1472
// this?
1467
1473
UR_COMMAND_MEM_BUFFER_COPY_RECT, Queue, Src, Dst, ZeroOffset, ZeroOffset,
1468
1474
Region, SrcPitch, DstPitch, 0 , /* SrcSlicePitch=*/
1469
1475
0 , /* DstSlicePitch=*/
1470
- Blocking, NumEventsInWaitList, EventWaitList, Event,
1471
- PreferCopyEngineUsage (Queue->Device , Queue->Context , Src, Dst));
1476
+ Blocking, NumEventsInWaitList, EventWaitList, Event, PreferCopyEngine);
1472
1477
}
1473
1478
1474
1479
ur_result_t urMemImageCreate (
0 commit comments