From 300299a6f201de8a397694671acc810d9b66ce05 Mon Sep 17 00:00:00 2001 From: "Zhao, Yang2" Date: Thu, 3 Jul 2025 10:37:41 +0200 Subject: [PATCH 1/5] fix fill2d --- .../loader/layers/sanitizer/msan/msan_ddi.cpp | 47 +++++++++++++++++-- 1 file changed, 44 insertions(+), 3 deletions(-) diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp index dc5e5ab77034d..4434cd1dc529c 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp @@ -51,6 +51,47 @@ ur_result_t setupContext(ur_context_handle_t Context, uint32_t numDevices, return UR_RESULT_SUCCESS; } +ur_result_t urEnqueueUSMFill2DFallback(ur_queue_handle_t hQueue, void *pMem, + size_t pitch, size_t patternSize, + const void *pPattern, size_t width, + size_t height, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + ur_result_t Result = getContext()->urDdiTable.Enqueue.pfnUSMFill2D( + hQueue, pMem, pitch, patternSize, pPattern, width, height, + numEventsInWaitList, phEventWaitList, phEvent); + if (Result == UR_RESULT_SUCCESS || + Result != UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { + return Result; + } + + // fallback code + auto pfnUSMFill = getContext()->urDdiTable.Enqueue.pfnUSMFill; + + std::vector WaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + WaitEvents[i] = phEventWaitList[i]; + } + + for (size_t HeightIndex = 0; HeightIndex < height; HeightIndex++) { + ur_event_handle_t Event = nullptr; + + UR_CALL(pfnUSMFill(hQueue, (void *)((char *)pMem + pitch * HeightIndex), + patternSize, pPattern, width, WaitEvents.size(), + WaitEvents.data(), &Event)); + + WaitEvents.clear(); + WaitEvents.push_back(Event); + } + + if (phEvent && WaitEvents.size()) { + *phEvent = WaitEvents[0]; + } + + return UR_RESULT_SUCCESS; +} + } // namespace /////////////////////////////////////////////////////////////////////////////// @@ -1756,9 +1797,9 @@ ur_result_t urEnqueueUSMMemcpy2D( const auto DstShadow = DstDI->Shadow->MemToShadow((uptr)pDst); const char Pattern = 0; ur_event_handle_t Event = nullptr; - UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMFill2D( - hQueue, (void *)DstShadow, dstPitch, 1, &Pattern, width, height, 0, - nullptr, &Event)); + UR_CALL(urEnqueueUSMFill2DFallback(hQueue, (void *)DstShadow, dstPitch, 1, + &Pattern, width, height, 0, nullptr, + &Event)); Events.push_back(Event); } From 882a3b877e23f737ea8a62cdd59d4f120d97d7c2 Mon Sep 17 00:00:00 2001 From: "Zhao, Yang2" Date: Fri, 4 Jul 2025 04:32:39 +0200 Subject: [PATCH 2/5] implement fallback on adapters --- .../source/adapters/level_zero/memory.cpp | 52 ++++++++++++++----- .../source/adapters/opencl/usm.cpp | 34 +++++++++--- .../loader/layers/sanitizer/msan/msan_ddi.cpp | 47 ++--------------- 3 files changed, 67 insertions(+), 66 deletions(-) diff --git a/unified-runtime/source/adapters/level_zero/memory.cpp b/unified-runtime/source/adapters/level_zero/memory.cpp index 3b1158645e77a..4556323644e92 100644 --- a/unified-runtime/source/adapters/level_zero/memory.cpp +++ b/unified-runtime/source/adapters/level_zero/memory.cpp @@ -1393,33 +1393,57 @@ ur_result_t urEnqueueUSMAdvise( ur_result_t urEnqueueUSMFill2D( /// [in] handle of the queue to submit to. - ur_queue_handle_t /*Queue*/, + ur_queue_handle_t Queue, /// [in] pointer to memory to be filled. - void * /*Mem*/, + void *Mem, /// [in] the total width of the destination memory including padding. - size_t /*Pitch*/, + size_t Pitch, /// [in] the size in bytes of the pattern. - size_t /*PatternSize*/, + size_t PatternSize, /// [in] pointer with the bytes of the pattern to set. - const void * /*Pattern*/, + const void *Pattern, /// [in] the width in bytes of each row to fill. - size_t /*Width*/, + size_t Width, /// [in] the height of the columns to fill. - size_t /*Height*/, + size_t Height, /// [in] size of the event wait list - uint32_t /*NumEventsInWaitList*/, + uint32_t NumEventsInWaitList, /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of /// events that must be complete before the kernel execution. If /// nullptr, the numEventsInWaitList must be 0, indicating that no wait /// event. - const ur_event_handle_t * /*EventWaitList*/, + const ur_event_handle_t *EventWaitList, /// [in,out][optional] return an event object that identifies this /// particular kernel execution instance. - ur_event_handle_t * /*OutEvent*/) { - UR_LOG_LEGACY(ERR, - logger::LegacyMessage("[UR][L0] {} function not implemented!"), - "{} function not implemented!", __FUNCTION__); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + ur_event_handle_t *OutEvent) { + // A quick fallback implementation, since Level Zero does not support USM + // fill2d natively + std::scoped_lock Lock(Queue->Mutex); + + std::vector WaitEvents(NumEventsInWaitList); + for (uint32_t i = 0; i < NumEventsInWaitList; i++) { + WaitEvents[i] = EventWaitList[i]; + } + + for (size_t HeightIndex = 0; HeightIndex < Height; HeightIndex++) { + ur_event_handle_t Event = nullptr; + + UR_CALL(enqueueMemFillHelper( + UR_COMMAND_MEM_BUFFER_FILL, Queue, + (void *)((char *)Mem + Pitch * HeightIndex), + Pattern, // It will be interpreted as an 8-bit value, + PatternSize, // which is indicated with this pattern_size==1 + Width, WaitEvents.size(), WaitEvents.data(), &Event)); + + WaitEvents.clear(); + WaitEvents.push_back(Event); + } + + if (OutEvent && WaitEvents.size()) { + *OutEvent = WaitEvents[0]; + } + + return UR_RESULT_SUCCESS; } ur_result_t urEnqueueUSMMemcpy2D( diff --git a/unified-runtime/source/adapters/opencl/usm.cpp b/unified-runtime/source/adapters/opencl/usm.cpp index e3c510c745766..3a42f5e944c6d 100644 --- a/unified-runtime/source/adapters/opencl/usm.cpp +++ b/unified-runtime/source/adapters/opencl/usm.cpp @@ -587,14 +587,32 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMAdvise( } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill2D( - [[maybe_unused]] ur_queue_handle_t hQueue, [[maybe_unused]] void *pMem, - [[maybe_unused]] size_t pitch, [[maybe_unused]] size_t patternSize, - [[maybe_unused]] const void *pPattern, [[maybe_unused]] size_t width, - [[maybe_unused]] size_t height, - [[maybe_unused]] uint32_t numEventsInWaitList, - [[maybe_unused]] const ur_event_handle_t *phEventWaitList, - [[maybe_unused]] ur_event_handle_t *phEvent) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + ur_queue_handle_t hQueue, void *pMem, size_t pitch, size_t patternSize, + const void *pPattern, size_t width, size_t height, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + // A quick fallback implementation + std::vector WaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + WaitEvents[i] = phEventWaitList[i]; + } + + for (size_t HeightIndex = 0; HeightIndex < height; HeightIndex++) { + ur_event_handle_t Event = nullptr; + + UR_CALL(urEnqueueUSMFill( + hQueue, (void *)((char *)pMem + pitch * HeightIndex), patternSize, + pPattern, width, WaitEvents.size(), WaitEvents.data(), &Event)); + + WaitEvents.clear(); + WaitEvents.push_back(Event); + } + + if (phEvent && WaitEvents.size()) { + *phEvent = WaitEvents[0]; + } + + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp index 4434cd1dc529c..dc5e5ab77034d 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp @@ -51,47 +51,6 @@ ur_result_t setupContext(ur_context_handle_t Context, uint32_t numDevices, return UR_RESULT_SUCCESS; } -ur_result_t urEnqueueUSMFill2DFallback(ur_queue_handle_t hQueue, void *pMem, - size_t pitch, size_t patternSize, - const void *pPattern, size_t width, - size_t height, - uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - ur_result_t Result = getContext()->urDdiTable.Enqueue.pfnUSMFill2D( - hQueue, pMem, pitch, patternSize, pPattern, width, height, - numEventsInWaitList, phEventWaitList, phEvent); - if (Result == UR_RESULT_SUCCESS || - Result != UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { - return Result; - } - - // fallback code - auto pfnUSMFill = getContext()->urDdiTable.Enqueue.pfnUSMFill; - - std::vector WaitEvents(numEventsInWaitList); - for (uint32_t i = 0; i < numEventsInWaitList; i++) { - WaitEvents[i] = phEventWaitList[i]; - } - - for (size_t HeightIndex = 0; HeightIndex < height; HeightIndex++) { - ur_event_handle_t Event = nullptr; - - UR_CALL(pfnUSMFill(hQueue, (void *)((char *)pMem + pitch * HeightIndex), - patternSize, pPattern, width, WaitEvents.size(), - WaitEvents.data(), &Event)); - - WaitEvents.clear(); - WaitEvents.push_back(Event); - } - - if (phEvent && WaitEvents.size()) { - *phEvent = WaitEvents[0]; - } - - return UR_RESULT_SUCCESS; -} - } // namespace /////////////////////////////////////////////////////////////////////////////// @@ -1797,9 +1756,9 @@ ur_result_t urEnqueueUSMMemcpy2D( const auto DstShadow = DstDI->Shadow->MemToShadow((uptr)pDst); const char Pattern = 0; ur_event_handle_t Event = nullptr; - UR_CALL(urEnqueueUSMFill2DFallback(hQueue, (void *)DstShadow, dstPitch, 1, - &Pattern, width, height, 0, nullptr, - &Event)); + UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMFill2D( + hQueue, (void *)DstShadow, dstPitch, 1, &Pattern, width, height, 0, + nullptr, &Event)); Events.push_back(Event); } From da7b04db00173c9e0e191027299884f49814d316 Mon Sep 17 00:00:00 2001 From: "Zhao, Yang2" Date: Fri, 4 Jul 2025 04:54:10 +0200 Subject: [PATCH 3/5] fix event dependency --- .../source/adapters/level_zero/memory.cpp | 17 +++++++++-------- unified-runtime/source/adapters/opencl/usm.cpp | 17 +++++++++-------- .../loader/layers/sanitizer/msan/msan_ddi.cpp | 9 ++------- 3 files changed, 20 insertions(+), 23 deletions(-) diff --git a/unified-runtime/source/adapters/level_zero/memory.cpp b/unified-runtime/source/adapters/level_zero/memory.cpp index 4556323644e92..d3dce632cc64f 100644 --- a/unified-runtime/source/adapters/level_zero/memory.cpp +++ b/unified-runtime/source/adapters/level_zero/memory.cpp @@ -1420,10 +1420,7 @@ ur_result_t urEnqueueUSMFill2D( // fill2d natively std::scoped_lock Lock(Queue->Mutex); - std::vector WaitEvents(NumEventsInWaitList); - for (uint32_t i = 0; i < NumEventsInWaitList; i++) { - WaitEvents[i] = EventWaitList[i]; - } + std::vector WaitEvents(Height); for (size_t HeightIndex = 0; HeightIndex < Height; HeightIndex++) { ur_event_handle_t Event = nullptr; @@ -1433,14 +1430,18 @@ ur_result_t urEnqueueUSMFill2D( (void *)((char *)Mem + Pitch * HeightIndex), Pattern, // It will be interpreted as an 8-bit value, PatternSize, // which is indicated with this pattern_size==1 - Width, WaitEvents.size(), WaitEvents.data(), &Event)); + Width, NumEventsInWaitList, EventWaitList, &Event)); - WaitEvents.clear(); WaitEvents.push_back(Event); } - if (OutEvent && WaitEvents.size()) { - *OutEvent = WaitEvents[0]; + if (OutEvent) { + UR_CALL(ur::level_zero::urEnqueueEventsWait(Queue, WaitEvents.size(), + WaitEvents.data(), OutEvent)); + } + + for (const auto Event : WaitEvents) { + UR_CALL(ur::level_zero::urEventRelease(Event)); } return UR_RESULT_SUCCESS; diff --git a/unified-runtime/source/adapters/opencl/usm.cpp b/unified-runtime/source/adapters/opencl/usm.cpp index 3a42f5e944c6d..b3b2aade4aa49 100644 --- a/unified-runtime/source/adapters/opencl/usm.cpp +++ b/unified-runtime/source/adapters/opencl/usm.cpp @@ -592,24 +592,25 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill2D( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { // A quick fallback implementation - std::vector WaitEvents(numEventsInWaitList); - for (uint32_t i = 0; i < numEventsInWaitList; i++) { - WaitEvents[i] = phEventWaitList[i]; - } + std::vector WaitEvents(height); for (size_t HeightIndex = 0; HeightIndex < height; HeightIndex++) { ur_event_handle_t Event = nullptr; UR_CALL(urEnqueueUSMFill( hQueue, (void *)((char *)pMem + pitch * HeightIndex), patternSize, - pPattern, width, WaitEvents.size(), WaitEvents.data(), &Event)); + pPattern, width, numEventsInWaitList, phEventWaitList, &Event)); - WaitEvents.clear(); WaitEvents.push_back(Event); } - if (phEvent && WaitEvents.size()) { - *phEvent = WaitEvents[0]; + if (phEvent) { + UR_CALL(urEnqueueEventsWait(hQueue, WaitEvents.size(), WaitEvents.data(), + phEvent)); + } + + for (const auto Event : WaitEvents) { + UR_CALL(urEventRelease(Event)); } return UR_RESULT_SUCCESS; diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp index dc5e5ab77034d..5aa03d39af866 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp @@ -1726,11 +1726,6 @@ ur_result_t urEnqueueUSMMemcpy2D( { auto pfnUSMMemcpy = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy; - std::vector WaitEvents(numEventsInWaitList); - for (uint32_t i = 0; i < numEventsInWaitList; i++) { - WaitEvents[i] = phEventWaitList[i]; - } - for (size_t HeightIndex = 0; HeightIndex < height; HeightIndex++) { ur_event_handle_t Event = nullptr; const auto DstOrigin = @@ -1742,8 +1737,8 @@ ur_result_t urEnqueueUSMMemcpy2D( width - 1) + MSAN_ORIGIN_GRANULARITY; pfnUSMMemcpy(hQueue, false, (void *)DstOrigin, (void *)SrcOrigin, - SrcOriginEnd - SrcOrigin, WaitEvents.size(), - WaitEvents.data(), &Event); + SrcOriginEnd - SrcOrigin, numEventsInWaitList, phEventWaitList, + &Event); Events.push_back(Event); } } From 9637da47322d7ac02843040f63f46f3d672b27a7 Mon Sep 17 00:00:00 2001 From: "Zhao, Yang2" Date: Fri, 4 Jul 2025 04:55:56 +0200 Subject: [PATCH 4/5] clean code --- unified-runtime/source/adapters/level_zero/memory.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/unified-runtime/source/adapters/level_zero/memory.cpp b/unified-runtime/source/adapters/level_zero/memory.cpp index d3dce632cc64f..24405b460fa26 100644 --- a/unified-runtime/source/adapters/level_zero/memory.cpp +++ b/unified-runtime/source/adapters/level_zero/memory.cpp @@ -1425,12 +1425,10 @@ ur_result_t urEnqueueUSMFill2D( for (size_t HeightIndex = 0; HeightIndex < Height; HeightIndex++) { ur_event_handle_t Event = nullptr; - UR_CALL(enqueueMemFillHelper( - UR_COMMAND_MEM_BUFFER_FILL, Queue, - (void *)((char *)Mem + Pitch * HeightIndex), - Pattern, // It will be interpreted as an 8-bit value, - PatternSize, // which is indicated with this pattern_size==1 - Width, NumEventsInWaitList, EventWaitList, &Event)); + UR_CALL(enqueueMemFillHelper(UR_COMMAND_MEM_BUFFER_FILL, Queue, + (void *)((char *)Mem + Pitch * HeightIndex), + Pattern, PatternSize, Width, + NumEventsInWaitList, EventWaitList, &Event)); WaitEvents.push_back(Event); } From c29367111be803612e79dea64ab2b73c47cf3de8 Mon Sep 17 00:00:00 2001 From: "Zhao, Yang2" Date: Fri, 4 Jul 2025 09:42:29 +0200 Subject: [PATCH 5/5] revert adapters --- .../source/adapters/level_zero/memory.cpp | 51 +++++------------- .../source/adapters/opencl/usm.cpp | 35 +++--------- .../loader/layers/sanitizer/msan/msan_ddi.cpp | 54 ++++++++++++++++--- 3 files changed, 70 insertions(+), 70 deletions(-) diff --git a/unified-runtime/source/adapters/level_zero/memory.cpp b/unified-runtime/source/adapters/level_zero/memory.cpp index 24405b460fa26..3b1158645e77a 100644 --- a/unified-runtime/source/adapters/level_zero/memory.cpp +++ b/unified-runtime/source/adapters/level_zero/memory.cpp @@ -1393,56 +1393,33 @@ ur_result_t urEnqueueUSMAdvise( ur_result_t urEnqueueUSMFill2D( /// [in] handle of the queue to submit to. - ur_queue_handle_t Queue, + ur_queue_handle_t /*Queue*/, /// [in] pointer to memory to be filled. - void *Mem, + void * /*Mem*/, /// [in] the total width of the destination memory including padding. - size_t Pitch, + size_t /*Pitch*/, /// [in] the size in bytes of the pattern. - size_t PatternSize, + size_t /*PatternSize*/, /// [in] pointer with the bytes of the pattern to set. - const void *Pattern, + const void * /*Pattern*/, /// [in] the width in bytes of each row to fill. - size_t Width, + size_t /*Width*/, /// [in] the height of the columns to fill. - size_t Height, + size_t /*Height*/, /// [in] size of the event wait list - uint32_t NumEventsInWaitList, + uint32_t /*NumEventsInWaitList*/, /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of /// events that must be complete before the kernel execution. If /// nullptr, the numEventsInWaitList must be 0, indicating that no wait /// event. - const ur_event_handle_t *EventWaitList, + const ur_event_handle_t * /*EventWaitList*/, /// [in,out][optional] return an event object that identifies this /// particular kernel execution instance. - ur_event_handle_t *OutEvent) { - // A quick fallback implementation, since Level Zero does not support USM - // fill2d natively - std::scoped_lock Lock(Queue->Mutex); - - std::vector WaitEvents(Height); - - for (size_t HeightIndex = 0; HeightIndex < Height; HeightIndex++) { - ur_event_handle_t Event = nullptr; - - UR_CALL(enqueueMemFillHelper(UR_COMMAND_MEM_BUFFER_FILL, Queue, - (void *)((char *)Mem + Pitch * HeightIndex), - Pattern, PatternSize, Width, - NumEventsInWaitList, EventWaitList, &Event)); - - WaitEvents.push_back(Event); - } - - if (OutEvent) { - UR_CALL(ur::level_zero::urEnqueueEventsWait(Queue, WaitEvents.size(), - WaitEvents.data(), OutEvent)); - } - - for (const auto Event : WaitEvents) { - UR_CALL(ur::level_zero::urEventRelease(Event)); - } - - return UR_RESULT_SUCCESS; + ur_event_handle_t * /*OutEvent*/) { + UR_LOG_LEGACY(ERR, + logger::LegacyMessage("[UR][L0] {} function not implemented!"), + "{} function not implemented!", __FUNCTION__); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } ur_result_t urEnqueueUSMMemcpy2D( diff --git a/unified-runtime/source/adapters/opencl/usm.cpp b/unified-runtime/source/adapters/opencl/usm.cpp index b3b2aade4aa49..e3c510c745766 100644 --- a/unified-runtime/source/adapters/opencl/usm.cpp +++ b/unified-runtime/source/adapters/opencl/usm.cpp @@ -587,33 +587,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMAdvise( } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill2D( - ur_queue_handle_t hQueue, void *pMem, size_t pitch, size_t patternSize, - const void *pPattern, size_t width, size_t height, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - // A quick fallback implementation - std::vector WaitEvents(height); - - for (size_t HeightIndex = 0; HeightIndex < height; HeightIndex++) { - ur_event_handle_t Event = nullptr; - - UR_CALL(urEnqueueUSMFill( - hQueue, (void *)((char *)pMem + pitch * HeightIndex), patternSize, - pPattern, width, numEventsInWaitList, phEventWaitList, &Event)); - - WaitEvents.push_back(Event); - } - - if (phEvent) { - UR_CALL(urEnqueueEventsWait(hQueue, WaitEvents.size(), WaitEvents.data(), - phEvent)); - } - - for (const auto Event : WaitEvents) { - UR_CALL(urEventRelease(Event)); - } - - return UR_RESULT_SUCCESS; + [[maybe_unused]] ur_queue_handle_t hQueue, [[maybe_unused]] void *pMem, + [[maybe_unused]] size_t pitch, [[maybe_unused]] size_t patternSize, + [[maybe_unused]] const void *pPattern, [[maybe_unused]] size_t width, + [[maybe_unused]] size_t height, + [[maybe_unused]] uint32_t numEventsInWaitList, + [[maybe_unused]] const ur_event_handle_t *phEventWaitList, + [[maybe_unused]] ur_event_handle_t *phEvent) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp index 5aa03d39af866..439afeae99a26 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp @@ -51,6 +51,48 @@ ur_result_t setupContext(ur_context_handle_t Context, uint32_t numDevices, return UR_RESULT_SUCCESS; } +ur_result_t urEnqueueUSMFill2DFallback(ur_queue_handle_t hQueue, void *pMem, + size_t pitch, size_t patternSize, + const void *pPattern, size_t width, + size_t height, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + ur_result_t Result = getContext()->urDdiTable.Enqueue.pfnUSMFill2D( + hQueue, pMem, pitch, patternSize, pPattern, width, height, + numEventsInWaitList, phEventWaitList, phEvent); + if (Result == UR_RESULT_SUCCESS || + Result != UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { + return Result; + } + + // fallback code + auto pfnUSMFill = getContext()->urDdiTable.Enqueue.pfnUSMFill; + + std::vector WaitEvents(numEventsInWaitList); + + for (size_t HeightIndex = 0; HeightIndex < height; HeightIndex++) { + ur_event_handle_t Event = nullptr; + + UR_CALL(pfnUSMFill(hQueue, (void *)((char *)pMem + pitch * HeightIndex), + patternSize, pPattern, width, WaitEvents.size(), + WaitEvents.data(), &Event)); + + WaitEvents.push_back(Event); + } + + if (phEvent) { + UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait( + hQueue, WaitEvents.size(), WaitEvents.data(), phEvent)); + } + + for (const auto Event : WaitEvents) { + UR_CALL(getContext()->urDdiTable.Event.pfnRelease(Event)); + } + + return UR_RESULT_SUCCESS; +} + } // namespace /////////////////////////////////////////////////////////////////////////////// @@ -1737,8 +1779,8 @@ ur_result_t urEnqueueUSMMemcpy2D( width - 1) + MSAN_ORIGIN_GRANULARITY; pfnUSMMemcpy(hQueue, false, (void *)DstOrigin, (void *)SrcOrigin, - SrcOriginEnd - SrcOrigin, numEventsInWaitList, phEventWaitList, - &Event); + SrcOriginEnd - SrcOrigin, numEventsInWaitList, + phEventWaitList, &Event); Events.push_back(Event); } } @@ -1751,9 +1793,9 @@ ur_result_t urEnqueueUSMMemcpy2D( const auto DstShadow = DstDI->Shadow->MemToShadow((uptr)pDst); const char Pattern = 0; ur_event_handle_t Event = nullptr; - UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMFill2D( - hQueue, (void *)DstShadow, dstPitch, 1, &Pattern, width, height, 0, - nullptr, &Event)); + UR_CALL(urEnqueueUSMFill2DFallback(hQueue, (void *)DstShadow, dstPitch, 1, + &Pattern, width, height, 0, nullptr, + &Event)); Events.push_back(Event); } @@ -1762,7 +1804,7 @@ ur_result_t urEnqueueUSMMemcpy2D( hQueue, Events.size(), Events.data(), phEvent)); } - for (const auto &E : Events) + for (const auto E : Events) UR_CALL(getContext()->urDdiTable.Event.pfnRelease(E)); return UR_RESULT_SUCCESS;