/*
 *
 * Copyright (C) 2025 Intel Corporation
 *
 * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
 * Exceptions. See LICENSE.TXT
 *
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 * @file tsan_buffer.cpp
 *
 */

#include "tsan_buffer.hpp"
#include "sanitizer_common/sanitizer_utils.hpp"
#include "tsan_interceptor.hpp"
#include "ur_sanitizer_layer.hpp"

namespace ur_sanitizer_layer {
namespace tsan {

ur_result_t EnqueueMemCopyRectHelper(
    ur_queue_handle_t Queue, char *pSrc, char *pDst, ur_rect_offset_t SrcOffset,
    ur_rect_offset_t DstOffset, ur_rect_region_t Region, size_t SrcRowPitch,
    size_t SrcSlicePitch, size_t DstRowPitch, size_t DstSlicePitch,
    bool Blocking, uint32_t NumEventsInWaitList,
    const ur_event_handle_t *EventWaitList, ur_event_handle_t *Event) {
  // If the user doesn't specify the src/dst row and slice pitches, derive
  // them from the region.
  if (SrcRowPitch == 0) {
    SrcRowPitch = Region.width;
  }

  if (SrcSlicePitch == 0) {
    SrcSlicePitch = SrcRowPitch * Region.height;
  }

  if (DstRowPitch == 0) {
    DstRowPitch = Region.width;
  }

  if (DstSlicePitch == 0) {
    DstSlicePitch = DstRowPitch * Region.height;
  }
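  // For example, a tightly packed Region of {width = 16, height = 4,
  // depth = 2} defaults to RowPitch = 16 and SlicePitch = 16 * 4 = 64 bytes.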

  // Calculate the src and dst addresses that will actually be copied.
  char *SrcOrigin = pSrc + SrcOffset.x + SrcRowPitch * SrcOffset.y +
                    SrcSlicePitch * SrcOffset.z;
  char *DstOrigin = pDst + DstOffset.x + DstRowPitch * DstOffset.y +
                    DstSlicePitch * DstOffset.z;
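  // For example, with SrcOffset = {x = 4, y = 1, z = 0}, SrcRowPitch = 16 and
  // SrcSlicePitch = 64, SrcOrigin = pSrc + 4 + 16 * 1 + 64 * 0 = pSrc + 20.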

  ur_device_handle_t Device = GetDevice(Queue);
  std::shared_ptr<DeviceInfo> DeviceInfo =
      getTsanInterceptor()->getDeviceInfo(Device);
  std::vector<ur_event_handle_t> Events;

  // USM doesn't support a 3D memory copy operation yet, so we emulate it by
  // issuing one 2D memory copy per slice.
  for (size_t i = 0; i < Region.depth; i++) {
    ur_event_handle_t NewEvent{};
    UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMMemcpy2D(
        Queue, false, DstOrigin + (i * DstSlicePitch), DstRowPitch,
        SrcOrigin + (i * SrcSlicePitch), SrcRowPitch, Region.width,
        Region.height, NumEventsInWaitList, EventWaitList, &NewEvent));
    Events.push_back(NewEvent);
  }

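  // Each per-slice copy above is enqueued asynchronously (blocking = false),
  // so a blocking request is honored by waiting on all collected events here.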
  if (Blocking) {
    UR_CALL(getContext()->urDdiTable.Event.pfnWait(Events.size(), &Events[0]));
  }

  if (Event) {
    UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait(Queue, Events.size(),
                                                           &Events[0], Event));
  }

  return UR_RESULT_SUCCESS;
}

ur_result_t MemBuffer::getHandle(ur_device_handle_t Device, char *&Handle) {
  // Sub-buffers don't maintain their own allocations; they rely on the
  // parent buffer.
  if (SubBuffer) {
    UR_CALL(SubBuffer->Parent->getHandle(Device, Handle));
    Handle += SubBuffer->Origin;
    return UR_RESULT_SUCCESS;
  }

  // Device may be null; we follow the L0 adapter's practice of using the
  // first device.
  if (!Device) {
    auto Devices = GetDevices(Context);
    assert(Devices.size() > 0 && "Devices should not be empty");
    Device = Devices[0];
  }
  assert((void *)Device != nullptr && "Device cannot be nullptr");

  std::scoped_lock<ur_shared_mutex> Guard(Mutex);
  auto &Allocation = Allocations[Device];
  ur_result_t URes = UR_RESULT_SUCCESS;
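  // Lazily allocate device USM for this device on first use, then seed it
  // from the host pointer if the buffer was created with one.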
  if (!Allocation) {
    ur_usm_desc_t USMDesc{};
    USMDesc.align = getAlignment();
    ur_usm_pool_handle_t Pool{};
    URes = getTsanInterceptor()->allocateMemory(Context, Device, &USMDesc, Pool,
                                                Size, AllocType::DEVICE_USM,
                                                ur_cast<void **>(&Allocation));
    if (URes != UR_RESULT_SUCCESS) {
      getContext()->logger.error(
          "Failed to allocate {} bytes of memory for buffer {}", Size, this);
      return URes;
    }

    if (HostPtr) {
      ManagedQueue Queue(Context, Device);
      URes = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy(
          Queue, true, Allocation, HostPtr, Size, 0, nullptr, nullptr);
      if (URes != UR_RESULT_SUCCESS) {
        getContext()->logger.error("Failed to copy {} bytes of data from host "
                                   "pointer {} to buffer {}",
                                   Size, HostPtr, this);
        return URes;
      }
    }
  }

  Handle = Allocation;

  if (!LastSyncedDevice.hDevice) {
    LastSyncedDevice = MemBuffer::Device_t{Device, Handle};
    return URes;
  }

  // If the requested device is not the one we last synced to, the data needs
  // to be migrated.
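  // Migration is staged through a host USM allocation: the data is copied
  // from the last synced device to the host, then from the host to the newly
  // requested device.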
  if (Device != LastSyncedDevice.hDevice) {
    auto &HostAllocation = Allocations[nullptr];
    if (!HostAllocation) {
      ur_usm_desc_t USMDesc{};
      USMDesc.align = getAlignment();
      ur_usm_pool_handle_t Pool{};
      URes = getContext()->urDdiTable.USM.pfnHostAlloc(
          Context, &USMDesc, Pool, Size, ur_cast<void **>(&HostAllocation));
      if (URes != UR_RESULT_SUCCESS) {
        getContext()->logger.error("Failed to allocate {} bytes host "
                                   "USM for buffer {} migration",
                                   Size, this);
        return URes;
      }
    }

    // Copy data from last synced device to host
    {
      ManagedQueue Queue(Context, LastSyncedDevice.hDevice);
      URes = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy(
          Queue, true, HostAllocation, LastSyncedDevice.MemHandle, Size, 0,
          nullptr, nullptr);
      if (URes != UR_RESULT_SUCCESS) {
        getContext()->logger.error("Failed to migrate memory buffer data");
        return URes;
      }
    }

    // Sync data back to device
    {
      ManagedQueue Queue(Context, Device);
      URes = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy(
          Queue, true, Allocation, HostAllocation, Size, 0, nullptr, nullptr);
      if (URes != UR_RESULT_SUCCESS) {
        getContext()->logger.error("Failed to migrate memory buffer data");
        return URes;
      }
    }
  }

  LastSyncedDevice = MemBuffer::Device_t{Device, Handle};

  return URes;
}

ur_result_t MemBuffer::free() {
  for (const auto &[_, Ptr] : Allocations) {
    ur_result_t URes = getContext()->urDdiTable.USM.pfnFree(Context, Ptr);
    if (URes != UR_RESULT_SUCCESS) {
      getContext()->logger.error("Failed to free buffer handle {}", Ptr);
      return URes;
    }
  }
  Allocations.clear();
  return UR_RESULT_SUCCESS;
}

size_t MemBuffer::getAlignment() {
  // Choose the smallest power of two greater than Size, capped at 128.
  // TODO: If we don't set the alignment explicitly, the device will usually
  // choose a very large one (more than 1k), and the sanitizer would then
  // allocate extra unnecessary memory. Not sure if this impacts performance.
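  // For example, Size = 24 (MSB at bit 4) gives an alignment of 32, while
  // Size = 300 would give 512 and is therefore clamped to 128.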
  size_t MsbIdx = 63 - __builtin_clzl(Size);
  size_t Alignment = (1ULL << (MsbIdx + 1));
  if (Alignment > 128) {
    Alignment = 128;
  }
  return Alignment;
}

} // namespace tsan
} // namespace ur_sanitizer_layer