Skip to content

[DeviceMSAN] Use device usm to allocate LaunchInfo #17948

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Apr 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 4 additions & 7 deletions libdevice/sanitizer/msan_rtl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

DeviceGlobal<void *> __MsanLaunchInfo;
#define GetMsanLaunchInfo \
((__SYCL_GLOBAL__ MsanLaunchInfo *)__MsanLaunchInfo.get())
((__SYCL_GLOBAL__ MsanRuntimeData *)__MsanLaunchInfo.get())

namespace {

Expand Down Expand Up @@ -160,14 +160,11 @@ inline uptr __msan_get_shadow_dg2(uptr addr, uint32_t as) {
}

if (as != ADDRESS_SPACE_GLOBAL || !(addr & DG2_DEVICE_USM_MASK))
return (uptr)((__SYCL_GLOBAL__ MsanLaunchInfo *)__MsanLaunchInfo.get())
->CleanShadow;
return (uptr)GetMsanLaunchInfo->CleanShadow;

// Device USM only
auto shadow_begin = ((__SYCL_GLOBAL__ MsanLaunchInfo *)__MsanLaunchInfo.get())
->GlobalShadowOffset;
auto shadow_end = ((__SYCL_GLOBAL__ MsanLaunchInfo *)__MsanLaunchInfo.get())
->GlobalShadowOffsetEnd;
auto shadow_begin = GetMsanLaunchInfo->GlobalShadowOffset;
auto shadow_end = GetMsanLaunchInfo->GlobalShadowOffsetEnd;
if (addr < shadow_begin) {
return addr + (shadow_begin - DG2_DEVICE_USM_BEGIN);
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,9 @@ ur_result_t MsanInterceptor::allocateMemory(ur_context_handle_t Context,

*ResultPtr = Allocated;

ContextInfo->MaxAllocatedSize = std::max(ContextInfo->MaxAllocatedSize, Size);
if (Type != AllocType::DEVICE_USM) {
ContextInfo->CleanShadowSize = std::max(ContextInfo->CleanShadowSize, Size);
}

// For host/shared usm, we only record the alloc size.
if (Type != AllocType::DEVICE_USM) {
Expand Down Expand Up @@ -138,15 +140,16 @@ ur_result_t MsanInterceptor::postLaunchKernel(ur_kernel_handle_t Kernel,
// FIXME: We must use a blocking operation here until we support
// urEventSetCallback
auto Result = getContext()->urDdiTable.Queue.pfnFinish(Queue);
UR_CALL(LaunchInfo.Data.syncFromDevice(Queue));

if (Result == UR_RESULT_SUCCESS) {
const auto &Report = LaunchInfo.Data->Report;
const auto &Report = LaunchInfo.Data.Host.Report;

if (!Report.Flag) {
return Result;
}

ReportUsesUninitializedValue(LaunchInfo.Data->Report, Kernel);
ReportUsesUninitializedValue(LaunchInfo.Data.Host.Report, Kernel);

exitWithErrors();
}
Expand Down Expand Up @@ -286,8 +289,8 @@ MsanInterceptor::registerDeviceGlobals(ur_program_handle_t Program) {
MsanShadowMemoryDG2::IsDeviceUSM(GVInfo.Addr))) {
UR_CALL(DeviceInfo->Shadow->EnqueuePoisonShadow(Queue, GVInfo.Addr,
GVInfo.Size, 0));
ContextInfo->MaxAllocatedSize =
std::max(ContextInfo->MaxAllocatedSize, GVInfo.Size);
ContextInfo->CleanShadowSize =
std::max(ContextInfo->CleanShadowSize, GVInfo.Size);
}
}
}
Expand Down Expand Up @@ -471,16 +474,20 @@ ur_result_t MsanInterceptor::prepareLaunch(

// Set LaunchInfo
auto ContextInfo = getContextInfo(LaunchInfo.Context);
LaunchInfo.Data->GlobalShadowOffset = DeviceInfo->Shadow->ShadowBegin;
LaunchInfo.Data->GlobalShadowOffsetEnd = DeviceInfo->Shadow->ShadowEnd;
LaunchInfo.Data.Host.GlobalShadowOffset = DeviceInfo->Shadow->ShadowBegin;
LaunchInfo.Data.Host.GlobalShadowOffsetEnd = DeviceInfo->Shadow->ShadowEnd;

LaunchInfo.Data.Host.DeviceTy = DeviceInfo->Type;
LaunchInfo.Data.Host.Debug = getContext()->Options.Debug ? 1 : 0;

LaunchInfo.Data->DeviceTy = DeviceInfo->Type;
LaunchInfo.Data->Debug = getContext()->Options.Debug ? 1 : 0;
// Clean shadow
// Its content is always zero, and it is used for unsupported memory types
UR_CALL(getContext()->urDdiTable.USM.pfnDeviceAlloc(
ContextInfo->Handle, DeviceInfo->Handle, nullptr, nullptr,
ContextInfo->MaxAllocatedSize, (void **)&LaunchInfo.Data->CleanShadow));
UR_CALL(EnqueueUSMBlockingSet(Queue, (void *)LaunchInfo.Data->CleanShadow, 0,
ContextInfo->MaxAllocatedSize, 0, nullptr,
ContextInfo->CleanShadowSize,
(void **)&LaunchInfo.Data.Host.CleanShadow));
UR_CALL(EnqueueUSMBlockingSet(Queue, (void *)LaunchInfo.Data.Host.CleanShadow,
0, ContextInfo->CleanShadowSize, 0, nullptr,
nullptr));

if (LaunchInfo.LocalWorkSize.empty()) {
Expand Down Expand Up @@ -510,8 +517,8 @@ ur_result_t MsanInterceptor::prepareLaunch(
// Write shadow memory offset for local memory
if (KernelInfo.IsCheckLocals) {
if (DeviceInfo->Shadow->AllocLocalShadow(
Queue, NumWG, LaunchInfo.Data->LocalShadowOffset,
LaunchInfo.Data->LocalShadowOffsetEnd) != UR_RESULT_SUCCESS) {
Queue, NumWG, LaunchInfo.Data.Host.LocalShadowOffset,
LaunchInfo.Data.Host.LocalShadowOffsetEnd) != UR_RESULT_SUCCESS) {
getContext()->logger.warning(
"Failed to allocate shadow memory for local "
"memory, maybe the number of workgroup ({}) is too "
Expand All @@ -520,18 +527,18 @@ ur_result_t MsanInterceptor::prepareLaunch(
getContext()->logger.warning("Skip checking local memory of kernel <{}> ",
GetKernelName(Kernel));
} else {
getContext()->logger.debug("ShadowMemory(Local, WorkGroup={}, {} - {})",
NumWG,
(void *)LaunchInfo.Data->LocalShadowOffset,
(void *)LaunchInfo.Data->LocalShadowOffsetEnd);
getContext()->logger.debug(
"ShadowMemory(Local, WorkGroup={}, {} - {})", NumWG,
(void *)LaunchInfo.Data.Host.LocalShadowOffset,
(void *)LaunchInfo.Data.Host.LocalShadowOffsetEnd);
}
}

// Write shadow memory offset for private memory
if (KernelInfo.IsCheckPrivates) {
if (DeviceInfo->Shadow->AllocPrivateShadow(
Queue, NumWG, LaunchInfo.Data->PrivateShadowOffset,
LaunchInfo.Data->PrivateShadowOffsetEnd) != UR_RESULT_SUCCESS) {
Queue, NumWG, LaunchInfo.Data.Host.PrivateShadowOffset,
LaunchInfo.Data.Host.PrivateShadowOffsetEnd) != UR_RESULT_SUCCESS) {
getContext()->logger.warning(
"Failed to allocate shadow memory for private "
"memory, maybe the number of workgroup ({}) is too "
Expand All @@ -542,8 +549,8 @@ ur_result_t MsanInterceptor::prepareLaunch(
} else {
getContext()->logger.debug(
"ShadowMemory(Private, WorkGroup={}, {} - {})", NumWG,
(void *)LaunchInfo.Data->PrivateShadowOffset,
(void *)LaunchInfo.Data->PrivateShadowOffsetEnd);
(void *)LaunchInfo.Data.Host.PrivateShadowOffset,
(void *)LaunchInfo.Data.Host.PrivateShadowOffsetEnd);
}
// Write local arguments info
if (!KernelInfo.LocalArgs.empty()) {
Expand All @@ -553,22 +560,26 @@ ur_result_t MsanInterceptor::prepareLaunch(
getContext()->logger.debug("LocalArgs (argIndex={}, size={})", ArgIndex,
ArgInfo.Size);
}
UR_CALL(LaunchInfo.importLocalArgsInfo(Queue, LocalArgsInfo));
UR_CALL(LaunchInfo.Data.importLocalArgsInfo(Queue, LocalArgsInfo));
}
}

// sync msan runtime data to device side
UR_CALL(LaunchInfo.Data.syncToDevice(Queue));

getContext()->logger.info(
"LaunchInfo {} (GlobalShadow={}, LocalShadow={}, PrivateShadow={}, "
"CleanShadow={}, LocalArgs={}, NumLocalArgs={}, Device={}, Debug={})",
(void *)LaunchInfo.Data, (void *)LaunchInfo.Data->GlobalShadowOffset,
(void *)LaunchInfo.Data->LocalShadowOffset,
(void *)LaunchInfo.Data->PrivateShadowOffset,
(void *)LaunchInfo.Data->CleanShadow, (void *)LaunchInfo.Data->LocalArgs,
LaunchInfo.Data->NumLocalArgs, ToString(LaunchInfo.Data->DeviceTy),
LaunchInfo.Data->Debug);

ur_result_t URes =
EnqueueWriteGlobal("__MsanLaunchInfo", &LaunchInfo.Data, sizeof(uptr));
(void *)LaunchInfo.Data.getDevicePtr(),
(void *)LaunchInfo.Data.Host.GlobalShadowOffset,
(void *)LaunchInfo.Data.Host.LocalShadowOffset,
(void *)LaunchInfo.Data.Host.PrivateShadowOffset,
(void *)LaunchInfo.Data.Host.CleanShadow,
(void *)LaunchInfo.Data.Host.LocalArgs, LaunchInfo.Data.Host.NumLocalArgs,
ToString(LaunchInfo.Data.Host.DeviceTy), LaunchInfo.Data.Host.Debug);

ur_result_t URes = EnqueueWriteGlobal(
"__MsanLaunchInfo", &LaunchInfo.Data.DevicePtr, sizeof(uptr));
if (URes != UR_RESULT_SUCCESS) {
getContext()->logger.info("EnqueueWriteGlobal(__MsanLaunchInfo) "
"failed, maybe empty kernel: {}",
Expand Down Expand Up @@ -641,47 +652,30 @@ ContextInfo::~ContextInfo() {
ur_result_t USMLaunchInfo::initialize() {
UR_CALL(getContext()->urDdiTable.Context.pfnRetain(Context));
UR_CALL(getContext()->urDdiTable.Device.pfnRetain(Device));
UR_CALL(getContext()->urDdiTable.USM.pfnSharedAlloc(
Context, Device, nullptr, nullptr, sizeof(MsanLaunchInfo),
(void **)&Data));
*Data = MsanLaunchInfo{};
return UR_RESULT_SUCCESS;
}

USMLaunchInfo::~USMLaunchInfo() {
[[maybe_unused]] ur_result_t Result;
if (Data) {
if (Data->CleanShadow) {
Result = getContext()->urDdiTable.USM.pfnFree(Context,
(void *)Data->CleanShadow);
assert(Result == UR_RESULT_SUCCESS);
}
Result = getContext()->urDdiTable.USM.pfnFree(Context, (void *)Data);
MsanRuntimeDataWrapper::~MsanRuntimeDataWrapper() {
if (Host.CleanShadow) {
[[maybe_unused]] auto Result =
getContext()->urDdiTable.USM.pfnFree(Context, (void *)Host.CleanShadow);
assert(Result == UR_RESULT_SUCCESS);
}
if (DevicePtr) {
[[maybe_unused]] auto Result =
getContext()->urDdiTable.USM.pfnFree(Context, (void *)DevicePtr);
assert(Result == UR_RESULT_SUCCESS);
}
}

USMLaunchInfo::~USMLaunchInfo() {
[[maybe_unused]] ur_result_t Result;
Result = getContext()->urDdiTable.Context.pfnRelease(Context);
assert(Result == UR_RESULT_SUCCESS);
Result = getContext()->urDdiTable.Device.pfnRelease(Device);
assert(Result == UR_RESULT_SUCCESS);
}

ur_result_t USMLaunchInfo::importLocalArgsInfo(
ur_queue_handle_t Queue, const std::vector<MsanLocalArgsInfo> &LocalArgs) {
assert(!LocalArgs.empty());

Data->NumLocalArgs = LocalArgs.size();
const size_t LocalArgsInfoSize = sizeof(MsanLocalArgsInfo) * LocalArgs.size();
UR_CALL(getContext()->urDdiTable.USM.pfnSharedAlloc(
Context, Device, nullptr, nullptr, LocalArgsInfoSize,
ur_cast<void **>(&Data->LocalArgs)));

UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMMemcpy(
Queue, true, Data->LocalArgs, LocalArgs.data(), LocalArgsInfoSize, 0,
nullptr, nullptr));

return UR_RESULT_SUCCESS;
}

} // namespace msan

using namespace msan;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ struct ProgramInfo {

struct ContextInfo {
ur_context_handle_t Handle;
size_t MaxAllocatedSize = 1024;
size_t CleanShadowSize = 1024;
std::atomic<int32_t> RefCount = 1;

std::vector<ur_device_handle_t> DeviceList;
Expand All @@ -148,8 +148,71 @@ struct ContextInfo {
~ContextInfo();
};

/// Pairs a host-side staging copy of the MSan runtime data with a lazily
/// allocated device USM copy of the same structure.
///
/// `Host` is edited on the host; syncToDevice() and syncFromDevice() copy
/// sizeof(MsanRuntimeData) bytes between `Host` and the device allocation.
///
/// NOTE(review): this type has a user-declared destructor and holds raw USM
/// pointers, but its copy operations are left implicit. Copying an instance
/// would duplicate `DevicePtr` — confirm that no caller copies it, or delete
/// the copy operations.
struct MsanRuntimeDataWrapper {
  /// Host-side staging copy of the runtime data.
  MsanRuntimeData Host{};

  /// Device USM copy of the runtime data; stays null until first needed.
  MsanRuntimeData *DevicePtr = nullptr;

  ur_context_handle_t Context{};

  ur_device_handle_t Device{};

  MsanRuntimeDataWrapper(ur_context_handle_t Context, ur_device_handle_t Device)
      : Context(Context), Device(Device) {}

  ~MsanRuntimeDataWrapper();

  /// Returns the device copy, allocating it on first use.
  ///
  /// @return the device USM pointer, or nullptr if the allocation failed (the
  ///         failure is logged).
  MsanRuntimeData *getDevicePtr() {
    // The result code is dropped on purpose: this accessor reports failure
    // through the null return value, exactly as before.
    (void)ensureDevicePtr();
    return DevicePtr;
  }

  /// Copies the device copy of the runtime data back into `Host`.
  ///
  /// @param Queue queue the copy is enqueued on.
  /// @return the allocation error if the device copy could not be created,
  ///         otherwise the result of the copy.
  ur_result_t syncFromDevice(ur_queue_handle_t Queue) {
    // Fail with the allocator's own error instead of handing a null source
    // pointer to the memcpy entry point.
    UR_CALL(ensureDevicePtr());

    // `true` is presumably the blocking flag of the USM memcpy entry point
    // (confirm against the Unified Runtime spec).
    UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMMemcpy(
        Queue, true, ur_cast<void *>(&Host), DevicePtr,
        sizeof(MsanRuntimeData), 0, nullptr, nullptr));

    return UR_RESULT_SUCCESS;
  }

  /// Copies `Host` into the device copy of the runtime data.
  ///
  /// @param Queue queue the copy is enqueued on.
  /// @return the allocation error if the device copy could not be created,
  ///         otherwise the result of the copy.
  ur_result_t syncToDevice(ur_queue_handle_t Queue) {
    // Fail with the allocator's own error instead of handing a null
    // destination pointer to the memcpy entry point.
    UR_CALL(ensureDevicePtr());

    UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMMemcpy(
        Queue, true, DevicePtr, ur_cast<void *>(&Host),
        sizeof(MsanRuntimeData), 0, nullptr, nullptr));

    return UR_RESULT_SUCCESS;
  }

  /// Allocates device USM for the local-argument table, records it in
  /// `Host.LocalArgs` / `Host.NumLocalArgs`, and copies `LocalArgs` into it.
  ///
  /// The device only sees the new pointer and count after a later
  /// syncToDevice().
  ///
  /// NOTE(review): nothing in this struct releases `Host.LocalArgs`, and a
  /// second call would overwrite the previous pointer — confirm where this
  /// allocation is freed.
  ///
  /// @param Queue     queue the copy is enqueued on.
  /// @param LocalArgs table to upload; must not be empty.
  ur_result_t
  importLocalArgsInfo(ur_queue_handle_t Queue,
                      const std::vector<MsanLocalArgsInfo> &LocalArgs) {
    assert(!LocalArgs.empty());

    Host.NumLocalArgs = LocalArgs.size();
    const size_t LocalArgsInfoSize =
        sizeof(MsanLocalArgsInfo) * Host.NumLocalArgs;
    UR_CALL(getContext()->urDdiTable.USM.pfnDeviceAlloc(
        Context, Device, nullptr, nullptr, LocalArgsInfoSize,
        ur_cast<void **>(&Host.LocalArgs)));

    UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMMemcpy(
        Queue, true, Host.LocalArgs, LocalArgs.data(), LocalArgsInfoSize, 0,
        nullptr, nullptr));

    return UR_RESULT_SUCCESS;
  }

private:
  /// Allocates the device copy if it does not exist yet.
  ///
  /// @return UR_RESULT_SUCCESS when `DevicePtr` is usable, otherwise the
  ///         error returned by the device USM allocation (also logged).
  ur_result_t ensureDevicePtr() {
    if (DevicePtr != nullptr) {
      return UR_RESULT_SUCCESS;
    }

    ur_result_t Result = getContext()->urDdiTable.USM.pfnDeviceAlloc(
        Context, Device, nullptr, nullptr, sizeof(MsanRuntimeData),
        (void **)&DevicePtr);
    if (Result != UR_RESULT_SUCCESS) {
      getContext()->logger.error(
          "Failed to alloc device usm for msan runtime data: {}", Result);
      // Never leave a stale value behind after a failed allocation.
      DevicePtr = nullptr;
    }
    return Result;
  }
};

struct USMLaunchInfo {
MsanLaunchInfo *Data = nullptr;
MsanRuntimeDataWrapper Data;

ur_context_handle_t Context = nullptr;
ur_device_handle_t Device = nullptr;
Expand All @@ -161,8 +224,9 @@ struct USMLaunchInfo {
USMLaunchInfo(ur_context_handle_t Context, ur_device_handle_t Device,
const size_t *GlobalWorkSize, const size_t *LocalWorkSize,
const size_t *GlobalWorkOffset, uint32_t WorkDim)
: Context(Context), Device(Device), GlobalWorkSize(GlobalWorkSize),
GlobalWorkOffset(GlobalWorkOffset), WorkDim(WorkDim) {
: Data(Context, Device), Context(Context), Device(Device),
GlobalWorkSize(GlobalWorkSize), GlobalWorkOffset(GlobalWorkOffset),
WorkDim(WorkDim) {
if (LocalWorkSize) {
this->LocalWorkSize =
std::vector<size_t>(LocalWorkSize, LocalWorkSize + WorkDim);
Expand All @@ -171,9 +235,6 @@ struct USMLaunchInfo {
~USMLaunchInfo();

ur_result_t initialize();
ur_result_t
importLocalArgsInfo(ur_queue_handle_t Queue,
const std::vector<MsanLocalArgsInfo> &LocalArgs);
};

struct DeviceGlobalInfo {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ struct MsanLocalArgsInfo {
uint64_t Size = 0;
};

struct MsanLaunchInfo {
struct MsanRuntimeData {
uintptr_t GlobalShadowOffset = 0;
uintptr_t GlobalShadowOffsetEnd = 0;

Expand Down
Loading