Skip to content

Commit eca641e

Browse files
authored
[DevTSAN] Clean shadow before each kernel launch (#19355)
* Previously we only clean shadow once, this may cause false positive reports sometimes. So, we need to clean shadow of allocated memory before each kernel launch. * Allow internal managed queue to be created with out of order property for better performance. * Updated one test for better code coverage.
1 parent f9de199 commit eca641e

File tree

6 files changed

+265
-38
lines changed

6 files changed

+265
-38
lines changed

sycl/test-e2e/ThreadSanitizer/group_local_memory.cpp

Lines changed: 28 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -13,25 +13,41 @@ int main() {
1313
auto *sum = sycl::malloc_shared<int>(1, Q);
1414

1515
Q.submit([&](sycl::handler &h) {
16-
h.parallel_for<class Test>(
17-
sycl::nd_range<1>(128, 16), [=](sycl::nd_item<1> item) {
18-
auto ptr =
19-
sycl::ext::oneapi::group_local_memory<int>(item.get_group());
20-
*ptr += item.get_global_linear_id();
16+
h.parallel_for<class Test>(
17+
sycl::nd_range<1>(128, 16), [=](sycl::nd_item<1> item) {
18+
auto ptr =
19+
sycl::ext::oneapi::group_local_memory<int>(item.get_group());
20+
*ptr += item.get_global_linear_id();
2121

22-
check(ptr, item.get_local_linear_id());
22+
check(ptr, item.get_local_linear_id());
2323

24-
item.barrier();
24+
item.barrier();
2525

26-
if (item.get_global_linear_id() == 0)
27-
*sum = *ptr;
28-
});
29-
});
30-
Q.wait();
26+
if (item.get_global_linear_id() == 0)
27+
*sum = *ptr;
28+
});
29+
}).wait();
3130
// CHECK: WARNING: DeviceSanitizer: data race
3231
// CHECK-NEXT: When write of size 4 at 0x{{.*}} in kernel <{{.*}}Test>
3332
// CHECK-NEXT: #0 {{.*}}group_local_memory.cpp
3433

34+
// More work groups to triger local shadow re-allocated
35+
Q.submit([&](sycl::handler &h) {
36+
h.parallel_for<class Test2>(
37+
sycl::nd_range<1>(256, 16), [=](sycl::nd_item<1> item) {
38+
auto ptr =
39+
sycl::ext::oneapi::group_local_memory<int>(item.get_group());
40+
*ptr += item.get_global_linear_id();
41+
42+
check(ptr, item.get_local_linear_id());
43+
44+
item.barrier();
45+
46+
if (item.get_global_linear_id() == 0)
47+
*sum = *ptr;
48+
});
49+
}).wait();
50+
3551
sycl::free(sum, Q);
3652
return 0;
3753
}

unified-runtime/source/loader/layers/sanitizer/tsan/tsan_buffer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ ur_result_t MemBuffer::getHandle(ur_device_handle_t Device, char *&Handle) {
186186

187187
ur_result_t MemBuffer::free() {
188188
for (const auto &[_, Ptr] : Allocations) {
189-
ur_result_t URes = getContext()->urDdiTable.USM.pfnFree(Context, Ptr);
189+
ur_result_t URes = getTsanInterceptor()->releaseMemory(Context, Ptr);
190190
if (URes != UR_RESULT_SUCCESS) {
191191
UR_LOG_L(getContext()->logger, ERR, "Failed to free buffer handle {}",
192192
(void *)Ptr);

unified-runtime/source/loader/layers/sanitizer/tsan/tsan_ddi.cpp

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,11 +201,118 @@ ur_result_t urProgramLink(
201201
PrintUrBuildLogIfError(UrRes, *phProgram, Devices.data(), Devices.size());
202202
return UrRes;
203203
}
204+
UR_CALL(getTsanInterceptor()->insertProgram(*phProgram));
204205
UR_CALL(getTsanInterceptor()->registerProgram(*phProgram));
205206

206207
return UR_RESULT_SUCCESS;
207208
}
208209

210+
///////////////////////////////////////////////////////////////////////////////
211+
/// @brief Intercept function for urProgramCreateWithIL
212+
__urdlllocal ur_result_t UR_APICALL urProgramCreateWithIL(
213+
/// [in] handle of the context instance
214+
ur_context_handle_t hContext,
215+
/// [in] pointer to IL binary.
216+
const void *pIL,
217+
/// [in] length of `pIL` in bytes.
218+
size_t length,
219+
/// [in][optional] pointer to program creation properties.
220+
const ur_program_properties_t *pProperties,
221+
/// [out] pointer to handle of program object created.
222+
ur_program_handle_t *phProgram) {
223+
UR_LOG_L(getContext()->logger, DEBUG, "==== urProgramCreateWithIL");
224+
225+
UR_CALL(getContext()->urDdiTable.Program.pfnCreateWithIL(
226+
hContext, pIL, length, pProperties, phProgram));
227+
UR_CALL(getTsanInterceptor()->insertProgram(*phProgram));
228+
229+
return UR_RESULT_SUCCESS;
230+
}
231+
232+
///////////////////////////////////////////////////////////////////////////////
233+
/// @brief Intercept function for urProgramCreateWithBinary
234+
__urdlllocal ur_result_t UR_APICALL urProgramCreateWithBinary(
235+
/// [in] handle of the context instance
236+
ur_context_handle_t hContext,
237+
/// [in] number of devices
238+
uint32_t numDevices,
239+
/// [in][range(0, numDevices)] a pointer to a list of device handles.
240+
/// The binaries are loaded for devices specified in this list.
241+
ur_device_handle_t *phDevices,
242+
/// [in][range(0, numDevices)] array of sizes of program binaries specified
243+
/// by `pBinaries` (in bytes).
244+
size_t *pLengths,
245+
/// [in][range(0, numDevices)] pointer to program binaries to be loaded
246+
/// for devices specified by `phDevices`.
247+
const uint8_t **ppBinaries,
248+
/// [in][optional] pointer to program creation properties.
249+
const ur_program_properties_t *pProperties,
250+
/// [out] pointer to handle of Program object created.
251+
ur_program_handle_t *phProgram) {
252+
UR_LOG_L(getContext()->logger, DEBUG, "==== urProgramCreateWithBinary");
253+
254+
UR_CALL(getContext()->urDdiTable.Program.pfnCreateWithBinary(
255+
hContext, numDevices, phDevices, pLengths, ppBinaries, pProperties,
256+
phProgram));
257+
UR_CALL(getTsanInterceptor()->insertProgram(*phProgram));
258+
259+
return UR_RESULT_SUCCESS;
260+
}
261+
262+
///////////////////////////////////////////////////////////////////////////////
263+
/// @brief Intercept function for urProgramCreateWithNativeHandle
264+
__urdlllocal ur_result_t UR_APICALL urProgramCreateWithNativeHandle(
265+
/// [in][nocheck] the native handle of the program.
266+
ur_native_handle_t hNativeProgram,
267+
/// [in] handle of the context instance
268+
ur_context_handle_t hContext,
269+
/// [in][optional] pointer to native program properties struct.
270+
const ur_program_native_properties_t *pProperties,
271+
/// [out] pointer to the handle of the program object created.
272+
ur_program_handle_t *phProgram) {
273+
UR_LOG_L(getContext()->logger, DEBUG, "==== urProgramCreateWithNativeHandle");
274+
275+
UR_CALL(getContext()->urDdiTable.Program.pfnCreateWithNativeHandle(
276+
hNativeProgram, hContext, pProperties, phProgram));
277+
UR_CALL(getTsanInterceptor()->insertProgram(*phProgram));
278+
279+
return UR_RESULT_SUCCESS;
280+
}
281+
282+
///////////////////////////////////////////////////////////////////////////////
283+
/// @brief Intercept function for urProgramRetain
284+
__urdlllocal ur_result_t UR_APICALL urProgramRetain(
285+
ur_program_handle_t
286+
/// [in][retain] handle for the Program to retain
287+
hProgram) {
288+
UR_LOG_L(getContext()->logger, DEBUG, "==== urProgramRetain");
289+
290+
UR_CALL(getContext()->urDdiTable.Program.pfnRetain(hProgram));
291+
292+
auto &ProgramInfo = getTsanInterceptor()->getProgramInfo(hProgram);
293+
ProgramInfo.RefCount++;
294+
295+
return UR_RESULT_SUCCESS;
296+
}
297+
298+
///////////////////////////////////////////////////////////////////////////////
299+
/// @brief Intercept function for urProgramRelease
300+
ur_result_t UR_APICALL urProgramRelease(
301+
/// [in][release] handle for the Program to release
302+
ur_program_handle_t hProgram) {
303+
UR_LOG_L(getContext()->logger, DEBUG, "==== urProgramRelease");
304+
305+
UR_CALL(getContext()->urDdiTable.Program.pfnRelease(hProgram));
306+
307+
auto &ProgramInfo = getTsanInterceptor()->getProgramInfo(hProgram);
308+
if (--ProgramInfo.RefCount == 0) {
309+
UR_CALL(getTsanInterceptor()->unregisterProgram(hProgram));
310+
UR_CALL(getTsanInterceptor()->eraseProgram(hProgram));
311+
}
312+
313+
return UR_RESULT_SUCCESS;
314+
}
315+
209316
///////////////////////////////////////////////////////////////////////////////
210317
/// @brief Intercept function for urProgramBuildExp
211318
ur_result_t urProgramBuildExp(
@@ -255,6 +362,7 @@ ur_result_t urProgramLinkExp(
255362
return UrRes;
256363
}
257364

365+
UR_CALL(getTsanInterceptor()->insertProgram(*phProgram));
258366
UR_CALL(getTsanInterceptor()->registerProgram(*phProgram));
259367

260368
return UR_RESULT_SUCCESS;
@@ -1157,6 +1265,18 @@ __urdlllocal ur_result_t UR_APICALL urUSMSharedAlloc(
11571265
hContext, hDevice, pUSMDesc, pool, size, AllocType::SHARED_USM, ppMem);
11581266
}
11591267

1268+
///////////////////////////////////////////////////////////////////////////////
1269+
/// @brief Intercept function for urUSMFree
1270+
__urdlllocal ur_result_t UR_APICALL urUSMFree(
1271+
/// [in] handle of the context object
1272+
ur_context_handle_t hContext,
1273+
/// [in] pointer to USM memory object
1274+
void *pMem) {
1275+
UR_LOG_L(getContext()->logger, DEBUG, "==== urUSMFree");
1276+
1277+
return getTsanInterceptor()->releaseMemory(hContext, pMem);
1278+
}
1279+
11601280
///////////////////////////////////////////////////////////////////////////////
11611281
/// @brief Intercept function for urEnqueueKernelLaunch
11621282
ur_result_t urEnqueueKernelLaunch(
@@ -1285,6 +1405,13 @@ ur_result_t urGetProgramProcAddrTable(
12851405
return UR_RESULT_ERROR_INVALID_NULL_POINTER;
12861406
}
12871407

1408+
pDdiTable->pfnCreateWithIL = ur_sanitizer_layer::tsan::urProgramCreateWithIL;
1409+
pDdiTable->pfnCreateWithBinary =
1410+
ur_sanitizer_layer::tsan::urProgramCreateWithBinary;
1411+
pDdiTable->pfnCreateWithNativeHandle =
1412+
ur_sanitizer_layer::tsan::urProgramCreateWithNativeHandle;
1413+
pDdiTable->pfnRetain = ur_sanitizer_layer::tsan::urProgramRetain;
1414+
pDdiTable->pfnRelease = ur_sanitizer_layer::tsan::urProgramRelease;
12881415
pDdiTable->pfnBuild = ur_sanitizer_layer::tsan::urProgramBuild;
12891416
pDdiTable->pfnLink = ur_sanitizer_layer::tsan::urProgramLink;
12901417

@@ -1380,6 +1507,7 @@ __urdlllocal ur_result_t UR_APICALL urGetUSMProcAddrTable(
13801507
pDdiTable->pfnDeviceAlloc = ur_sanitizer_layer::tsan::urUSMDeviceAlloc;
13811508
pDdiTable->pfnHostAlloc = ur_sanitizer_layer::tsan::urUSMHostAlloc;
13821509
pDdiTable->pfnSharedAlloc = ur_sanitizer_layer::tsan::urUSMSharedAlloc;
1510+
pDdiTable->pfnFree = ur_sanitizer_layer::tsan::urUSMFree;
13831511

13841512
return UR_RESULT_SUCCESS;
13851513
}

unified-runtime/source/loader/layers/sanitizer/tsan/tsan_interceptor.cpp

Lines changed: 55 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -102,16 +102,9 @@ ur_result_t DeviceInfo::allocShadowMemory() {
102102
return UR_RESULT_SUCCESS;
103103
}
104104

105-
void ContextInfo::insertAllocInfo(ur_device_handle_t Device, TsanAllocInfo AI) {
106-
if (Device) {
107-
std::scoped_lock<ur_shared_mutex> Guard(AllocInfosMapMutex);
108-
AllocInfosMap[Device].emplace_back(std::move(AI));
109-
} else {
110-
for (auto Device : DeviceList) {
111-
std::scoped_lock<ur_shared_mutex> Guard(AllocInfosMapMutex);
112-
AllocInfosMap[Device].emplace_back(AI);
113-
}
114-
}
105+
void ContextInfo::insertAllocInfo(TsanAllocInfo AI) {
106+
std::scoped_lock<ur_shared_mutex> Guard(AllocInfosMutex);
107+
AllocInfos.insert(std::move(AI));
115108
}
116109

117110
TsanInterceptor::~TsanInterceptor() {
@@ -154,24 +147,47 @@ ur_result_t TsanInterceptor::allocateMemory(ur_context_handle_t Context,
154147

155148
auto AI = TsanAllocInfo{reinterpret_cast<uptr>(Allocated), Size};
156149
// For updating shadow memory
157-
CI->insertAllocInfo(Device, std::move(AI));
150+
CI->insertAllocInfo(std::move(AI));
158151

159152
*ResultPtr = Allocated;
160153
return UR_RESULT_SUCCESS;
161154
}
162155

156+
ur_result_t TsanInterceptor::releaseMemory(ur_context_handle_t Context,
157+
void *Ptr) {
158+
auto CI = getContextInfo(Context);
159+
auto Addr = reinterpret_cast<uptr>(Ptr);
160+
{
161+
std::scoped_lock<ur_shared_mutex> Guard(CI->AllocInfosMutex);
162+
auto It = std::find_if(CI->AllocInfos.begin(), CI->AllocInfos.end(),
163+
[&](auto &P) { return P.AllocBegin == Addr; });
164+
CI->AllocInfos.erase(It);
165+
}
166+
167+
UR_CALL(getContext()->urDdiTable.USM.pfnFree(Context, Ptr));
168+
return UR_RESULT_SUCCESS;
169+
}
170+
163171
ur_result_t TsanInterceptor::registerProgram(ur_program_handle_t Program) {
164172
UR_LOG_L(getContext()->logger, INFO, "registerDeviceGlobals");
165173
UR_CALL(registerDeviceGlobals(Program));
166174
return UR_RESULT_SUCCESS;
167175
}
168176

177+
ur_result_t TsanInterceptor::unregisterProgram(ur_program_handle_t Program) {
178+
UR_LOG_L(getContext()->logger, INFO, "unregisterDeviceGlobals");
179+
auto &ProgramInfo = getProgramInfo(Program);
180+
ProgramInfo.AllocInfoForGlobals.clear();
181+
return UR_RESULT_SUCCESS;
182+
}
183+
169184
ur_result_t
170185
TsanInterceptor::registerDeviceGlobals(ur_program_handle_t Program) {
171186
std::vector<ur_device_handle_t> Devices = GetDevices(Program);
172187
assert(Devices.size() != 0 && "No devices in registerDeviceGlobals");
173188
auto Context = GetContext(Program);
174189
auto ContextInfo = getContextInfo(Context);
190+
auto &ProgramInfo = getProgramInfo(Program);
175191

176192
for (auto Device : Devices) {
177193
ManagedQueue Queue(Context, Device);
@@ -202,7 +218,7 @@ TsanInterceptor::registerDeviceGlobals(ur_program_handle_t Program) {
202218
for (size_t i = 0; i < NumOfDeviceGlobal; i++) {
203219
const auto &GVInfo = GVInfos[i];
204220
auto AI = TsanAllocInfo{GVInfo.Addr, GVInfo.Size};
205-
ContextInfo->insertAllocInfo(Device, std::move(AI));
221+
ProgramInfo.AllocInfoForGlobals.emplace_back(std::move(AI));
206222
}
207223
}
208224

@@ -269,6 +285,22 @@ ur_result_t TsanInterceptor::insertDevice(ur_device_handle_t Device,
269285
return UR_RESULT_SUCCESS;
270286
}
271287

288+
ur_result_t TsanInterceptor::insertProgram(ur_program_handle_t Program) {
289+
std::scoped_lock<ur_shared_mutex> Guard(m_ProgramMapMutex);
290+
if (m_ProgramMap.find(Program) != m_ProgramMap.end()) {
291+
return UR_RESULT_SUCCESS;
292+
}
293+
m_ProgramMap.emplace(Program, Program);
294+
return UR_RESULT_SUCCESS;
295+
}
296+
297+
ur_result_t TsanInterceptor::eraseProgram(ur_program_handle_t Program) {
298+
std::scoped_lock<ur_shared_mutex> Guard(m_ProgramMapMutex);
299+
assert(m_ProgramMap.find(Program) != m_ProgramMap.end());
300+
m_ProgramMap.erase(Program);
301+
return UR_RESULT_SUCCESS;
302+
}
303+
272304
ur_result_t
273305
TsanInterceptor::insertMemBuffer(std::shared_ptr<MemBuffer> MemBuffer) {
274306
std::scoped_lock<ur_shared_mutex> Guard(m_MemBufferMapMutex);
@@ -309,7 +341,7 @@ ur_result_t TsanInterceptor::preLaunchKernel(ur_kernel_handle_t Kernel,
309341

310342
UR_CALL(prepareLaunch(CI, DI, InternalQueue, Kernel, LaunchInfo));
311343

312-
UR_CALL(updateShadowMemory(CI, DI, InternalQueue));
344+
UR_CALL(updateShadowMemory(CI, DI, Kernel, InternalQueue));
313345

314346
return UR_RESULT_SUCCESS;
315347
}
@@ -434,16 +466,19 @@ ur_result_t TsanInterceptor::prepareLaunch(std::shared_ptr<ContextInfo> &,
434466
return UR_RESULT_SUCCESS;
435467
}
436468

437-
ur_result_t
438-
TsanInterceptor::updateShadowMemory(std::shared_ptr<ContextInfo> &CI,
439-
std::shared_ptr<DeviceInfo> &DI,
440-
ur_queue_handle_t Queue) {
441-
std::scoped_lock<ur_shared_mutex> Guard(CI->AllocInfosMapMutex);
442-
for (auto &AllocInfo : CI->AllocInfosMap[DI->Handle]) {
469+
ur_result_t TsanInterceptor::updateShadowMemory(
470+
std::shared_ptr<ContextInfo> &CI, std::shared_ptr<DeviceInfo> &DI,
471+
ur_kernel_handle_t Kernel, ur_queue_handle_t Queue) {
472+
auto &PI = getProgramInfo(GetProgram(Kernel));
473+
std::scoped_lock<ur_shared_mutex> Guard(CI->AllocInfosMutex);
474+
for (auto &AllocInfo : CI->AllocInfos) {
475+
UR_CALL(DI->Shadow->CleanShadow(Queue, AllocInfo.AllocBegin,
476+
AllocInfo.AllocSize));
477+
}
478+
for (auto &AllocInfo : PI.AllocInfoForGlobals) {
443479
UR_CALL(DI->Shadow->CleanShadow(Queue, AllocInfo.AllocBegin,
444480
AllocInfo.AllocSize));
445481
}
446-
CI->AllocInfosMap[DI->Handle].clear();
447482
return UR_RESULT_SUCCESS;
448483
}
449484

0 commit comments

Comments
 (0)