From 61ad878a0ca3082c481d1c48c9d583e9441d9faf Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Wed, 7 Jan 2026 20:08:13 +0100 Subject: [PATCH 1/5] [SYCL] Fix race in kernel_impl::getName() --- sycl/source/detail/kernel_impl.cpp | 8 ++- sycl/test/regression/queue_submit.cpp | 78 +++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 2 deletions(-) create mode 100755 sycl/test/regression/queue_submit.cpp diff --git a/sycl/source/detail/kernel_impl.cpp b/sycl/source/detail/kernel_impl.cpp index 484291366ee3d..c2d982aa3d952 100644 --- a/sycl/source/detail/kernel_impl.cpp +++ b/sycl/source/detail/kernel_impl.cpp @@ -118,8 +118,12 @@ bool kernel_impl::hasSYCLMetadata() const noexcept { // TODO this is how kernel_impl::get_info should behave instead. std::string_view kernel_impl::getName() const { - if (MName.empty()) - MName = get_info(); + static std::once_flag NameInitFlag; + while (MName.empty()) { + std::call_once(NameInitFlag, + [&]() { MName = get_info(); }); + } + return MName; } diff --git a/sycl/test/regression/queue_submit.cpp b/sycl/test/regression/queue_submit.cpp new file mode 100755 index 0000000000000..8047e4351a917 --- /dev/null +++ b/sycl/test/regression/queue_submit.cpp @@ -0,0 +1,78 @@ +//==---------------- queue_submit.cpp - SYCL queue submit test +//--------------==// + +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %t.out + +#include +#include +#include + +#define DIMS 1024 + +class kernel_set_value; + +void submit(sycl::queue *queue, sycl::kernel *kernel) { + int data[DIMS]; + try { + sycl::buffer result_buf{data, sycl::range<1>{DIMS}}; + queue->submit([&](sycl::handler &cgh) { + auto result_acc = + result_buf.get_access(cgh); + cgh.set_arg(0, result_acc); + cgh.parallel_for(sycl::range<1>{DIMS}, *kernel); + }); + queue->wait_and_throw(); + } catch (sycl::exception &e) { + std::cerr << "Exception thrown: " << e.what() << "\n"; + return; + } + + for (int i = 0; i < DIMS; i++) { + if (data[i] != i) { + std::cerr << "data[" << i << "] != " << i << " (got " << data[i] << ")\n"; + } + } +} + +class single_queue_with_kernel { +public: + single_queue_with_kernel(size_t n) + : numThreads(n), queue(sycl::default_selector_v), + kernel(sycl::get_kernel_bundle( + queue.get_context()) + .get_kernel(sycl::get_kernel_id())) {} + + void run_threads() { + if (0) { + sycl::buffer result_buf{sycl::range<1>{DIMS}}; + queue.submit([&](sycl::handler &cgh) { + auto result_acc = + result_buf.get_access(cgh); + cgh.parallel_for( + sycl::range<1>{DIMS}, + [=](sycl::id<1> idx) { result_acc[idx] = idx[0]; }); + }); + } + + std::vector threads; + for (size_t i = 0; i < numThreads; ++i) + threads.emplace_back(new std::thread(&submit, &queue, &kernel)); + + for (auto &t : threads) { + t->join(); + delete t; + } + } + +private: + sycl::queue queue; + sycl::kernel kernel; + size_t numThreads; +}; + +int main() { + single_queue_with_kernel test_q(10); + test_q.run_threads(); + return 0; +} From d440c2c5650b0b7dac4b225066a1eb103fbf0ef3 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Wed, 7 Jan 2026 20:11:28 +0100 Subject: [PATCH 2/5] Fix test header --- sycl/test/regression/queue_submit.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sycl/test/regression/queue_submit.cpp b/sycl/test/regression/queue_submit.cpp index 8047e4351a917..12f144a38f435 100755 --- a/sycl/test/regression/queue_submit.cpp +++ b/sycl/test/regression/queue_submit.cpp @@ -1,9 +1,11 @@ -//==---------------- queue_submit.cpp - SYCL queue submit test -//--------------==// +//==--- queue_submit.cpp - SYCL queue submit test --------------==// // RUN: %clangxx -fsycl %s -o %t.out // RUN: %t.out +// This test submits same kernel via multiple threads to the same queue. +// It's a regression test for CMPLRLLVM-72408 + #include #include #include From 1c83a49bb0c576a74255555ab7e6ffed98b5e429 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Thu, 8 Jan 2026 00:50:32 +0100 Subject: [PATCH 3/5] Move test to E2E --- sycl/test-e2e/Regression/queue_submit.cpp | 71 ++++++++++++++++++++ sycl/test/regression/queue_submit.cpp | 80 ----------------------- 2 files changed, 71 insertions(+), 80 deletions(-) create mode 100755 sycl/test-e2e/Regression/queue_submit.cpp delete mode 100755 sycl/test/regression/queue_submit.cpp diff --git a/sycl/test-e2e/Regression/queue_submit.cpp b/sycl/test-e2e/Regression/queue_submit.cpp new file mode 100755 index 0000000000000..6110002cc949a --- /dev/null +++ b/sycl/test-e2e/Regression/queue_submit.cpp @@ -0,0 +1,71 @@ +//==--- queue_submit.cpp - SYCL queue submit test --------------==// + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +// This test submits same kernel via multiple threads to the same queue. +// It's a regression test for CMPLRLLVM-72408 + +#include +#include +#include + +#define DIMS 1024 + +class kernel_set_value; + +void submit(sycl::queue *queue, sycl::kernel *kernel) { + int data[DIMS]; + try { + sycl::buffer result_buf{data, sycl::range<1>{DIMS}}; + queue->submit([&](sycl::handler &cgh) { + auto result_acc = + result_buf.get_access(cgh); + cgh.set_arg(0, result_acc); + cgh.parallel_for(sycl::range<1>{DIMS}, *kernel); + }); + queue->wait_and_throw(); + } catch (sycl::exception &e) { + std::cerr << "Exception thrown: " << e.what() << "\n"; + return; + } + + for (int i = 0; i < DIMS; i++) { + if (data[i] != i) { + std::cerr << "data[" << i << "] != " << i << " (got " << data[i] << ")\n"; + } + } +} + +void run_test(size_t numThreads) { + sycl::queue queue(sycl::default_selector_v); + sycl::kernel kernel = sycl::get_kernel_bundle( + queue.get_context()) + .get_kernel(sycl::get_kernel_id()); + + // Warm up. + { + sycl::buffer result_buf{sycl::range<1>{DIMS}}; + queue.submit([&](sycl::handler &cgh) { + auto result_acc = + result_buf.get_access(cgh); + cgh.parallel_for( + sycl::range<1>{DIMS}, + [=](sycl::id<1> idx) { result_acc[idx] = idx[0]; }); + }).wait_and_throw(); + } + + // Spawn multiple threads submitting the same kernel to the same queue. + std::vector threads; + for (size_t i = 0; i < numThreads; ++i) + threads.push_back(std::thread(&submit, &queue, &kernel)); + + for (auto &t : threads) + t.join(); +} + + +int main() { + run_test(10); + return 0; +} diff --git a/sycl/test/regression/queue_submit.cpp b/sycl/test/regression/queue_submit.cpp deleted file mode 100755 index 12f144a38f435..0000000000000 --- a/sycl/test/regression/queue_submit.cpp +++ /dev/null @@ -1,80 +0,0 @@ -//==--- queue_submit.cpp - SYCL queue submit test --------------==// - -// RUN: %clangxx -fsycl %s -o %t.out -// RUN: %t.out - -// This test submits same kernel via multiple threads to the same queue. -// It's a regression test for CMPLRLLVM-72408 - -#include -#include -#include - -#define DIMS 1024 - -class kernel_set_value; - -void submit(sycl::queue *queue, sycl::kernel *kernel) { - int data[DIMS]; - try { - sycl::buffer result_buf{data, sycl::range<1>{DIMS}}; - queue->submit([&](sycl::handler &cgh) { - auto result_acc = - result_buf.get_access(cgh); - cgh.set_arg(0, result_acc); - cgh.parallel_for(sycl::range<1>{DIMS}, *kernel); - }); - queue->wait_and_throw(); - } catch (sycl::exception &e) { - std::cerr << "Exception thrown: " << e.what() << "\n"; - return; - } - - for (int i = 0; i < DIMS; i++) { - if (data[i] != i) { - std::cerr << "data[" << i << "] != " << i << " (got " << data[i] << ")\n"; - } - } -} - -class single_queue_with_kernel { -public: - single_queue_with_kernel(size_t n) - : numThreads(n), queue(sycl::default_selector_v), - kernel(sycl::get_kernel_bundle( - queue.get_context()) - .get_kernel(sycl::get_kernel_id())) {} - - void run_threads() { - if (0) { - sycl::buffer result_buf{sycl::range<1>{DIMS}}; - queue.submit([&](sycl::handler &cgh) { - auto result_acc = - result_buf.get_access(cgh); - cgh.parallel_for( - sycl::range<1>{DIMS}, - [=](sycl::id<1> idx) { result_acc[idx] = idx[0]; }); - }); - } - - std::vector threads; - for (size_t i = 0; i < numThreads; ++i) - threads.emplace_back(new std::thread(&submit, &queue, &kernel)); - - for (auto &t : threads) { - t->join(); - delete t; - } - } - -private: - sycl::queue queue; - sycl::kernel kernel; - size_t numThreads; -}; - -int main() { - single_queue_with_kernel test_q(10); - test_q.run_threads(); - return 0; -} From 8ed5a13a96af28b03031e859567de95548cf74f6 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Thu, 8 Jan 2026 01:23:01 +0100 Subject: [PATCH 4/5] Fix tests and hang --- sycl/source/detail/kernel_impl.cpp | 7 ++---- sycl/source/detail/kernel_impl.hpp | 1 + sycl/test-e2e/Regression/queue_submit.cpp | 27 +++++++++++++---------- 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/sycl/source/detail/kernel_impl.cpp b/sycl/source/detail/kernel_impl.cpp index c2d982aa3d952..41dfaf8a8ed03 100644 --- a/sycl/source/detail/kernel_impl.cpp +++ b/sycl/source/detail/kernel_impl.cpp @@ -118,11 +118,8 @@ bool kernel_impl::hasSYCLMetadata() const noexcept { // TODO this is how kernel_impl::get_info should behave instead. std::string_view kernel_impl::getName() const { - static std::once_flag NameInitFlag; - while (MName.empty()) { - std::call_once(NameInitFlag, - [&]() { MName = get_info(); }); - } + std::call_once(MNameInitFlag, + [&]() { MName = get_info(); }); return MName; } diff --git a/sycl/source/detail/kernel_impl.hpp b/sycl/source/detail/kernel_impl.hpp index e69946504f9b6..3fd2b201c6e89 100644 --- a/sycl/source/detail/kernel_impl.hpp +++ b/sycl/source/detail/kernel_impl.hpp @@ -255,6 +255,7 @@ class kernel_impl { const KernelArgMask *MKernelArgMaskPtr; std::mutex *MCacheMutex = nullptr; mutable std::string MName; + mutable std::once_flag MNameInitFlag; // Used for images that aren't obtained with standard SYCL offline // compilation. diff --git a/sycl/test-e2e/Regression/queue_submit.cpp b/sycl/test-e2e/Regression/queue_submit.cpp index 6110002cc949a..d4c1383867d0b 100755 --- a/sycl/test-e2e/Regression/queue_submit.cpp +++ b/sycl/test-e2e/Regression/queue_submit.cpp @@ -7,7 +7,8 @@ // It's a regression test for CMPLRLLVM-72408 #include -#include +#include +#include #include #define DIMS 1024 @@ -39,20 +40,23 @@ void submit(sycl::queue *queue, sycl::kernel *kernel) { void run_test(size_t numThreads) { sycl::queue queue(sycl::default_selector_v); - sycl::kernel kernel = sycl::get_kernel_bundle( - queue.get_context()) - .get_kernel(sycl::get_kernel_id()); + sycl::kernel kernel = + sycl::get_kernel_bundle( + queue.get_context()) + .get_kernel(sycl::get_kernel_id()); // Warm up. { sycl::buffer result_buf{sycl::range<1>{DIMS}}; - queue.submit([&](sycl::handler &cgh) { - auto result_acc = - result_buf.get_access(cgh); - cgh.parallel_for( - sycl::range<1>{DIMS}, - [=](sycl::id<1> idx) { result_acc[idx] = idx[0]; }); - }).wait_and_throw(); + queue + .submit([&](sycl::handler &cgh) { + auto result_acc = + result_buf.get_access(cgh); + cgh.parallel_for( + sycl::range<1>{DIMS}, + [=](sycl::id<1> idx) { result_acc[idx] = idx[0]; }); + }) + .wait_and_throw(); } // Spawn multiple threads submitting the same kernel to the same queue. @@ -64,7 +68,6 @@ void run_test(size_t numThreads) { t.join(); } - int main() { run_test(10); return 0; From bd43daa9542704606084c240f0dd2c2ba3e26d26 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Thu, 8 Jan 2026 18:00:37 +0100 Subject: [PATCH 5/5] ADdress feedback --- sycl/test-e2e/Regression/queue_submit.cpp | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/sycl/test-e2e/Regression/queue_submit.cpp b/sycl/test-e2e/Regression/queue_submit.cpp index d4c1383867d0b..b756e980fe303 100755 --- a/sycl/test-e2e/Regression/queue_submit.cpp +++ b/sycl/test-e2e/Regression/queue_submit.cpp @@ -15,27 +15,24 @@ class kernel_set_value; -void submit(sycl::queue *queue, sycl::kernel *kernel) { +void submit(sycl::queue &queue, sycl::kernel &kernel) { int data[DIMS]; try { sycl::buffer result_buf{data, sycl::range<1>{DIMS}}; - queue->submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler &cgh) { auto result_acc = result_buf.get_access(cgh); cgh.set_arg(0, result_acc); - cgh.parallel_for(sycl::range<1>{DIMS}, *kernel); + cgh.parallel_for(sycl::range<1>{DIMS}, kernel); }); - queue->wait_and_throw(); + queue.wait_and_throw(); } catch (sycl::exception &e) { std::cerr << "Exception thrown: " << e.what() << "\n"; return; } - for (int i = 0; i < DIMS; i++) { - if (data[i] != i) { - std::cerr << "data[" << i << "] != " << i << " (got " << data[i] << ")\n"; - } - } + for (int i = 0; i < DIMS; i++) + assert(data[i] == i); } void run_test(size_t numThreads) { @@ -62,7 +59,7 @@ void run_test(size_t numThreads) { // Spawn multiple threads submitting the same kernel to the same queue. std::vector threads; for (size_t i = 0; i < numThreads; ++i) - threads.push_back(std::thread(&submit, &queue, &kernel)); + threads.push_back(std::thread(&submit, std::ref(queue), std::ref(kernel))); for (auto &t : threads) t.join();