diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index fdabffdcf4f50..aeccce21acc19 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -10634,6 +10634,8 @@ void SYCLPostLink::ConstructJob(Compilation &C, const JobAction &JA, std::string OutputArg = Output.getFilename(); if (T.getSubArch() == llvm::Triple::SPIRSubArch_gen && Device.data()) OutputArg = ("intel_gpu_" + Device + "," + OutputArg).str(); + else if (T.getSubArch() == llvm::Triple::SPIRSubArch_x86_64) + OutputArg = "spir64_x86_64," + OutputArg; addArgs(CmdArgs, TCArgs, {"-o", OutputArg}); diff --git a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td index 2befb540e3daa..c3a24b357f619 100644 --- a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td +++ b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td @@ -139,7 +139,7 @@ def : TargetInfo<"__TestDeprecatedAspectList", def : TargetInfo<"spir64", [], [], "", "", 1>; def : TargetInfo<"spir64_gen", [], [], "", "", 1>; -def : TargetInfo<"spir64_x86_64", [], [], "", "", 1>; +def : TargetInfo<"spir64_x86_64", [AspectFp64, AspectAtomic64], [4, 8, 16, 32, 64], "", "", 1>; def : TargetInfo<"spir64_fpga", [], [], "", "", 1>; def : TargetInfo<"x86_64", [], [], "", "", 1>; // Examples of how to use a combination of explicitly specified values + predefined lists diff --git a/sycl/test-e2e/OptionalKernelFeatures/sg-size-aot.cpp b/sycl/test-e2e/OptionalKernelFeatures/sg-size-aot.cpp new file mode 100644 index 0000000000000..9c0fc7e2f79fa --- /dev/null +++ b/sycl/test-e2e/OptionalKernelFeatures/sg-size-aot.cpp @@ -0,0 +1,71 @@ +// REQUIRES: cpu +// RUN: %clangxx %s -fsycl -fsycl-targets=spir64_x86_64 -o %t.out +// RUN: %{run} %t.out +#include +#include + +#include + +using namespace sycl; + +template +class kernel_name; + +template +struct SubgroupDispatcher { + std::vector> fails; + SubgroupDispatcher(queue &q) : q(q) {} + + void operator()(const std::vector &v) { + for (auto i : v) + (*this)(i); + } + + void operator()(size_t n) { + (dispatch(n), ...); + } + +private: + queue &q; + + template + void dispatch(size_t n) { + if (n == size) { + buffer buf(1); + q.submit([&](handler& cgh) { + accessor acc { buf, cgh }; + cgh.parallel_for>(nd_range<1>(1, 1), [=](auto item) [[intel::reqd_sub_group_size(size)]] { + acc[0] = item.get_sub_group().get_max_local_range()[0]; + }); + }); + host_accessor ha { buf }; + if (ha[0] != size) + fails.push_back({ha[0], size}); + } + } +}; + +int main() { + queue q; + auto ctx = q.get_context(); + auto dev = q.get_device(); + auto sizes = dev.get_info(); + std::cout << " sub-group sizes supported by the device: " << sizes[0]; + for (int i = 1; i < sizes.size(); ++i) { + std::cout << ", " << sizes[i]; + } + std::cout << '\n'; + + using dispatcher_t = SubgroupDispatcher<4, 8, 16, 32, 64, 128>; + dispatcher_t dispatcher(q); + dispatcher(sizes); + if (dispatcher.fails.size() > 0) { + for (auto [actual, expected] : dispatcher.fails) { + std::cout + << "actual: " << actual << "\n" + << "expected: " << expected << "\n"; + } + } else { + std::cout << "pass\n"; + } +} \ No newline at end of file