Skip to content

Commit c07bdb9

Browse files
committed
Merge remote-tracking branch 'origin/sycl' into host_prof
2 parents 7b72320 + 51aeea6 commit c07bdb9

File tree

75 files changed

+1044
-416
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

75 files changed

+1044
-416
lines changed

.github/workflows/sycl-linux-precommit.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ jobs:
111111
- name: E2E tests on Intel Ponte Vecchio GPU
112112
runner: '["Linux", "pvc"]'
113113
image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN
114-
target_devices: level_zero:gpu;opencl:gpu
114+
target_devices: level_zero:gpu;opencl:gpu;level_zero_v2:gpu
115115
- name: Dev IGC on Intel Ponte Vecchio GPU
116116
runner: '["Linux", "pvc"]'
117117
image: ghcr.io/intel/llvm/ubuntu2404_intel_drivers:devigc
@@ -163,6 +163,7 @@ jobs:
163163
'false' }}
164164
# Run only if the PR does not have the 'ci-no-devigc' label.
165165
skip_run: ${{matrix.use_igc_dev && contains(github.event.pull_request.labels.*.name, 'ci-no-devigc') || 'false'}}
166+
env: ${{ contains(needs.detect_changes.outputs.filters, 'esimd') && '{}' || '{"LIT_FILTER_OUT":"ESIMD/"}' }}
166167

167168
test-perf:
168169
needs: [build, detect_changes]

clang/lib/Driver/ToolChains/Cuda.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -968,6 +968,11 @@ void CudaToolChain::addClangTargetOptions(
968968
"--nvptx-prec-sqrtf32=0"});
969969

970970
CC1Args.append({"-mllvm", "-enable-memcpyopt-without-libcalls"});
971+
972+
if (DriverArgs.hasFlag(options::OPT_fsycl_id_queries_fit_in_int,
973+
options::OPT_fno_sycl_id_queries_fit_in_int, false))
974+
CC1Args.append(
975+
{"-mllvm", "-nvvm-reflect-add=__CUDA_ID_QUERIES_FIT_IN_INT=1"});
971976
} else {
972977
CC1Args.append({"-fcuda-is-device", "-mllvm",
973978
"-enable-memcpyopt-without-libcalls",

clang/lib/Sema/SemaSYCL.cpp

Lines changed: 53 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1197,6 +1197,8 @@ bool SemaSYCL::isFreeFunction(const FunctionDecl *FD) {
11971197
NameValuePair.first == "sycl-single-task-kernel";
11981198
});
11991199
IsFreeFunctionAttr = it != NameValuePairs.end();
1200+
if (IsFreeFunctionAttr)
1201+
break;
12001202
}
12011203
if (Redecl->isFirstDecl()) {
12021204
if (IsFreeFunctionAttr)
@@ -6615,9 +6617,18 @@ class FreeFunctionPrinter {
66156617
// function
66166618
NSInserted = true;
66176619
}
6620+
if (FD->isFunctionTemplateSpecialization() &&
6621+
FD->isThisDeclarationADefinition())
6622+
O << "template <> ";
66186623
O << TemplateParameters;
66196624
O << FD->getReturnType().getAsString() << " ";
6620-
O << FD->getNameAsString() << "(" << Args << ");";
6625+
FD->printName(O, Policy);
6626+
if (FD->isFunctionTemplateSpecialization() &&
6627+
FD->isThisDeclarationADefinition())
6628+
O << getTemplateSpecializationArgString(
6629+
FD->getTemplateSpecializationArgs());
6630+
6631+
O << "(" << Args << ");";
66216632
if (NSInserted) {
66226633
O << "\n";
66236634
PrintNSClosingBraces(O, FD);
@@ -6639,35 +6650,49 @@ class FreeFunctionPrinter {
66396650
if (NSInserted)
66406651
PrintNamespaces(O, FD, /*isPrintNamesOnly=*/true);
66416652
O << FD->getIdentifier()->getName().data();
6642-
if (FD->getPrimaryTemplate()) {
6643-
std::string Buffer;
6644-
llvm::raw_string_ostream StringStream(Buffer);
6645-
const TemplateArgumentList *TAL = FD->getTemplateSpecializationArgs();
6646-
ArrayRef<TemplateArgument> A = TAL->asArray();
6647-
bool FirstParam = true;
6648-
for (const auto &X : A) {
6649-
if (FirstParam)
6650-
FirstParam = false;
6651-
else if (X.getKind() == TemplateArgument::Pack) {
6652-
for (const auto &PackArg : X.pack_elements()) {
6653-
StringStream << ", ";
6654-
PackArg.print(Policy, StringStream, true);
6655-
}
6656-
continue;
6657-
} else {
6653+
if (FD->getPrimaryTemplate())
6654+
O << getTemplateSpecializationArgString(
6655+
FD->getTemplateSpecializationArgs());
6656+
}
6657+
6658+
private:
6659+
/// Helper method to get string with template types
6660+
/// \param TAL The template argument list.
6661+
/// \returns string Example:
6662+
/// \code
6663+
/// template <typename T1, typename T2>
6664+
/// void foo(T1 a, T2 b);
6665+
/// \endcode
6666+
/// returns string "<T1, T2>"
6667+
/// If TAL is nullptr, returns empty string.
6668+
std::string
6669+
getTemplateSpecializationArgString(const TemplateArgumentList *TAL) {
6670+
if (!TAL)
6671+
return "";
6672+
std::string Buffer;
6673+
llvm::raw_string_ostream StringStream(Buffer);
6674+
ArrayRef<TemplateArgument> A = TAL->asArray();
6675+
bool FirstParam = true;
6676+
for (const auto &X : A) {
6677+
if (FirstParam)
6678+
FirstParam = false;
6679+
else if (X.getKind() == TemplateArgument::Pack) {
6680+
for (const auto &PackArg : X.pack_elements()) {
66586681
StringStream << ", ";
6682+
PackArg.print(Policy, StringStream, /*IncludeType*/ true);
66596683
}
6684+
continue;
6685+
} else
6686+
StringStream << ", ";
66606687

6661-
X.print(Policy, StringStream, true);
6662-
}
6663-
StringStream.flush();
6664-
if (Buffer.front() != '<')
6665-
Buffer = "<" + Buffer + ">";
6666-
O << Buffer;
6688+
X.print(Policy, StringStream, /*IncludeType*/ true);
66676689
}
6690+
StringStream.flush();
6691+
if (Buffer.front() != '<')
6692+
Buffer = "<" + Buffer + ">";
6693+
return Buffer;
66686694
}
66696695

6670-
private:
66716696
/// Helper method to get arguments of templated function as a string
66726697
/// \param Parameters Array of parameters of the function.
66736698
/// \param Policy Printing policy.
@@ -7081,6 +7106,10 @@ void SYCLIntegrationHeader::emit(raw_ostream &O) {
70817106
FreeFunctionPrinter FFPrinter(O, Policy);
70827107
if (FTD) {
70837108
FFPrinter.printFreeFunctionDeclaration(FTD, S);
7109+
if (const auto kind = K.SyclKernel->getTemplateSpecializationKind();
7110+
K.SyclKernel->isFunctionTemplateSpecialization() &&
7111+
kind == TSK_ExplicitSpecialization)
7112+
FFPrinter.printFreeFunctionDeclaration(K.SyclKernel, ParmListWithNames);
70847113
} else {
70857114
FFPrinter.printFreeFunctionDeclaration(K.SyclKernel, ParmListWithNames);
70867115
}

clang/test/CodeGenSYCL/free_function_int_header.cpp

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,16 @@ __attribute__((sycl_device))
254254
void ff_20(sycl::accessor<int, 1, sycl::access::mode::read_write> acc) {
255255
}
256256

257+
[[__sycl_detail__::add_ir_attributes_function("work_group_size", 16)]]
258+
[[__sycl_detail__::add_ir_attributes_function("sycl-nd-range-kernel", 0)]]
259+
void ff_21(AliasType start, AliasType *ptr) {
260+
}
261+
262+
[[__sycl_detail__::add_ir_attributes_function("sycl-nd-range-kernel", 0)]]
263+
[[__sycl_detail__::add_ir_attributes_function("work_group_size", 16)]]
264+
void ff_22(AliasType start, AliasType *ptr) {
265+
}
266+
257267
// CHECK: const char* const kernel_names[] = {
258268
// CHECK-NEXT: {{.*}}__sycl_kernel_ff_2Piii
259269
// CHECK-NEXT: {{.*}}__sycl_kernel_ff_2Piiii
@@ -286,6 +296,8 @@ void ff_20(sycl::accessor<int, 1, sycl::access::mode::read_write> acc) {
286296
// CHECK-NEXT: {{.*}}__sycl_kernel_free_functions5tests5ff_18ENS_3AggEPS1_
287297
// CHECK-NEXT: {{.*}}__sycl_kernel_ff_19N14free_functions16KArgWithPtrArrayILi50EEE
288298
// CHECK-NEXT: {{.*}}__sycl_kernel_ff_20N4sycl3_V18accessorIiLi1ELNS0_6access4modeE1026ELNS2_6targetE2014ELNS2_11placeholderE0ENS0_3ext6oneapi22accessor_property_listIJEEEEE
299+
// CHECK-NEXT: {{.*}}__sycl_kernel_ff_217DerivedPS_
300+
// CHECK-NEXT: {{.*}}__sycl_kernel_ff_227DerivedPS_
289301

290302
// CHECK-NEXT: ""
291303
// CHECK-NEXT: };
@@ -479,6 +491,7 @@ void ff_20(sycl::accessor<int, 1, sycl::access::mode::read_write> acc) {
479491
// CHECK: Definition of _Z18__sycl_kernel_ff_3IdEvPT_S0_S0_ as a free function kernel
480492
// CHECK: Forward declarations of kernel and its argument types:
481493
// CHECK: template <typename T> void ff_3(T * ptr, T start, T end);
494+
// CHECK: template <> void ff_3<double>(double * ptr, double start, double end);
482495
// CHECK-NEXT: static constexpr auto __sycl_shim5() {
483496
// CHECK-NEXT: return (void (*)(double *, double, double))ff_3<double>;
484497
// CHECK-NEXT: }
@@ -980,6 +993,37 @@ void ff_20(sycl::accessor<int, 1, sycl::access::mode::read_write> acc) {
980993
// CHECK-NEXT: };
981994
// CHECK-NEXT: }
982995

996+
997+
// CHECK: void ff_21(Derived start, Derived * ptr);
998+
// CHECK-NEXT: static constexpr auto __sycl_shim30() {
999+
// CHECK-NEXT: return (void (*)(struct Derived, struct Derived *))ff_21;
1000+
// CHECK-NEXT: }
1001+
// CHECK-NEXT: namespace sycl {
1002+
// CHECK-NEXT: template <>
1003+
// CHECK-NEXT: struct ext::oneapi::experimental::is_kernel<__sycl_shim30()> {
1004+
// CHECK-NEXT: static constexpr bool value = true;
1005+
// CHECK-NEXT: };
1006+
// CHECK-NEXT: template <>
1007+
// CHECK-NEXT: struct ext::oneapi::experimental::is_single_task_kernel<__sycl_shim30()> {
1008+
// CHECK-NEXT: static constexpr bool value = true;
1009+
// CHECK-NEXT: };
1010+
// CHECK-NEXT: }
1011+
1012+
// CHECK: void ff_22(Derived start, Derived * ptr);
1013+
// CHECK-NEXT: static constexpr auto __sycl_shim31() {
1014+
// CHECK-NEXT: return (void (*)(struct Derived, struct Derived *))ff_22;
1015+
// CHECK-NEXT: }
1016+
// CHECK-NEXT: namespace sycl {
1017+
// CHECK-NEXT: template <>
1018+
// CHECK-NEXT: struct ext::oneapi::experimental::is_kernel<__sycl_shim31()> {
1019+
// CHECK-NEXT: static constexpr bool value = true;
1020+
// CHECK-NEXT: };
1021+
// CHECK-NEXT: template <>
1022+
// CHECK-NEXT: struct ext::oneapi::experimental::is_single_task_kernel<__sycl_shim31()> {
1023+
// CHECK-NEXT: static constexpr bool value = true;
1024+
// CHECK-NEXT: };
1025+
// CHECK-NEXT: }
1026+
9831027
// CHECK: #include <sycl/kernel_bundle.hpp>
9841028

9851029
// CHECK: Definition of kernel_id of _Z18__sycl_kernel_ff_2Piii
@@ -1196,3 +1240,17 @@ void ff_20(sycl::accessor<int, 1, sycl::access::mode::read_write> acc) {
11961240
// CHECK-NEXT: return sycl::detail::get_kernel_id_impl(std::string_view{"_Z19__sycl_kernel_ff_20N4sycl3_V18accessorIiLi1ELNS0_6access4modeE1026ELNS2_6targetE2014ELNS2_11placeholderE0ENS0_3ext6oneapi22accessor_property_listIJEEEEE"});
11971241
// CHECK-NEXT: }
11981242
// CHECK-NEXT: }
1243+
1244+
// CHECK: namespace sycl {
1245+
// CHECK-NEXT: template <>
1246+
// CHECK-NEXT: kernel_id ext::oneapi::experimental::get_kernel_id<__sycl_shim30()>() {
1247+
// CHECK-NEXT: return sycl::detail::get_kernel_id_impl(std::string_view{"_Z19__sycl_kernel_ff_217DerivedPS_"});
1248+
// CHECK-NEXT: }
1249+
// CHECK-NEXT: }
1250+
1251+
// CHECK: namespace sycl {
1252+
// CHECK-NEXT: template <>
1253+
// CHECK-NEXT: kernel_id ext::oneapi::experimental::get_kernel_id<__sycl_shim31()>() {
1254+
// CHECK-NEXT: return sycl::detail::get_kernel_id_impl(std::string_view{"_Z19__sycl_kernel_ff_227DerivedPS_"});
1255+
// CHECK-NEXT: }
1256+
// CHECK-NEXT: }
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
// REQUIRES: nvptx-registered-target
2+
3+
// RUN: %clang -### -nocudalib \
4+
// RUN: -fsycl -fsycl-targets=nvptx64-nvidia-cuda %s 2>&1 \
5+
// RUN: | FileCheck --check-prefix=CHECK-DEFAULT %s
6+
7+
// RUN: %clang -### -nocudalib \
8+
// RUN: -fsycl -fsycl-targets=nvptx64-nvidia-cuda -fno-sycl-id-queries-fit-in-int %s 2>&1 \
9+
// RUN: | FileCheck --check-prefix=CHECK-DEFAULT %s
10+
11+
// RUN: %clang -### -nocudalib \
12+
// RUN: -fsycl -fsycl-targets=nvptx64-nvidia-cuda -fsycl-id-queries-fit-in-int %s 2>&1 \
13+
// RUN: | FileCheck --check-prefix=CHECK-INT %s
14+
15+
// CHECK-INT: "-mllvm" "-nvvm-reflect-add=__CUDA_ID_QUERIES_FIT_IN_INT=1"
16+
// CHECK-DEFAULT-NOT: "-nvvm-reflect-add=__CUDA_ID_QUERIES_FIT_IN_INT=1"

libclc/libspirv/lib/ptx-nvidiacl/workitem/get_global_id.cl

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,17 +8,31 @@
88

99
#include <libspirv/spirv.h>
1010

11+
extern int __nvvm_reflect_ocl(constant char *);
12+
1113
_CLC_DEF _CLC_OVERLOAD size_t __spirv_GlobalInvocationId_x() {
14+
if (__nvvm_reflect_ocl("__CUDA_ID_QUERIES_FIT_IN_INT")) {
15+
return (uint)__spirv_WorkgroupId_x() * (uint)__spirv_WorkgroupSize_x() +
16+
(uint)__spirv_LocalInvocationId_x() + (uint)__spirv_GlobalOffset_x();
17+
}
1218
return __spirv_WorkgroupId_x() * __spirv_WorkgroupSize_x() +
1319
__spirv_LocalInvocationId_x() + __spirv_GlobalOffset_x();
1420
}
1521

1622
_CLC_DEF _CLC_OVERLOAD size_t __spirv_GlobalInvocationId_y() {
23+
if (__nvvm_reflect_ocl("__CUDA_ID_QUERIES_FIT_IN_INT")) {
24+
return (uint)__spirv_WorkgroupId_y() * (uint)__spirv_WorkgroupSize_y() +
25+
(uint)__spirv_LocalInvocationId_y() + (uint)__spirv_GlobalOffset_y();
26+
}
1727
return __spirv_WorkgroupId_y() * __spirv_WorkgroupSize_y() +
1828
__spirv_LocalInvocationId_y() + __spirv_GlobalOffset_y();
1929
}
2030

2131
_CLC_DEF _CLC_OVERLOAD size_t __spirv_GlobalInvocationId_z() {
32+
if (__nvvm_reflect_ocl("__CUDA_ID_QUERIES_FIT_IN_INT")) {
33+
return (uint)__spirv_WorkgroupId_z() * (uint)__spirv_WorkgroupSize_z() +
34+
(uint)__spirv_LocalInvocationId_z() + (uint)__spirv_GlobalOffset_z();
35+
}
2236
return __spirv_WorkgroupId_z() * __spirv_WorkgroupSize_z() +
2337
__spirv_LocalInvocationId_z() + __spirv_GlobalOffset_z();
2438
}

libdevice/crt_wrapper.cpp

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,8 @@
1212

1313
#include <cstdint>
1414

15-
#ifndef __NVPTX__
1615
#define RAND_NEXT_LEN 1024
1716
DeviceGlobal<uint64_t[RAND_NEXT_LEN]> RandNext;
18-
#endif
1917

2018
#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \
2119
defined(__AMDGCN__)
@@ -34,8 +32,6 @@ int memcmp(const void *s1, const void *s2, size_t n) {
3432
return __devicelib_memcmp(s1, s2, n);
3533
}
3634

37-
#ifndef __NVPTX__
38-
3935
// This simple rand is for ease of use only, the implementation aligns with
4036
// LLVM libc rand which is based on xorshift64star pseudo random number
4137
// generator. If work item number <= 1024, each work item has its own internal
@@ -107,8 +103,6 @@ void srand(unsigned int seed) {
107103
RAND_NEXT_ACC[gid1] = seed;
108104
}
109105

110-
#endif
111-
112106
#if defined(_WIN32)
113107
// Truncates a wide (16 or 32 bit) string (wstr) into an ASCII string (str).
114108
// Any non-ASCII characters are replaced by question mark '?'.

llvm/lib/SYCLLowerIR/LowerWGScope.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,9 @@ template <typename T> static unsigned asUInt(T val) {
144144

145145
static IntegerType *getSizeTTy(Module &M) {
146146
LLVMContext &Ctx = M.getContext();
147-
auto PtrSize = M.getDataLayout().getPointerTypeSize(PointerType::getUnqual(Ctx));
147+
const DataLayout &DL = M.getDataLayout();
148+
auto PtrSize = DL.getPointerTypeSize(
149+
PointerType::get(Ctx, DL.getDefaultGlobalsAddressSpace()));
148150
return PtrSize == 8 ? Type::getInt64Ty(Ctx) : Type::getInt32Ty(Ctx);
149151
}
150152

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
; RUN: opt < %s -passes=LowerWGScope -S | FileCheck %s
2+
3+
; This test checks that pointer size in default global address space is used for
4+
; size_t type, which is value type of GV __spirv_BuiltInLocalInvocationIndex.
5+
; Note that pointer size in the default address space is 4.
6+
7+
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-i64:64-v16:16-v32:32-n16:32:64-G1"
8+
9+
%struct.baz = type { i64 }
10+
11+
; CHECK: @__spirv_BuiltInLocalInvocationIndex = external addrspace(1) constant i64, align 8
12+
13+
define internal void @wibble(ptr byval(%struct.baz) %arg1) !work_group_scope !0 {
14+
; CHECK: load i64, ptr addrspace(1) @__spirv_BuiltInLocalInvocationIndex, align 8
15+
; CHECK: call void @_Z22__spirv_ControlBarrieriii(i32 2, i32 2, i32 272)
16+
ret void
17+
}
18+
19+
; CHECK: ; Function Attrs: convergent
20+
; CHECK: declare void @_Z22__spirv_ControlBarrieriii(i32, i32, i32) #[[ATTR_NUM:[0-9]+]]
21+
22+
; CHECK: attributes #[[ATTR_NUM]] = { convergent }
23+
24+
!0 = !{}

mlir/utils/vscode/package-lock.json

Lines changed: 7 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

sycl/doc/design/SYCLBINDesign.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@ clang-offload-packager invocation to clang-linker-wrapper together with the new
225225
`--syclbin` flag.
226226

227227
Setting this option will override `-fsycl`. Passing`-fsycl-device-only` with
228-
`-fsyclbin` will cause `-fsycl-device-only` to be considered unused.
228+
`-fsyclbin` will cause `-fsyclbin` to be considered unused.
229229

230230
The behavior is dependent on using the clang-linker-wrapper. As the current
231231
default offload compilation behavior is using the old offload model (driver

0 commit comments

Comments
 (0)