Skip to content

Commit c821dc9

Browse files
authored
[SYCL] Add support for multiple filtered outputs in sycl-post-link (#12727)
This PR adds the changes required for `sycl-post-link` to support optional kernel features in AOT mode, as described by the design doc in #12252. Additionally, it updates the driver to invoke `sycl-post-link` with a device architecture when Intel GPU targets are passed in `-fsycl-targets`.
1 parent 1bd076b commit c821dc9

File tree

13 files changed

+554
-200
lines changed

13 files changed

+554
-200
lines changed

clang/lib/Driver/ToolChains/Clang.cpp

+6-1
Original file line numberDiff line numberDiff line change
@@ -10611,7 +10611,12 @@ void SYCLPostLink::ConstructJob(Compilation &C, const JobAction &JA,
1061110611

1061210612
// Add output file table file option
1061310613
assert(Output.isFilename() && "output must be a filename");
10614-
addArgs(CmdArgs, TCArgs, {"-o", Output.getFilename()});
10614+
StringRef Device = JA.getOffloadingArch();
10615+
std::string OutputArg = Output.getFilename();
10616+
if (T.getSubArch() == llvm::Triple::SPIRSubArch_gen && Device.data())
10617+
OutputArg = ("intel_gpu_" + Device + "," + OutputArg).str();
10618+
10619+
addArgs(CmdArgs, TCArgs, {"-o", OutputArg});
1061510620

1061610621
const toolchains::SYCLToolChain &TC =
1061710622
static_cast<const toolchains::SYCLToolChain &>(getToolChain());

clang/test/Driver/sycl-oneapi-gpu-intelgpu.cpp

+8-2
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@
101101
/// -fsycl-targets=spir64_x86_64 should set a specific macro
102102
// RUN: %clangxx -c -fsycl -fsycl-targets=spir64_x86_64 -### %s 2>&1 | \
103103
// RUN: FileCheck %s --check-prefix=MACRO_X86_64
104-
// RUN: %clang_cl -c -fsycl -fsycl-targets=spir64_x86_64 -### %s 2>&1 | \
104+
// RUN: %clang_cl -c -fsycl -fsycl-targets=spir64_x86_64 -### -- %s 2>&1 | \
105105
// RUN: FileCheck %s --check-prefix=MACRO_X86_64
106106
// MACRO_X86_64: clang{{.*}} "-triple" "spir64_x86_64-unknown-unknown"
107107
// MACRO_X86_64: "-D__SYCL_TARGET_INTEL_X86_64__"
@@ -111,7 +111,7 @@
111111
/// test for invalid intel arch
112112
// RUN: not %clangxx -c -fsycl -fsycl-targets=intel_gpu_bad -### %s 2>&1 | \
113113
// RUN: FileCheck %s --check-prefix=BAD_INPUT
114-
// RUN: not %clang_cl -c -fsycl -fsycl-targets=intel_gpu_bad -### %s 2>&1 | \
114+
// RUN: not %clang_cl -c -fsycl -fsycl-targets=intel_gpu_bad -### -- %s 2>&1 | \
115115
// RUN: FileCheck %s --check-prefix=BAD_INPUT
116116
// BAD_INPUT: error: SYCL target is invalid: 'intel_gpu_bad'
117117

@@ -233,3 +233,9 @@
233233
// CHECK_TOOLS_BEOPTS_MIX: opencl-aot{{.*}} "-DCPU"
234234
// CHECK_TOOLS_BEOPTS_MIX-NOT: "-DDG1"
235235
// CHECK_TOOLS_BEOPTS_MIX: ocloc{{.*}} "-device" "skl"{{.*}}"-DSKL2"
236+
237+
/// Check that target is passed to sycl-post-link for filtering
238+
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc,intel_gpu_dg1 \
239+
// RUN: -### %s 2>&1 | FileCheck %s --check-prefix=CHECK_TOOLS_FILTER
240+
// CHECK_TOOLS_FILTER: sycl-post-link{{.*}} "-o" "intel_gpu_pvc,{{.*}}"
241+
// CHECK_TOOLS_FILTER: sycl-post-link{{.*}} "-o" "intel_gpu_dg1,{{.*}}"

llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td

+6
Original file line numberDiff line numberDiff line change
@@ -143,3 +143,9 @@ def : TargetInfo<"x86_64", [], [], "", "", 1>;
143143
//defvar AspectList = [AspectCpu] # AllUSMAspects;
144144
//def : TargetInfo<"Test", AspectList, []>;
145145
//def : TargetInfo<"Test2", [AspectCpu] # AllUSMAspects, []>;
146+
147+
// TODO: The aspects listed for the intel_gpu targets right now are incomplete;
148+
// only the fp16/fp64/atomic64 aspects are listed.
149+
def : TargetInfo<"intel_gpu_cfl", [AspectFp16, AspectFp64, AspectAtomic64], [8, 16, 32]>;
150+
def : TargetInfo<"intel_gpu_tgllp", [AspectFp16, AspectAtomic64], [8, 16, 32]>;
151+
def : TargetInfo<"intel_gpu_pvc", [AspectFp16, AspectFp64, AspectAtomic64], [16, 32]>;

llvm/include/llvm/SYCLLowerIR/ModuleSplitter.h

+9
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
#ifndef LLVM_SYCLLOWERIR_MODULE_SPLITTER_H
1414
#define LLVM_SYCLLOWERIR_MODULE_SPLITTER_H
1515

16+
#include "SYCLDeviceRequirements.h"
17+
1618
#include "llvm/ADT/SetVector.h"
1719
#include "llvm/ADT/StringRef.h"
1820
#include "llvm/IR/Function.h"
@@ -108,6 +110,7 @@ class ModuleDesc {
108110
std::unique_ptr<Module> M;
109111
EntryPointGroup EntryPoints;
110112
bool IsTopLevel = false;
113+
mutable std::optional<SYCLDeviceRequirements> Reqs;
111114

112115
public:
113116
struct Properties {
@@ -193,6 +196,12 @@ class ModuleDesc {
193196

194197
ModuleDesc clone() const;
195198

199+
/// Returns the device requirements of this module description.
///
/// The result of computeDeviceRequirements(*this) is stored in the mutable
/// member Reqs on the first call and returned from that cache afterwards,
/// which is why this accessor can be const.
/// NOTE(review): nothing visible here resets the cached value if the
/// module is modified later -- confirm callers only query once the module
/// is final.
const SYCLDeviceRequirements &getOrComputeDeviceRequirements() const {
  if (Reqs)
    return *Reqs;
  Reqs = computeDeviceRequirements(*this);
  return *Reqs;
}
204+
196205
#ifndef NDEBUG
197206
void verifyESIMDProperty() const;
198207
void dump() const;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
//===----- SYCLDeviceRequirements.h - collect data for used aspects ------=-==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#pragma once
10+
11+
#include "llvm/ADT/SmallString.h"
12+
#include "llvm/ADT/SmallVector.h"
13+
#include "llvm/ADT/StringRef.h"
14+
15+
#include <cstdint>
16+
#include <map>
17+
#include <optional>
18+
#include <set>
19+
#include <vector>
20+
21+
namespace llvm {
22+
23+
class StringRef;
24+
25+
namespace module_split {
26+
class ModuleDesc;
27+
}
28+
namespace util {
29+
class PropertyValue;
30+
}
31+
32+
struct SYCLDeviceRequirements {
33+
std::set<uint32_t> Aspects;
34+
std::set<uint32_t> FixedTarget;
35+
std::optional<llvm::SmallVector<uint64_t, 3>> ReqdWorkGroupSize;
36+
std::optional<llvm::SmallString<256>> JointMatrix;
37+
std::optional<llvm::SmallString<256>> JointMatrixMad;
38+
std::optional<uint32_t> SubGroupSize;
39+
40+
std::map<StringRef, util::PropertyValue> asMap() const;
41+
};
42+
43+
SYCLDeviceRequirements
44+
computeDeviceRequirements(const module_split::ModuleDesc &M);
45+
46+
} // namespace llvm

llvm/lib/SYCLLowerIR/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ add_llvm_component_library(LLVMSYCLLowerIR
6666
ModuleSplitter.cpp
6767
MutatePrintfAddrspace.cpp
6868
SYCLAddOptLevelAttribute.cpp
69+
SYCLDeviceRequirements.cpp
6970
SYCLPropagateAspectsUsage.cpp
7071
SYCLPropagateJointMatrixUsage.cpp
7172
SYCLUtils.cpp
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
//===----- SYCLDeviceRequirements.cpp - collect data for used aspects ----=-==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "llvm/SYCLLowerIR/SYCLDeviceRequirements.h"
10+
11+
#include "llvm/ADT/SmallString.h"
12+
#include "llvm/ADT/StringRef.h"
13+
#include "llvm/IR/Module.h"
14+
#include "llvm/SYCLLowerIR/ModuleSplitter.h"
15+
#include "llvm/Support/PropertySetIO.h"
16+
17+
#include <set>
18+
#include <vector>
19+
20+
using namespace llvm;
21+
22+
/// Returns operand \p OpNo of \p N as a sign-extended 64-bit integer.
///
/// The operand is converted with cast<ConstantAsMetadata>, so it is expected
/// to wrap an integer constant.
static int64_t ExtractSignedIntegerFromMDNodeOperand(const MDNode *N,
                                                     unsigned OpNo) {
  const auto *Wrapped = cast<ConstantAsMetadata>(N->getOperand(OpNo).get());
  return Wrapped->getValue()->getUniqueInteger().getSExtValue();
}
27+
/// Returns operand \p OpNo of \p N as a zero-extended 64-bit integer.
///
/// The operand is converted with cast<ConstantAsMetadata>, so it is expected
/// to wrap an integer constant.
static uint64_t ExtractUnsignedIntegerFromMDNodeOperand(const MDNode *N,
                                                        unsigned OpNo) {
  const auto *Wrapped = cast<ConstantAsMetadata>(N->getOperand(OpNo).get());
  return Wrapped->getValue()->getUniqueInteger().getZExtValue();
}
32+
/// Returns the string held by operand \p OpNo of \p N.
///
/// The operand is converted with cast<MDString>, so it is expected to be a
/// metadata string.
static llvm::StringRef ExtractStringFromMDNodeOperand(const MDNode *N,
                                                      unsigned OpNo) {
  return cast<llvm::MDString>(N->getOperand(OpNo).get())->getString();
}
37+
38+
/// Gathers the device requirements of \p MD from function metadata.
///
/// Every function of the module is inspected for "sycl_used_aspects",
/// "sycl_fixed_targets", "reqd_work_group_size", "sycl_joint_matrix" and
/// "sycl_joint_matrix_mad". Only the entry points are inspected for
/// "intel_reqd_sub_group_size".
SYCLDeviceRequirements
llvm::computeDeviceRequirements(const module_split::ModuleDesc &MD) {
  SYCLDeviceRequirements Reqs;

  // First pass: all functions in the module.
  for (const Function &Fn : MD.getModule()) {
    if (auto *AspectsMD = Fn.getMetadata("sycl_used_aspects")) {
      const size_t NumAspects = AspectsMD->getNumOperands();
      for (size_t Idx = 0; Idx != NumAspects; ++Idx) {
        auto Aspect = ExtractSignedIntegerFromMDNodeOperand(AspectsMD, Idx);
        // Internal aspects are encoded as negative integers. They only drive
        // device image splitting and must not show up in the requirements.
        if (Aspect < 0)
          continue;
        Reqs.Aspects.insert(Aspect);
      }
    }

    if (auto *TargetsMD = Fn.getMetadata("sycl_fixed_targets")) {
      const size_t NumTargets = TargetsMD->getNumOperands();
      for (size_t Idx = 0; Idx != NumTargets; ++Idx)
        Reqs.FixedTarget.insert(
            ExtractUnsignedIntegerFromMDNodeOperand(TargetsMD, Idx));
    }

    if (auto *WGSizeMD = Fn.getMetadata("reqd_work_group_size")) {
      llvm::SmallVector<uint64_t, 3> Dims;
      const size_t NumDims = WGSizeMD->getNumOperands();
      for (size_t Idx = 0; Idx != NumDims; ++Idx)
        Dims.push_back(ExtractUnsignedIntegerFromMDNodeOperand(WGSizeMD, Idx));
      // The first work-group size encountered is kept; later ones are
      // ignored.
      if (!Reqs.ReqdWorkGroupSize)
        Reqs.ReqdWorkGroupSize = Dims;
    }

    if (auto *MatrixMD = Fn.getMetadata("sycl_joint_matrix")) {
      StringRef Text = ExtractStringFromMDNodeOperand(MatrixMD, 0);
      if (!Text.empty())
        Reqs.JointMatrix = Text;
    }

    if (auto *MatrixMadMD = Fn.getMetadata("sycl_joint_matrix_mad")) {
      StringRef Text = ExtractStringFromMDNodeOperand(MatrixMadMD, 0);
      if (!Text.empty())
        Reqs.JointMatrixMad = Text;
    }
  }

  // Second pass: entry points only.
  for (const Function *F : MD.entries()) {
    auto *MDN = F->getMetadata("intel_reqd_sub_group_size");
    if (!MDN)
      continue;

    // Among the entry points of a module at most one distinct
    // intel_reqd_sub_group_size value is expected. That does not hold for
    // all functions of the module: an entry point carrying
    // intel_reqd_sub_group_size may call an ESIMD function through
    // invoke_esimd, and that callee legitimately has
    // intel_reqd_sub_group_size=1.
    assert(
        MDN->getNumOperands() == 1 &&
        "intel_reqd_sub_group_size metadata expects exactly one argument!");
    auto MDValue = ExtractUnsignedIntegerFromMDNodeOperand(MDN, 0);
    if (Reqs.SubGroupSize.has_value()) {
      assert(*Reqs.SubGroupSize == static_cast<uint32_t>(MDValue));
    } else {
      Reqs.SubGroupSize = MDValue;
    }
  }

  return Reqs;
}
104+
105+
std::map<StringRef, util::PropertyValue> SYCLDeviceRequirements::asMap() const {
106+
std::map<StringRef, util::PropertyValue> Requirements;
107+
108+
// For all properties except for "aspects", we'll only add the
109+
// value to the map if the corresponding value from
110+
// SYCLDeviceRequirements has a value/is non-empty.
111+
Requirements["aspects"] =
112+
std::vector<uint32_t>(Aspects.begin(), Aspects.end());
113+
114+
if (!FixedTarget.empty())
115+
Requirements["fixed_target"] =
116+
std::vector<uint32_t>(FixedTarget.begin(), FixedTarget.end());
117+
118+
// TODO: Before intel/llvm#10620, the reqd_work_group_size attribute
119+
// stores its values as uint32_t, but this needed to be expanded to
120+
// uint64_t. However, this change did not happen in ABI-breaking
121+
// window, so we attach the required work-group size as the
122+
// reqd_work_group_size_uint64_t attribute. At the next ABI-breaking
123+
// window, this can be changed back to reqd_work_group_size.
124+
if (ReqdWorkGroupSize.has_value())
125+
Requirements["reqd_work_group_size_uint64_t"] = *ReqdWorkGroupSize;
126+
127+
if (JointMatrix.has_value())
128+
Requirements["joint_matrix"] = *JointMatrix;
129+
130+
if (JointMatrixMad.has_value())
131+
Requirements["joint_matrix_mad"] = *JointMatrixMad;
132+
133+
if (SubGroupSize.has_value())
134+
Requirements["reqd_sub_group_size"] = *SubGroupSize;
135+
136+
return Requirements;
137+
}

llvm/test/tools/sycl-post-link/help.test

+1-1
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ CHECK: sycl-post-link options:
4545
CHECK: --device-globals - Lower and generate information about device global variables
4646
CHECK: -f - Enable binary output on terminals
4747
CHECK: --ir-output-only - Output single IR file
48-
CHECK: -o <filename> - Output filename
48+
CHECK: -o <target filename pair> - Specifies an output file. Multiple output files can be specified. Additionally, a target may be specified alongside an output file, which has the effect that when module splitting is performed, the modules that are in that output table are filtered so those modules are compatible with the target.
4949
CHECK: --out-dir=<dirname> - Directory where files listed in the result file table will be output
5050
CHECK: --spec-const=<value> - lower and generate specialization constants information
5151
CHECK: =native - lower spec constants to native spirv instructions so that these values could be set at runtime

0 commit comments

Comments
 (0)