Skip to content

Commit dc6f622

Browse files
committed
Reland "Revert "[HIP][Clang][Driver] Move BC preference logic into ROCm detection (llvm#149294)""
This reverts commit 991e385.
1 parent 702bd09 commit dc6f622

File tree

11 files changed

+147
-69
lines changed

11 files changed

+147
-69
lines changed

clang/include/clang/Driver/ToolChain.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -823,7 +823,8 @@ class ToolChain {
823823

824824
/// Get paths for device libraries.
825825
virtual llvm::SmallVector<BitCodeLibraryInfo, 12>
826-
getDeviceLibs(const llvm::opt::ArgList &Args) const;
826+
getDeviceLibs(const llvm::opt::ArgList &Args,
827+
const Action::OffloadKind DeviceOffloadingKind) const;
827828

828829
/// Add the system specific linker arguments to use
829830
/// for the given HIP runtime library type.

clang/lib/Driver/ToolChain.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1638,7 +1638,8 @@ void ToolChain::addSYCLIncludeArgs(const ArgList &DriverArgs,
16381638
ArgStringList &CC1Args) const {}
16391639

16401640
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12>
1641-
ToolChain::getDeviceLibs(const ArgList &DriverArgs) const {
1641+
ToolChain::getDeviceLibs(const ArgList &DriverArgs,
1642+
const Action::OffloadKind DeviceOffloadingKind) const {
16421643
return {};
16431644
}
16441645

clang/lib/Driver/ToolChains/AMDGPU.cpp

Lines changed: 95 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,68 @@ using namespace clang::driver::toolchains;
3131
using namespace clang;
3232
using namespace llvm::opt;
3333

34+
RocmInstallationDetector::CommonBitcodeLibsPreferences::
35+
CommonBitcodeLibsPreferences(const Driver &D,
36+
const llvm::opt::ArgList &DriverArgs,
37+
StringRef GPUArch,
38+
const Action::OffloadKind DeviceOffloadingKind,
39+
const bool NeedsASanRT)
40+
: ABIVer(DeviceLibABIVersion::fromCodeObjectVersion(
41+
tools::getAMDGPUCodeObjectVersion(D, DriverArgs))) {
42+
const auto Kind = llvm::AMDGPU::parseArchAMDGCN(GPUArch);
43+
const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind);
44+
45+
IsOpenMP = DeviceOffloadingKind == Action::OFK_OpenMP;
46+
47+
const bool HasWave32 = (ArchAttr & llvm::AMDGPU::FEATURE_WAVE32);
48+
Wave64 =
49+
!HasWave32 || DriverArgs.hasFlag(options::OPT_mwavefrontsize64,
50+
options::OPT_mno_wavefrontsize64, false);
51+
52+
const bool IsKnownOffloading = DeviceOffloadingKind == Action::OFK_OpenMP ||
53+
DeviceOffloadingKind == Action::OFK_HIP;
54+
55+
// Default to enabling f32 denormals on subtargets where fma is fast with
56+
// denormals
57+
const bool DefaultDAZ =
58+
(Kind == llvm::AMDGPU::GK_NONE)
59+
? false
60+
: !((ArchAttr & llvm::AMDGPU::FEATURE_FAST_FMA_F32) &&
61+
(ArchAttr & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32));
62+
// TODO: There are way too many flags that change this. Do we need to
63+
// check them all?
64+
DAZ = IsKnownOffloading
65+
? DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero,
66+
options::OPT_fno_gpu_flush_denormals_to_zero,
67+
DefaultDAZ)
68+
: DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) || DefaultDAZ;
69+
70+
FiniteOnly = DriverArgs.hasArg(options::OPT_cl_finite_math_only) ||
71+
DriverArgs.hasFlag(options::OPT_ffinite_math_only,
72+
options::OPT_fno_finite_math_only, false);
73+
74+
UnsafeMathOpt =
75+
DriverArgs.hasArg(options::OPT_cl_unsafe_math_optimizations) ||
76+
DriverArgs.hasFlag(options::OPT_funsafe_math_optimizations,
77+
options::OPT_fno_unsafe_math_optimizations, false);
78+
79+
FastRelaxedMath = DriverArgs.hasArg(options::OPT_cl_fast_relaxed_math) ||
80+
DriverArgs.hasFlag(options::OPT_ffast_math,
81+
options::OPT_fno_fast_math, false);
82+
83+
const bool DefaultSqrt = IsKnownOffloading ? true : false;
84+
CorrectSqrt =
85+
DriverArgs.hasArg(options::OPT_cl_fp32_correctly_rounded_divide_sqrt) ||
86+
DriverArgs.hasFlag(
87+
options::OPT_fhip_fp32_correctly_rounded_divide_sqrt,
88+
options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt, DefaultSqrt);
89+
// GPU Sanitizer currently only supports ASan and is enabled through host
90+
// ASan.
91+
GPUSan = (DriverArgs.hasFlag(options::OPT_fgpu_sanitize,
92+
options::OPT_fno_gpu_sanitize, true) &&
93+
NeedsASanRT);
94+
}
95+
3496
void RocmInstallationDetector::scanLibDevicePath(llvm::StringRef Path) {
3597
assert(!Path.empty());
3698

@@ -944,33 +1006,14 @@ void ROCMToolChain::addClangTargetOptions(
9441006
ABIVer, noGPULib))
9451007
return;
9461008

947-
bool Wave64 = isWave64(DriverArgs, Kind);
948-
// TODO: There are way too many flags that change this. Do we need to check
949-
// them all?
950-
bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) ||
951-
getDefaultDenormsAreZeroForTarget(Kind);
952-
bool FiniteOnly = DriverArgs.hasArg(options::OPT_cl_finite_math_only);
953-
954-
bool UnsafeMathOpt =
955-
DriverArgs.hasArg(options::OPT_cl_unsafe_math_optimizations);
956-
bool FastRelaxedMath = DriverArgs.hasArg(options::OPT_cl_fast_relaxed_math);
957-
bool CorrectSqrt =
958-
DriverArgs.hasArg(options::OPT_cl_fp32_correctly_rounded_divide_sqrt);
959-
960-
// GPU Sanitizer currently only supports ASan and is enabled through host
961-
// ASan.
962-
bool GPUSan = DriverArgs.hasFlag(options::OPT_fgpu_sanitize,
963-
options::OPT_fno_gpu_sanitize, true) &&
964-
getSanitizerArgs(DriverArgs).needsAsanRt();
965-
9661009
// Add the OpenCL specific bitcode library.
9671010
llvm::SmallVector<BitCodeLibraryInfo, 12> BCLibs;
9681011
BCLibs.emplace_back(RocmInstallation->getOpenCLPath().str());
9691012

9701013
// Add the generic set of libraries.
9711014
BCLibs.append(RocmInstallation->getCommonBitcodeLibs(
972-
DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt,
973-
FastRelaxedMath, CorrectSqrt, ABIVer, GPUSan, false));
1015+
DriverArgs, LibDeviceFile, GpuArch, DeviceOffloadingKind,
1016+
getSanitizerArgs(DriverArgs).needsAsanRt()));
9741017

9751018
for (auto [BCFile, Internalize] : BCLibs) {
9761019
if (Internalize)
@@ -1009,41 +1052,37 @@ bool RocmInstallationDetector::checkCommonBitcodeLibs(
10091052

10101053
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12>
10111054
RocmInstallationDetector::getCommonBitcodeLibs(
1012-
const llvm::opt::ArgList &DriverArgs, StringRef LibDeviceFile, bool Wave64,
1013-
bool DAZ, bool FiniteOnly, bool UnsafeMathOpt, bool FastRelaxedMath,
1014-
bool CorrectSqrt, DeviceLibABIVersion ABIVer, bool GPUSan,
1015-
bool isOpenMP) const {
1055+
const llvm::opt::ArgList &DriverArgs, StringRef LibDeviceFile,
1056+
StringRef GPUArch, const Action::OffloadKind DeviceOffloadingKind,
1057+
const bool NeedsASanRT) const {
10161058
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> BCLibs;
10171059

1060+
CommonBitcodeLibsPreferences Pref{D, DriverArgs, GPUArch,
1061+
DeviceOffloadingKind, NeedsASanRT};
1062+
10181063
auto AddBCLib = [&](ToolChain::BitCodeLibraryInfo BCLib,
10191064
bool Internalize = true) {
10201065
BCLib.ShouldInternalize = Internalize;
10211066
BCLibs.push_back(BCLib);
10221067
};
10231068
auto AddSanBCLibs = [&]() {
1024-
if (GPUSan)
1069+
if (Pref.GPUSan)
10251070
AddBCLib(getAsanRTLPath(), false);
10261071
};
10271072

10281073
AddSanBCLibs();
10291074
AddBCLib(getOCMLPath());
1030-
// FIXME: OpenMP has ockl and ocml contained in libomptarget.bc. However,
1031-
// we cannot exclude ocml here because of the crazy always-compile clang
1032-
// headers for cuda, hip, and openmp. A more sane approach is to use libm
1033-
// offload-arch-specific bitcode files as is done for FORTRAN. Currently,
1034-
// libomptarget-<offload-arch>.bc files is built by compiling headers with
1035-
// __BUILD_MATH_BUILTINS_LIB__ turning static libm functions to extern.
1036-
if (!isOpenMP)
1075+
if (!Pref.IsOpenMP)
10371076
AddBCLib(getOCKLPath());
1038-
else if (GPUSan && isOpenMP)
1077+
else if (Pref.GPUSan && Pref.IsOpenMP)
10391078
AddBCLib(getOCKLPath(), false);
1040-
AddBCLib(getDenormalsAreZeroPath(DAZ));
1041-
AddBCLib(getUnsafeMathPath(UnsafeMathOpt || FastRelaxedMath));
1042-
AddBCLib(getFiniteOnlyPath(FiniteOnly || FastRelaxedMath));
1043-
AddBCLib(getCorrectlyRoundedSqrtPath(CorrectSqrt));
1044-
AddBCLib(getWavefrontSize64Path(Wave64));
1079+
AddBCLib(getDenormalsAreZeroPath(Pref.DAZ));
1080+
AddBCLib(getUnsafeMathPath(Pref.UnsafeMathOpt || Pref.FastRelaxedMath));
1081+
AddBCLib(getFiniteOnlyPath(Pref.FiniteOnly || Pref.FastRelaxedMath));
1082+
AddBCLib(getCorrectlyRoundedSqrtPath(Pref.CorrectSqrt));
1083+
AddBCLib(getWavefrontSize64Path(Pref.Wave64));
10451084
AddBCLib(LibDeviceFile);
1046-
auto ABIVerPath = getABIVersionPath(ABIVer);
1085+
auto ABIVerPath = getABIVersionPath(Pref.ABIVer);
10471086
if (!ABIVerPath.empty())
10481087
AddBCLib(ABIVerPath);
10491088

@@ -1058,14 +1097,22 @@ bool AMDGPUToolChain::shouldSkipArgument(const llvm::opt::Arg *A) const {
10581097
}
10591098

10601099
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12>
1061-
ROCMToolChain::getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs,
1062-
const std::string &GPUArch,
1063-
bool isOpenMP) const {
1064-
RocmInstallationDetector RocmInstallation(getDriver(), getTriple(),
1065-
DriverArgs, true, true);
1066-
return amdgpu::dlr::getCommonDeviceLibNames(
1067-
DriverArgs, getSanitizerArgs(DriverArgs), getDriver(), GPUArch, isOpenMP,
1068-
RocmInstallation);
1100+
ROCMToolChain::getCommonDeviceLibNames(
1101+
const llvm::opt::ArgList &DriverArgs, const std::string &GPUArch,
1102+
Action::OffloadKind DeviceOffloadingKind) const {
1103+
auto Kind = llvm::AMDGPU::parseArchAMDGCN(GPUArch);
1104+
const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind);
1105+
1106+
StringRef LibDeviceFile = RocmInstallation->getLibDeviceFile(CanonArch);
1107+
auto ABIVer = DeviceLibABIVersion::fromCodeObjectVersion(
1108+
getAMDGPUCodeObjectVersion(getDriver(), DriverArgs));
1109+
if (!RocmInstallation->checkCommonBitcodeLibs(CanonArch, LibDeviceFile,
1110+
ABIVer))
1111+
return {};
1112+
1113+
return RocmInstallation->getCommonBitcodeLibs(
1114+
DriverArgs, LibDeviceFile, GPUArch, DeviceOffloadingKind,
1115+
getSanitizerArgs(DriverArgs).needsAsanRt());
10691116
}
10701117

10711118
bool AMDGPUToolChain::shouldSkipSanitizeOption(

clang/lib/Driver/ToolChains/AMDGPU.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -194,7 +194,7 @@ class LLVM_LIBRARY_VISIBILITY ROCMToolChain : public AMDGPUToolChain {
194194
llvm::SmallVector<BitCodeLibraryInfo, 12>
195195
getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs,
196196
const std::string &GPUArch,
197-
bool isOpenMP = false) const;
197+
Action::OffloadKind DeviceOffloadingKind) const;
198198

199199
SanitizerMask getSupportedSanitizers() const override {
200200
return SanitizerKind::Address;

clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -414,7 +414,7 @@ void AMDGPUOpenMPToolChain::addClangTargetOptions(
414414
true))
415415
return;
416416

417-
for (auto BCFile : getDeviceLibs(DriverArgs)) {
417+
for (auto BCFile : getDeviceLibs(DriverArgs, DeviceOffloadingKind)) {
418418
CC1Args.push_back(BCFile.ShouldInternalize ? "-mlink-builtin-bitcode"
419419
: "-mlink-bitcode-file");
420420
CC1Args.push_back(DriverArgs.MakeArgString(BCFile.Path));
@@ -574,16 +574,18 @@ AMDGPUOpenMPToolChain::computeMSVCVersion(const Driver *D,
574574
}
575575

576576
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12>
577-
AMDGPUOpenMPToolChain::getDeviceLibs(const llvm::opt::ArgList &Args) const {
577+
AMDGPUOpenMPToolChain::getDeviceLibs(
578+
const llvm::opt::ArgList &Args,
579+
const Action::OffloadKind DeviceOffloadingKind) const {
578580
if (!Args.hasFlag(options::OPT_offloadlib, options::OPT_no_offloadlib, true))
579581
return {};
580582

581583
StringRef GpuArch = getProcessorFromTargetID(
582584
getTriple(), Args.getLastArgValue(options::OPT_march_EQ));
583585

584586
SmallVector<BitCodeLibraryInfo, 12> BCLibs;
585-
for (auto BCLib : getCommonDeviceLibNames(Args, GpuArch.str(),
586-
/*IsOpenMP=*/true))
587+
for (auto BCLib :
588+
getCommonDeviceLibNames(Args, GpuArch.str(), DeviceOffloadingKind))
587589
BCLibs.emplace_back(BCLib);
588590

589591
return BCLibs;

clang/lib/Driver/ToolChains/AMDGPUOpenMP.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -119,7 +119,8 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUOpenMPToolChain final
119119

120120
unsigned GetDefaultDwarfVersion() const override { return 5; }
121121
llvm::SmallVector<BitCodeLibraryInfo, 12>
122-
getDeviceLibs(const llvm::opt::ArgList &Args) const override;
122+
getDeviceLibs(const llvm::opt::ArgList &Args,
123+
const Action::OffloadKind DeviceOffloadKind) const override;
123124

124125
const ToolChain &HostTC;
125126
};

clang/lib/Driver/ToolChains/HIPAMD.cpp

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -278,7 +278,7 @@ void HIPAMDToolChain::addClangTargetOptions(
278278
return; // No DeviceLibs for SPIR-V.
279279
}
280280

281-
for (auto BCFile : getDeviceLibs(DriverArgs)) {
281+
for (auto BCFile : getDeviceLibs(DriverArgs, DeviceOffloadingKind)) {
282282
CC1Args.push_back(BCFile.ShouldInternalize ? "-mlink-builtin-bitcode"
283283
: "-mlink-bitcode-file");
284284
CC1Args.push_back(DriverArgs.MakeArgString(BCFile.Path));
@@ -369,7 +369,8 @@ VersionTuple HIPAMDToolChain::computeMSVCVersion(const Driver *D,
369369
}
370370

371371
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12>
372-
HIPAMDToolChain::getDeviceLibs(const llvm::opt::ArgList &DriverArgs) const {
372+
HIPAMDToolChain::getDeviceLibs(const llvm::opt::ArgList &DriverArgs,
373+
Action::OffloadKind DeviceOffloadingKind) const {
373374
llvm::SmallVector<BitCodeLibraryInfo, 12> BCLibs;
374375
if (!DriverArgs.hasFlag(options::OPT_offloadlib, options::OPT_no_offloadlib,
375376
true) ||
@@ -411,7 +412,8 @@ HIPAMDToolChain::getDeviceLibs(const llvm::opt::ArgList &DriverArgs) const {
411412
assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
412413

413414
// Add common device libraries like ocml etc.
414-
for (auto N : getCommonDeviceLibNames(DriverArgs, GpuArch.str()))
415+
for (auto N : getCommonDeviceLibNames(DriverArgs, GpuArch.str(),
416+
DeviceOffloadingKind))
415417
BCLibs.emplace_back(N);
416418

417419
// Add instrument lib.

clang/lib/Driver/ToolChains/HIPAMD.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -85,7 +85,8 @@ class LLVM_LIBRARY_VISIBILITY HIPAMDToolChain final : public ROCMToolChain {
8585
void AddHIPIncludeArgs(const llvm::opt::ArgList &DriverArgs,
8686
llvm::opt::ArgStringList &CC1Args) const override;
8787
llvm::SmallVector<BitCodeLibraryInfo, 12>
88-
getDeviceLibs(const llvm::opt::ArgList &Args) const override;
88+
getDeviceLibs(const llvm::opt::ArgList &Args,
89+
Action::OffloadKind DeviceOffloadKind) const override;
8990

9091
SanitizerMask getSupportedSanitizers() const override;
9192

clang/lib/Driver/ToolChains/HIPSPV.cpp

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -149,7 +149,8 @@ void HIPSPVToolChain::addClangTargetOptions(
149149
CC1Args.append(
150150
{"-fvisibility=hidden", "-fapply-global-visibility-to-externs"});
151151

152-
for (const BitCodeLibraryInfo &BCFile : getDeviceLibs(DriverArgs))
152+
for (const BitCodeLibraryInfo &BCFile :
153+
getDeviceLibs(DriverArgs, DeviceOffloadingKind))
153154
CC1Args.append(
154155
{"-mlink-builtin-bitcode", DriverArgs.MakeArgString(BCFile.Path)});
155156
}
@@ -200,7 +201,9 @@ void HIPSPVToolChain::AddHIPIncludeArgs(const ArgList &DriverArgs,
200201
}
201202

202203
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12>
203-
HIPSPVToolChain::getDeviceLibs(const llvm::opt::ArgList &DriverArgs) const {
204+
HIPSPVToolChain::getDeviceLibs(
205+
const llvm::opt::ArgList &DriverArgs,
206+
const Action::OffloadKind DeviceOffloadingKind) const {
204207
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> BCLibs;
205208
if (!DriverArgs.hasFlag(options::OPT_offloadlib, options::OPT_no_offloadlib,
206209
true))

clang/lib/Driver/ToolChains/HIPSPV.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,8 @@ class LLVM_LIBRARY_VISIBILITY HIPSPVToolChain final : public ToolChain {
6969
void AddHIPIncludeArgs(const llvm::opt::ArgList &DriverArgs,
7070
llvm::opt::ArgStringList &CC1Args) const override;
7171
llvm::SmallVector<BitCodeLibraryInfo, 12>
72-
getDeviceLibs(const llvm::opt::ArgList &Args) const override;
72+
getDeviceLibs(const llvm::opt::ArgList &Args,
73+
const Action::OffloadKind DeviceOffloadKind) const override;
7374

7475
SanitizerMask getSupportedSanitizers() const override;
7576

0 commit comments

Comments
 (0)