@@ -31,6 +31,68 @@ using namespace clang::driver::toolchains;
31
31
using namespace clang ;
32
32
using namespace llvm ::opt;
33
33
34
// Constructor for CommonBitcodeLibsPreferences. It computes, from the driver
// arguments, the GPU architecture string, the offload kind and the caller's
// ASan-runtime decision, the values stored in the members assigned below:
// ABIVer, IsOpenMP, Wave64, DAZ, FiniteOnly, UnsafeMathOpt, FastRelaxedMath,
// CorrectSqrt and GPUSan.
//
// Parameters:
//   D                    - driver, forwarded to getAMDGPUCodeObjectVersion.
//   DriverArgs           - argument list queried for every option below.
//   GPUArch              - architecture name, parsed with parseArchAMDGCN.
//   DeviceOffloadingKind - compared against OFK_OpenMP and OFK_HIP.
//   NeedsASanRT          - caller-supplied flag; GPUSan can only be true when
//                          this is true.
+ RocmInstallationDetector::CommonBitcodeLibsPreferences::
35
+ CommonBitcodeLibsPreferences (const Driver &D,
36
+ const llvm::opt::ArgList &DriverArgs,
37
+ StringRef GPUArch,
38
+ const Action::OffloadKind DeviceOffloadingKind,
39
+ const bool NeedsASanRT)
40
// ABIVer is derived from the code object version selected for this
// driver invocation.
+ : ABIVer(DeviceLibABIVersion::fromCodeObjectVersion(
41
+ tools::getAMDGPUCodeObjectVersion (D, DriverArgs))) {
42
// Kind is the parsed architecture; ArchAttr is its feature bitmask, tested
// below for the WAVE32, FAST_FMA_F32 and FAST_DENORMAL_F32 bits.
+ const auto Kind = llvm::AMDGPU::parseArchAMDGCN (GPUArch);
43
+ const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN (Kind);
44
+
45
+ IsOpenMP = DeviceOffloadingKind == Action::OFK_OpenMP;
46
+
47
// Wave64 is true when the architecture lacks the WAVE32 feature bit, or when
// the OPT_mwavefrontsize64 / OPT_mno_wavefrontsize64 pair resolves to true
// (hasFlag's last argument, false, is used when neither option is given).
+ const bool HasWave32 = (ArchAttr & llvm::AMDGPU::FEATURE_WAVE32);
48
+ Wave64 =
49
+ !HasWave32 || DriverArgs.hasFlag (options::OPT_mwavefrontsize64,
50
+ options::OPT_mno_wavefrontsize64, false );
51
+
52
// "Known offloading" here means exactly OpenMP or HIP; every other offload
// kind takes the cl_* option path for DAZ and defaults CorrectSqrt to false.
+ const bool IsKnownOffloading = DeviceOffloadingKind == Action::OFK_OpenMP ||
53
+ DeviceOffloadingKind == Action::OFK_HIP;
54
+
55
+ // Default to enabling f32 denormals on subtargets where fma is fast with
56
+ // denormals
57
// Concretely: DefaultDAZ is false for GK_NONE, and otherwise true unless the
// architecture has both FEATURE_FAST_FMA_F32 and FEATURE_FAST_DENORMAL_F32.
+ const bool DefaultDAZ =
58
+ (Kind == llvm::AMDGPU::GK_NONE)
59
+ ? false
60
+ : !((ArchAttr & llvm::AMDGPU::FEATURE_FAST_FMA_F32) &&
61
+ (ArchAttr & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32));
62
+ // TODO: There are way too many flags that change this. Do we need to
63
+ // check them all?
64
// For OpenMP/HIP the flush-denormals flag pair decides, falling back to
// DefaultDAZ; otherwise OPT_cl_denorms_are_zero can only turn DAZ on, it
// cannot override a true DefaultDAZ.
+ DAZ = IsKnownOffloading
65
+ ? DriverArgs.hasFlag (options::OPT_fgpu_flush_denormals_to_zero,
66
+ options::OPT_fno_gpu_flush_denormals_to_zero,
67
+ DefaultDAZ)
68
+ : DriverArgs.hasArg (options::OPT_cl_denorms_are_zero) || DefaultDAZ;
69
+
70
// The next three settings each accept either the cl_* option or the
// corresponding f*/fno-* flag pair (default false), regardless of the
// offload kind.
+ FiniteOnly = DriverArgs.hasArg (options::OPT_cl_finite_math_only) ||
71
+ DriverArgs.hasFlag (options::OPT_ffinite_math_only,
72
+ options::OPT_fno_finite_math_only, false );
73
+
74
+ UnsafeMathOpt =
75
+ DriverArgs.hasArg (options::OPT_cl_unsafe_math_optimizations) ||
76
+ DriverArgs.hasFlag (options::OPT_funsafe_math_optimizations,
77
+ options::OPT_fno_unsafe_math_optimizations, false );
78
+
79
+ FastRelaxedMath = DriverArgs.hasArg (options::OPT_cl_fast_relaxed_math) ||
80
+ DriverArgs.hasFlag (options::OPT_ffast_math,
81
+ options::OPT_fno_fast_math, false );
82
+
83
// CorrectSqrt defaults to true for OpenMP/HIP and to false otherwise; the
// cl_* option can only turn it on.
// NOTE(review): `IsKnownOffloading ? true : false` is equivalent to plain
// `IsKnownOffloading`.
+ const bool DefaultSqrt = IsKnownOffloading ? true : false ;
84
+ CorrectSqrt =
85
+ DriverArgs.hasArg (options::OPT_cl_fp32_correctly_rounded_divide_sqrt) ||
86
+ DriverArgs.hasFlag (
87
+ options::OPT_fhip_fp32_correctly_rounded_divide_sqrt,
88
+ options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt, DefaultSqrt);
89
+ // GPU Sanitizer currently only supports ASan and is enabled through host
90
+ // ASan.
91
// The fgpu_sanitize flag pair defaults to true, so GPUSan follows NeedsASanRT
// unless the negative flag wins.
+ GPUSan = (DriverArgs.hasFlag (options::OPT_fgpu_sanitize,
92
+ options::OPT_fno_gpu_sanitize, true ) &&
93
+ NeedsASanRT);
94
+ }
95
+
34
96
void RocmInstallationDetector::scanLibDevicePath (llvm::StringRef Path) {
35
97
assert (!Path.empty ());
36
98
@@ -944,33 +1006,14 @@ void ROCMToolChain::addClangTargetOptions(
944
1006
ABIVer, noGPULib))
945
1007
return ;
946
1008
947
- bool Wave64 = isWave64 (DriverArgs, Kind);
948
- // TODO: There are way too many flags that change this. Do we need to check
949
- // them all?
950
- bool DAZ = DriverArgs.hasArg (options::OPT_cl_denorms_are_zero) ||
951
- getDefaultDenormsAreZeroForTarget (Kind);
952
- bool FiniteOnly = DriverArgs.hasArg (options::OPT_cl_finite_math_only);
953
-
954
- bool UnsafeMathOpt =
955
- DriverArgs.hasArg (options::OPT_cl_unsafe_math_optimizations);
956
- bool FastRelaxedMath = DriverArgs.hasArg (options::OPT_cl_fast_relaxed_math);
957
- bool CorrectSqrt =
958
- DriverArgs.hasArg (options::OPT_cl_fp32_correctly_rounded_divide_sqrt);
959
-
960
- // GPU Sanitizer currently only supports ASan and is enabled through host
961
- // ASan.
962
- bool GPUSan = DriverArgs.hasFlag (options::OPT_fgpu_sanitize,
963
- options::OPT_fno_gpu_sanitize, true ) &&
964
- getSanitizerArgs (DriverArgs).needsAsanRt ();
965
-
966
1009
// Add the OpenCL specific bitcode library.
967
1010
llvm::SmallVector<BitCodeLibraryInfo, 12 > BCLibs;
968
1011
BCLibs.emplace_back (RocmInstallation->getOpenCLPath ().str ());
969
1012
970
1013
// Add the generic set of libraries.
971
1014
BCLibs.append (RocmInstallation->getCommonBitcodeLibs (
972
- DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt ,
973
- FastRelaxedMath, CorrectSqrt, ABIVer, GPUSan, false ));
1015
+ DriverArgs, LibDeviceFile, GpuArch, DeviceOffloadingKind ,
1016
+ getSanitizerArgs (DriverArgs). needsAsanRt () ));
974
1017
975
1018
for (auto [BCFile, Internalize] : BCLibs) {
976
1019
if (Internalize)
@@ -1009,41 +1052,37 @@ bool RocmInstallationDetector::checkCommonBitcodeLibs(
1009
1052
1010
1053
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12 >
1011
1054
RocmInstallationDetector::getCommonBitcodeLibs (
1012
- const llvm::opt::ArgList &DriverArgs, StringRef LibDeviceFile, bool Wave64,
1013
- bool DAZ, bool FiniteOnly, bool UnsafeMathOpt, bool FastRelaxedMath,
1014
- bool CorrectSqrt, DeviceLibABIVersion ABIVer, bool GPUSan,
1015
- bool isOpenMP) const {
1055
+ const llvm::opt::ArgList &DriverArgs, StringRef LibDeviceFile,
1056
+ StringRef GPUArch, const Action::OffloadKind DeviceOffloadingKind,
1057
+ const bool NeedsASanRT) const {
1016
1058
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12 > BCLibs;
1017
1059
1060
+ CommonBitcodeLibsPreferences Pref{D, DriverArgs, GPUArch,
1061
+ DeviceOffloadingKind, NeedsASanRT};
1062
+
1018
1063
auto AddBCLib = [&](ToolChain::BitCodeLibraryInfo BCLib,
1019
1064
bool Internalize = true ) {
1020
1065
BCLib.ShouldInternalize = Internalize;
1021
1066
BCLibs.push_back (BCLib);
1022
1067
};
1023
1068
auto AddSanBCLibs = [&]() {
1024
- if (GPUSan)
1069
+ if (Pref. GPUSan )
1025
1070
AddBCLib (getAsanRTLPath (), false );
1026
1071
};
1027
1072
1028
1073
AddSanBCLibs ();
1029
1074
AddBCLib (getOCMLPath ());
1030
- // FIXME: OpenMP has ockl and ocml contained in libomptarget.bc. However,
1031
- // we cannot exclude ocml here because of the crazy always-compile clang
1032
- // headers for cuda, hip, and openmp. A more sane approach is to use libm
1033
- // offload-arch-specific bitcode files as is done for FORTRAN. Currently,
1034
- // libomptarget-<offload-arch>.bc files is built by compiling headers with
1035
- // __BUILD_MATH_BUILTINS_LIB__ turning static libm functions to extern.
1036
- if (!isOpenMP)
1075
+ if (!Pref.IsOpenMP )
1037
1076
AddBCLib (getOCKLPath ());
1038
- else if (GPUSan && isOpenMP )
1077
+ else if (Pref. GPUSan && Pref. IsOpenMP )
1039
1078
AddBCLib (getOCKLPath (), false );
1040
- AddBCLib (getDenormalsAreZeroPath (DAZ));
1041
- AddBCLib (getUnsafeMathPath (UnsafeMathOpt || FastRelaxedMath));
1042
- AddBCLib (getFiniteOnlyPath (FiniteOnly || FastRelaxedMath));
1043
- AddBCLib (getCorrectlyRoundedSqrtPath (CorrectSqrt));
1044
- AddBCLib (getWavefrontSize64Path (Wave64));
1079
+ AddBCLib (getDenormalsAreZeroPath (Pref. DAZ ));
1080
+ AddBCLib (getUnsafeMathPath (Pref. UnsafeMathOpt || Pref. FastRelaxedMath ));
1081
+ AddBCLib (getFiniteOnlyPath (Pref. FiniteOnly || Pref. FastRelaxedMath ));
1082
+ AddBCLib (getCorrectlyRoundedSqrtPath (Pref. CorrectSqrt ));
1083
+ AddBCLib (getWavefrontSize64Path (Pref. Wave64 ));
1045
1084
AddBCLib (LibDeviceFile);
1046
- auto ABIVerPath = getABIVersionPath (ABIVer);
1085
+ auto ABIVerPath = getABIVersionPath (Pref. ABIVer );
1047
1086
if (!ABIVerPath.empty ())
1048
1087
AddBCLib (ABIVerPath);
1049
1088
@@ -1058,14 +1097,22 @@ bool AMDGPUToolChain::shouldSkipArgument(const llvm::opt::Arg *A) const {
1058
1097
}
1059
1098
1060
1099
// ROCMToolChain::getCommonDeviceLibNames, shown as a diff: lines marked '-'
// are the removed implementation and lines marked '+' are its replacement.
//
// Replacement ('+') behaviour: returns the list produced by
// RocmInstallation->getCommonBitcodeLibs for the given driver arguments, GPU
// architecture and offload kind, or an empty list when
// checkCommonBitcodeLibs returns false.
//
// Parameters (replacement signature):
//   DriverArgs           - argument list, also used to obtain the code object
//                          version and the sanitizer arguments.
//   GPUArch              - architecture name as supplied by the caller.
//   DeviceOffloadingKind - offload kind, forwarded unchanged to
//                          getCommonBitcodeLibs.
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12 >
1061
// Removed version: took a bool isOpenMP, constructed a local
// RocmInstallationDetector and delegated to
// amdgpu::dlr::getCommonDeviceLibNames.
- ROCMToolChain::getCommonDeviceLibNames (const llvm::opt::ArgList &DriverArgs,
1062
- const std::string &GPUArch,
1063
- bool isOpenMP) const {
1064
- RocmInstallationDetector RocmInstallation (getDriver (), getTriple (),
1065
- DriverArgs, true , true );
1066
- return amdgpu::dlr::getCommonDeviceLibNames (
1067
- DriverArgs, getSanitizerArgs (DriverArgs), getDriver (), GPUArch, isOpenMP,
1068
- RocmInstallation);
1100
+ ROCMToolChain::getCommonDeviceLibNames (
1101
+ const llvm::opt::ArgList &DriverArgs, const std::string &GPUArch,
1102
+ Action::OffloadKind DeviceOffloadingKind) const {
1103
// Parse the architecture, then map the parsed kind back to a name; CanonArch
// is what the device-library lookup and the check below are keyed on.
+ auto Kind = llvm::AMDGPU::parseArchAMDGCN (GPUArch);
1104
+ const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN (Kind);
1105
+
1106
+ StringRef LibDeviceFile = RocmInstallation->getLibDeviceFile (CanonArch);
1107
+ auto ABIVer = DeviceLibABIVersion::fromCodeObjectVersion (
1108
+ getAMDGPUCodeObjectVersion (getDriver (), DriverArgs));
1109
// Return an empty list when the check returns false.
+ if (!RocmInstallation->checkCommonBitcodeLibs (CanonArch, LibDeviceFile,
1110
+ ABIVer))
1111
+ return {};
1112
+
1113
// NOTE(review): the original GPUArch string, not CanonArch, is forwarded
// here; getCommonBitcodeLibs receives it as its GPUArch argument.
+ return RocmInstallation->getCommonBitcodeLibs (
1114
+ DriverArgs, LibDeviceFile, GPUArch, DeviceOffloadingKind,
1115
+ getSanitizerArgs (DriverArgs).needsAsanRt ());
1069
1116
}
1070
1117
1071
1118
bool AMDGPUToolChain::shouldSkipSanitizeOption (
0 commit comments