Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Driver] Enable SYCL AOT compilation using --offload-arch #15353

Draft
wants to merge 16 commits into
base: sycl
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions clang/include/clang/Basic/DiagnosticDriverKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,8 @@ def err_drv_sycl_missing_amdgpu_arch : Error<
"missing AMDGPU architecture for SYCL offloading; specify it with '-Xsycl-target-backend%select{|=%1}0 --offload-arch=<arch-name>'">;
// Error taking two caller-supplied operands: %0 is reported as unsupported
// while %1 is set together with '-fsycl'.
def err_drv_sycl_thinlto_split_off: Error<
"'%0' is not supported when '%1' is set with '-fsycl'">;
// Emitted by the driver when '--offload-arch' is used for SYCL offloading
// but the '--offload-new-driver' flag is not enabled. Takes no operands.
// NOTE(review): as an error, "is supported when" reads like a statement of
// support; "is only supported when" may be clearer -- confirm against any
// tests that match this text before rewording.
def err_drv_sycl_offload_arch_new_driver: Error<
"'--offload-arch' is supported when '-fsycl' is set with '--offload-new-driver'">;
// Warning for a SYCL target triple whose normalized form was already seen:
// %0 is the triple being dropped, %1 is the earlier triple it collides with.
def warn_drv_sycl_offload_target_duplicate : Warning<
"SYCL offloading target '%0' is similar to target '%1' already specified; "
"will be ignored">, InGroup<SyclTarget>;
Expand Down
103 changes: 101 additions & 2 deletions clang/lib/Driver/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1185,12 +1185,13 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
llvm::StringMap<llvm::DenseSet<StringRef>> DerivedArchs;
llvm::StringMap<StringRef> FoundNormalizedTriples;
llvm::SmallVector<llvm::Triple, 4> UniqueSYCLTriplesVec;
llvm::StringSet<> SYCLTriples;
if (HasSYCLTargetsOption) {
// At this point, we know we have a valid combination
// of -fsycl*target options passed
Arg *SYCLTargetsValues = SYCLTargets;
if (SYCLTargetsValues) {
llvm::StringSet<> SYCLTriples;

if (SYCLTargetsValues->getNumValues()) {

// Multiple targets are currently not supported when using
Expand Down Expand Up @@ -1279,9 +1280,10 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
if (!Arch.empty())
DerivedArchs[DeviceTriple.getTriple()].insert(Arch);
}

if (!SYCLTriples.empty()) {
for (const auto &SYCLTriple : SYCLTriples) {
llvm::Triple Triple(SYCLTriple.getKey());
llvm::Triple Triple(MakeSYCLDeviceTriple(SYCLTriple.getKey()));
UniqueSYCLTriplesVec.push_back(Triple);
}
}
Expand All @@ -1290,6 +1292,103 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
Diag(clang::diag::warn_drv_empty_joined_argument)
<< SYCLTargetsValues->getAsString(C.getInputArgs());
}
}
// If the user specified --offload-arch, deduce the offloading
// target triple(s) from the set of architecture(s).
// Create a toolchain for each valid triple.
else if (HasValidSYCLRuntime &&
C.getInputArgs().hasArg(options::OPT_offload_arch_EQ) && !IsHIP &&
!IsCuda) {
// SYCL offloading to Intel CPUs and Intel GPUs with ``--offload-arch``
// is currently enabled only with ``--offload-new-driver`` option.
// Emit a diagnostic if ``--offload-arch`` is invoked without
// ``--offload-new-driver`` option.
if (!C.getInputArgs().hasFlag(options::OPT_offload_new_driver,
options::OPT_no_offload_new_driver,
false)) {
Diag(clang::diag::err_drv_sycl_offload_arch_new_driver);
return;
}
const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
auto AMDTriple = getHIPOffloadTargetTriple(*this, C.getInputArgs());
auto NVPTXTriple = getNVIDIAOffloadTargetTriple(*this, C.getInputArgs(),
HostTC->getTriple());

// Attempt to deduce the offloading triple from the set of architectures.
// We need to temporarily create these toolchains so that we can access
// tools for inferring architectures.
llvm::DenseSet<StringRef> Archs;
if (NVPTXTriple) {
auto TempTC = std::make_unique<toolchains::CudaToolChain>(
*this, *NVPTXTriple, *HostTC, C.getInputArgs(), Action::OFK_None);
for (StringRef Arch :
getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &*TempTC, true))
Archs.insert(Arch);
}
if (AMDTriple) {
auto TempTC = std::make_unique<toolchains::AMDGPUOpenMPToolChain>(
*this, *AMDTriple, *HostTC, C.getInputArgs());
for (StringRef Arch :
getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &*TempTC, true))
Archs.insert(Arch);
}
if (!AMDTriple && !NVPTXTriple) {
for (StringRef Arch :
getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, nullptr, true))
Archs.insert(Arch);
}
for (StringRef Arch : Archs) {
if (NVPTXTriple && IsSYCLSupportedNVidiaGPUArch(StringToOffloadArchSYCL(
getProcessorFromTargetID(*NVPTXTriple, Arch)))) {
DerivedArchs[NVPTXTriple->getTriple()].insert(Arch);
} else if (AMDTriple &&
IsSYCLSupportedAMDGPUArch(StringToOffloadArchSYCL(
getProcessorFromTargetID(*AMDTriple, Arch)))) {
DerivedArchs[AMDTriple->getTriple()].insert(Arch);
} else if (IsSYCLSupportedIntelCPUArch(StringToOffloadArchSYCL(Arch))) {
DerivedArchs[MakeSYCLDeviceTriple("spir64_x86_64").getTriple()].insert(
Arch);
} else if (IsSYCLSupportedIntelGPUArch(StringToOffloadArchSYCL(Arch))) {
StringRef IntelGPUArch;
IntelGPUArch = mapIntelGPUArchName(Arch).data();
DerivedArchs[MakeSYCLDeviceTriple("spir64_gen").getTriple()].insert(
IntelGPUArch);
} else {
Diag(clang::diag::err_drv_invalid_sycl_target) << Arch;
return;
}
srividya-sundaram marked this conversation as resolved.
Show resolved Hide resolved
}
// If the set is empty then we failed to find a native architecture.
if (Archs.empty()) {
Diag(clang::diag::err_drv_invalid_sycl_target) << "native";
return;
}

for (const auto &TripleAndArchs : DerivedArchs)
SYCLTriples.insert(TripleAndArchs.first());

for (const auto &Val : SYCLTriples) {
llvm::Triple SYCLTargetTriple(MakeSYCLDeviceTriple(Val.getKey()));
std::string NormalizedName = SYCLTargetTriple.normalize();



// Make sure we don't have a duplicate triple.
auto Duplicate = FoundNormalizedTriples.find(NormalizedName);
if (Duplicate != FoundNormalizedTriples.end()) {
Diag(clang::diag::warn_drv_sycl_offload_target_duplicate)
<< Val.getKey() << Duplicate->second;
continue;
}

// Store the current triple so that we can check for duplicates in the
// following iterations.
FoundNormalizedTriples[NormalizedName] = Val.getKey();
UniqueSYCLTriplesVec.push_back(SYCLTargetTriple);
}

addSYCLDefaultTriple(C, UniqueSYCLTriplesVec);

} else {
// If -fsycl is supplied without -fsycl-targets we will assume SPIR-V.
// For -fsycl-device-only, we also setup the implied triple as needed.
Expand Down
174 changes: 174 additions & 0 deletions clang/lib/Driver/ToolChains/SYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,180 @@ using namespace clang::driver::tools;
using namespace clang;
using namespace llvm::opt;

// One row of the --offload-arch lookup table: pairs an architecture name
// string with its SYCLSupportedOffloadArchs enumerator. Rows are written with
// aggregate initialization, so the field order below must not change.
struct StringToOffloadArchSYCLMap {
// Architecture name exactly as it is compared against the user's value.
const char *ArchName;
// Enumerator for that name. Despite the field name, the table also uses this
// for AMD and NVIDIA GPU entries, not only Intel ones.
SYCLSupportedOffloadArchs IntelArch;
};

// Table of the architecture names recognized for SYCL offloading, each paired
// with its SYCLSupportedOffloadArchs enumerator. StringToOffloadArchSYCL scans
// this table front to back and stops at the first exact name match, so every
// name should appear exactly once; a repeated row would be dead data.
static const StringToOffloadArchSYCLMap StringToArchNamesMap[] = {
    // Intel CPU mapping.
    {"skylake-avx512", SYCLSupportedOffloadArchs::SKYLAKEAVX512},
    {"core-avx2", SYCLSupportedOffloadArchs::COREAVX2},
    {"corei7-avx", SYCLSupportedOffloadArchs::COREI7AVX},
    {"corei7", SYCLSupportedOffloadArchs::COREI7},
    {"westmere", SYCLSupportedOffloadArchs::WESTMERE},
    {"sandybridge", SYCLSupportedOffloadArchs::SANDYBRIDGE},
    {"ivybridge", SYCLSupportedOffloadArchs::IVYBRIDGE},
    {"broadwell", SYCLSupportedOffloadArchs::BROADWELL},
    {"coffeelake", SYCLSupportedOffloadArchs::COFFEELAKE},
    {"alderlake", SYCLSupportedOffloadArchs::ALDERLAKE},
    {"skylake", SYCLSupportedOffloadArchs::SKYLAKE},
    {"skx", SYCLSupportedOffloadArchs::SKX},
    {"cascadelake", SYCLSupportedOffloadArchs::CASCADELAKE},
    {"icelake-client", SYCLSupportedOffloadArchs::ICELAKECLIENT},
    {"icelake-server", SYCLSupportedOffloadArchs::ICELAKESERVER},
    {"sapphirerapids", SYCLSupportedOffloadArchs::SAPPHIRERAPIDS},
    {"graniterapids", SYCLSupportedOffloadArchs::GRANITERAPIDS},
    // Intel GPU mapping.
    {"bdw", SYCLSupportedOffloadArchs::BDW},
    {"skl", SYCLSupportedOffloadArchs::SKL},
    {"kbl", SYCLSupportedOffloadArchs::KBL},
    {"cfl", SYCLSupportedOffloadArchs::CFL},
    {"apl", SYCLSupportedOffloadArchs::APL},
    {"bxt", SYCLSupportedOffloadArchs::BXT},
    {"glk", SYCLSupportedOffloadArchs::GLK},
    {"whl", SYCLSupportedOffloadArchs::WHL},
    {"aml", SYCLSupportedOffloadArchs::AML},
    {"cml", SYCLSupportedOffloadArchs::CML},
    {"icllp", SYCLSupportedOffloadArchs::ICLLP},
    {"icl", SYCLSupportedOffloadArchs::ICL},
    {"ehl", SYCLSupportedOffloadArchs::EHL},
    {"jsl", SYCLSupportedOffloadArchs::JSL},
    {"tgllp", SYCLSupportedOffloadArchs::TGLLP},
    {"tgl", SYCLSupportedOffloadArchs::TGL},
    {"rkl", SYCLSupportedOffloadArchs::RKL},
    {"adl_s", SYCLSupportedOffloadArchs::ADL_S},
    {"rpl_s", SYCLSupportedOffloadArchs::RPL_S},
    {"adl_p", SYCLSupportedOffloadArchs::ADL_P},
    {"adl_n", SYCLSupportedOffloadArchs::ADL_N},
    {"dg1", SYCLSupportedOffloadArchs::DG1},
    {"acm_g10", SYCLSupportedOffloadArchs::ACM_G10},
    {"dg2_g10", SYCLSupportedOffloadArchs::DG2_G10},
    {"acm_g11", SYCLSupportedOffloadArchs::ACM_G11},
    {"dg2_g11", SYCLSupportedOffloadArchs::DG2_G11},
    {"acm_g12", SYCLSupportedOffloadArchs::ACM_G12},
    {"dg2_g12", SYCLSupportedOffloadArchs::DG2_G12},
    {"pvc", SYCLSupportedOffloadArchs::PVC},
    {"pvc_vg", SYCLSupportedOffloadArchs::PVC_VG},
    {"mtl_u", SYCLSupportedOffloadArchs::MTL_U},
    {"mtl_s", SYCLSupportedOffloadArchs::MTL_S},
    {"arl_u", SYCLSupportedOffloadArchs::ARL_U},
    {"arl_s", SYCLSupportedOffloadArchs::ARL_S},
    {"mtl_h", SYCLSupportedOffloadArchs::MTL_H},
    {"arl_h", SYCLSupportedOffloadArchs::ARL_H},
    {"bmg_g21", SYCLSupportedOffloadArchs::BMG_G21},
    {"lnl_m", SYCLSupportedOffloadArchs::LNL_M},
    // AMD GPU mapping.
    {"gfx700", SYCLSupportedOffloadArchs::GFX700},
    {"gfx701", SYCLSupportedOffloadArchs::GFX701},
    {"gfx702", SYCLSupportedOffloadArchs::GFX702},
    {"gfx801", SYCLSupportedOffloadArchs::GFX801},
    {"gfx802", SYCLSupportedOffloadArchs::GFX802},
    {"gfx803", SYCLSupportedOffloadArchs::GFX803},
    {"gfx805", SYCLSupportedOffloadArchs::GFX805},
    {"gfx810", SYCLSupportedOffloadArchs::GFX810},
    {"gfx900", SYCLSupportedOffloadArchs::GFX900},
    {"gfx902", SYCLSupportedOffloadArchs::GFX902},
    {"gfx904", SYCLSupportedOffloadArchs::GFX904},
    {"gfx906", SYCLSupportedOffloadArchs::GFX906},
    {"gfx908", SYCLSupportedOffloadArchs::GFX908},
    {"gfx909", SYCLSupportedOffloadArchs::GFX909},
    {"gfx90a", SYCLSupportedOffloadArchs::GFX90A},
    {"gfx90c", SYCLSupportedOffloadArchs::GFX90C},
    {"gfx940", SYCLSupportedOffloadArchs::GFX940},
    {"gfx941", SYCLSupportedOffloadArchs::GFX941},
    {"gfx942", SYCLSupportedOffloadArchs::GFX942},
    {"gfx1010", SYCLSupportedOffloadArchs::GFX1010},
    {"gfx1011", SYCLSupportedOffloadArchs::GFX1011},
    {"gfx1012", SYCLSupportedOffloadArchs::GFX1012},
    {"gfx1013", SYCLSupportedOffloadArchs::GFX1013},
    {"gfx1030", SYCLSupportedOffloadArchs::GFX1030},
    {"gfx1031", SYCLSupportedOffloadArchs::GFX1031},
    {"gfx1032", SYCLSupportedOffloadArchs::GFX1032},
    {"gfx1033", SYCLSupportedOffloadArchs::GFX1033},
    {"gfx1034", SYCLSupportedOffloadArchs::GFX1034},
    {"gfx1035", SYCLSupportedOffloadArchs::GFX1035},
    {"gfx1036", SYCLSupportedOffloadArchs::GFX1036},
    {"gfx1100", SYCLSupportedOffloadArchs::GFX1100},
    {"gfx1101", SYCLSupportedOffloadArchs::GFX1101},
    {"gfx1102", SYCLSupportedOffloadArchs::GFX1102},
    {"gfx1103", SYCLSupportedOffloadArchs::GFX1103},
    {"gfx1150", SYCLSupportedOffloadArchs::GFX1150},
    {"gfx1151", SYCLSupportedOffloadArchs::GFX1151},
    {"gfx1200", SYCLSupportedOffloadArchs::GFX1200},
    {"gfx1201", SYCLSupportedOffloadArchs::GFX1201},
    // NVIDIA GPU mapping.
    {"sm_50", SYCLSupportedOffloadArchs::SM_50},
    {"sm_52", SYCLSupportedOffloadArchs::SM_52},
    {"sm_53", SYCLSupportedOffloadArchs::SM_53},
    {"sm_60", SYCLSupportedOffloadArchs::SM_60},
    {"sm_61", SYCLSupportedOffloadArchs::SM_61},
    {"sm_62", SYCLSupportedOffloadArchs::SM_62},
    {"sm_70", SYCLSupportedOffloadArchs::SM_70},
    {"sm_72", SYCLSupportedOffloadArchs::SM_72},
    {"sm_75", SYCLSupportedOffloadArchs::SM_75},
    {"sm_80", SYCLSupportedOffloadArchs::SM_80},
    {"sm_86", SYCLSupportedOffloadArchs::SM_86},
    {"sm_87", SYCLSupportedOffloadArchs::SM_87},
    {"sm_89", SYCLSupportedOffloadArchs::SM_89},
    {"sm_90", SYCLSupportedOffloadArchs::SM_90},
    {"sm_90a", SYCLSupportedOffloadArchs::SM_90A}};

// Translate a user-provided --offload-arch value into its
// SYCLSupportedOffloadArchs enumerator by scanning StringToArchNamesMap in
// order and taking the first row whose name matches exactly. Yields
// SYCLSupportedOffloadArchs::UNKNOWN when no row has that name.
SYCLSupportedOffloadArchs
clang::driver::StringToOffloadArchSYCL(llvm::StringRef ArchNameAsString) {
  for (const StringToOffloadArchSYCLMap &Entry : StringToArchNamesMap)
    if (ArchNameAsString == Entry.ArchName)
      return Entry.IntelArch;
  return SYCLSupportedOffloadArchs::UNKNOWN;
}

// Convert an Intel GPU name given to --offload-arch into the spir64_gen
// device name accepted by OCLOC (the Intel GPU AOT compiler). Some user
// spellings are aliases that collapse onto a single device name; the rest map
// to themselves. A name that is not listed produces an empty StringRef.
StringRef clang::driver::mapIntelGPUArchName(StringRef ArchName) {
  return llvm::StringSwitch<StringRef>(ArchName)
      // Alias groups: the last string in each call is the resulting name.
      .Cases("apl", "bxt", "apl")
      .Cases("icllp", "icl", "icllp")
      .Cases("ehl", "jsl", "ehl")
      .Cases("tgllp", "tgl", "tgllp")
      .Cases("adl_s", "rpl_s", "adl_s")
      .Cases("acm_g10", "dg2_g10", "acm_g10")
      .Cases("acm_g11", "dg2_g11", "acm_g11")
      .Cases("acm_g12", "dg2_g12", "acm_g12")
      .Cases("mtl_u", "mtl_s", "arl_u", "arl_s", "mtl_u")
      // Names that are passed through unchanged.
      .Case("bdw", "bdw")
      .Case("skl", "skl")
      .Case("kbl", "kbl")
      .Case("cfl", "cfl")
      .Case("glk", "glk")
      .Case("whl", "whl")
      .Case("aml", "aml")
      .Case("cml", "cml")
      .Case("rkl", "rkl")
      .Case("adl_p", "adl_p")
      .Case("adl_n", "adl_n")
      .Case("dg1", "dg1")
      .Case("pvc", "pvc")
      .Case("pvc_vg", "pvc_vg")
      .Case("mtl_h", "mtl_h")
      .Case("arl_h", "arl_h")
      .Case("bmg_g21", "bmg_g21")
      .Case("lnl_m", "lnl_m")
      .Default("");
}

SYCLInstallationDetector::SYCLInstallationDetector(const Driver &D)
: D(D), InstallationCandidates() {
InstallationCandidates.emplace_back(D.Dir + "/..");
Expand Down
Loading
Loading