Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/sycl' into cuda2
Browse files Browse the repository at this point in the history
  • Loading branch information
sarnex committed Oct 24, 2024
2 parents 01df10d + fbd3675 commit 6b1d065
Show file tree
Hide file tree
Showing 26 changed files with 808 additions and 45 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/pr-code-format.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ on:
pull_request:
branches:
- main
- sycl
- sycl-devops-pr/**
- sycl-rel-**
- 'users/**'

jobs:
Expand Down
4 changes: 2 additions & 2 deletions buildbot/configure.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,8 @@ def do_configure(args):

# For clang-format, clang-tidy and code coverage
llvm_enable_projects += ";clang-tools-extra;compiler-rt"
# Build with zstd enabled on CI.
llvm_enable_zstd = "ON"
# Build with zstd disabled on CI for now.
llvm_enable_zstd = "OFF"
if sys.platform != "darwin":
# libclc is required for CI validation
libclc_enabled = True
Expand Down
4 changes: 4 additions & 0 deletions clang/include/clang/Basic/DiagnosticDriverKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,10 @@ def err_drv_sycl_missing_amdgpu_arch : Error<
"missing AMDGPU architecture for SYCL offloading; specify it with '-Xsycl-target-backend%select{|=%1}0 --offload-arch=<arch-name>'">;
def err_drv_sycl_thinlto_split_off: Error<
"'%0' is not supported when '%1' is set with '-fsycl'">;
// Reported when '--offload-arch' is used for SYCL offloading while
// '--offload-new-driver' is not enabled.
def err_drv_sycl_offload_arch_new_driver: Error<
"'--offload-arch' is supported when '-fsycl' is set with '--offload-new-driver'">;
// Reported when '--offload-arch' yields no architecture value to offload to.
def err_drv_sycl_offload_arch_missing_value : Error<
"must pass in an explicit cpu or gpu architecture to '--offload-arch'">;
def warn_drv_sycl_offload_target_duplicate : Warning<
"SYCL offloading target '%0' is similar to target '%1' already specified; "
"will be ignored">, InGroup<SyclTarget>;
Expand Down
106 changes: 105 additions & 1 deletion clang/lib/Driver/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1191,12 +1191,13 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
llvm::StringMap<llvm::DenseSet<StringRef>> DerivedArchs;
llvm::StringMap<StringRef> FoundNormalizedTriples;
llvm::SmallVector<llvm::Triple, 4> UniqueSYCLTriplesVec;
// StringSet to contain SYCL target triples.
llvm::StringSet<> SYCLTriples;
if (HasSYCLTargetsOption) {
// At this point, we know we have a valid combination
// of -fsycl*target options passed
Arg *SYCLTargetsValues = SYCLTargets;
if (SYCLTargetsValues) {
llvm::StringSet<> SYCLTriples;
if (SYCLTargetsValues->getNumValues()) {

// Multiple targets are currently not supported when using
Expand Down Expand Up @@ -1296,6 +1297,109 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
Diag(clang::diag::warn_drv_empty_joined_argument)
<< SYCLTargetsValues->getAsString(C.getInputArgs());
}
}
// If the user specified --offload-arch, deduce the offloading
// target triple(s) from the set of architecture(s).
// Create a toolchain for each valid triple.
// We do not support SYCL offloading if any of the inputs is a
// .cu (for CUDA type) or .hip (for HIP type) file.
else if (HasValidSYCLRuntime &&
C.getInputArgs().hasArg(options::OPT_offload_arch_EQ) && !IsHIP &&
!IsCuda) {
// SYCL offloading to AOT Targets with '--offload-arch'
// is currently enabled only with '--offload-new-driver' option.
// Emit a diagnostic if '--offload-arch' is invoked without
// the '--offload-new-driver' option.
if (!C.getInputArgs().hasFlag(options::OPT_offload_new_driver,
options::OPT_no_offload_new_driver, false)) {
Diag(clang::diag::err_drv_sycl_offload_arch_new_driver);
return;
}
const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
auto AMDTriple = getHIPOffloadTargetTriple(*this, C.getInputArgs());
auto NVPTXTriple = getNVIDIAOffloadTargetTriple(*this, C.getInputArgs(),
HostTC->getTriple());

// Attempt to deduce the offloading triple from the set of architectures.
// We need to temporarily create these toolchains so that we can access
// tools for inferring architectures.
llvm::DenseSet<StringRef> Archs;
if (NVPTXTriple) {
auto TempTC = std::make_unique<toolchains::CudaToolChain>(
*this, *NVPTXTriple, *HostTC, C.getInputArgs(), Action::OFK_None);
for (StringRef Arch :
getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &*TempTC, true))
Archs.insert(Arch);
}
if (AMDTriple) {
auto TempTC = std::make_unique<toolchains::AMDGPUOpenMPToolChain>(
*this, *AMDTriple, *HostTC, C.getInputArgs());
for (StringRef Arch :
getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &*TempTC, true))
Archs.insert(Arch);
}
if (!AMDTriple && !NVPTXTriple) {
for (StringRef Arch :
getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, nullptr, true))
Archs.insert(Arch);
}
for (StringRef Arch : Archs) {
if (NVPTXTriple && IsSYCLSupportedNVidiaGPUArch(StringToOffloadArch(
getProcessorFromTargetID(*NVPTXTriple, Arch)))) {
DerivedArchs[NVPTXTriple->getTriple()].insert(Arch);
} else if (AMDTriple &&
IsSYCLSupportedAMDGPUArch(StringToOffloadArch(
getProcessorFromTargetID(*AMDTriple, Arch)))) {
DerivedArchs[AMDTriple->getTriple()].insert(Arch);
} else if (IsSYCLSupportedIntelCPUArch(StringToOffloadArchSYCL(Arch))) {
DerivedArchs[MakeSYCLDeviceTriple("spir64_x86_64").getTriple()].insert(
Arch);
} else if (IsSYCLSupportedIntelGPUArch(StringToOffloadArchSYCL(Arch))) {
StringRef IntelGPUArch;
// For Intel Graphics AOT target, valid values for '--offload-arch'
// are mapped to valid device names accepted by OCLOC (the Intel GPU AOT
// compiler) via the '-device' option. The mapIntelGPUArchName
// function maps the accepted values for '--offload-arch' to enable SYCL
// offloading to Intel GPUs and the corresponding '-device' value passed
// to OCLOC.
IntelGPUArch = mapIntelGPUArchName(Arch).data();
DerivedArchs[MakeSYCLDeviceTriple("spir64_gen").getTriple()].insert(
IntelGPUArch);
} else {
Diag(clang::diag::err_drv_invalid_sycl_target) << Arch;
return;
}
}
// Emit an error if architecture value is not provided
// to --offload-arch.
if (Archs.empty()) {
Diag(clang::diag::err_drv_sycl_offload_arch_missing_value);
return;
}

for (const auto &TripleAndArchs : DerivedArchs)
SYCLTriples.insert(TripleAndArchs.first());

for (const auto &Val : SYCLTriples) {
llvm::Triple SYCLTargetTriple(MakeSYCLDeviceTriple(Val.getKey()));
std::string NormalizedName = SYCLTargetTriple.normalize();

// Make sure we don't have a duplicate triple.
auto Duplicate = FoundNormalizedTriples.find(NormalizedName);
if (Duplicate != FoundNormalizedTriples.end()) {
Diag(clang::diag::warn_drv_sycl_offload_target_duplicate)
<< Val.getKey() << Duplicate->second;
continue;
}

// Store the current triple so that we can check for duplicates in the
// following iterations.
FoundNormalizedTriples[NormalizedName] = Val.getKey();
UniqueSYCLTriplesVec.push_back(SYCLTargetTriple);
}

addSYCLDefaultTriple(C, UniqueSYCLTriplesVec);

} else {
// If -fsycl is supplied without -fsycl-targets we will assume SPIR-V.
// For -fsycl-device-only, we also setup the implied triple as needed.
Expand Down
119 changes: 119 additions & 0 deletions clang/lib/Driver/ToolChains/SYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,125 @@ using namespace clang::driver::tools;
using namespace clang;
using namespace llvm::opt;

// Struct that relates an AOT target value with
// Intel CPUs and Intel GPUs.
// One instance is one row of a lookup table: an architecture-name string
// paired with the SYCLSupportedIntelArchs enumerator it stands for.
struct StringToOffloadArchSYCLMap {
// Architecture name; StringToOffloadArchSYCL compares the user-provided
// '--offload-arch' value against this string.
const char *ArchName;
// Enumerator reported when ArchName matches.
SYCLSupportedIntelArchs IntelArch;
};

// Mapping of supported SYCL offloading architectures.
//
// Each row pairs an architecture name with its SYCLSupportedIntelArchs
// enumerator. StringToOffloadArchSYCL scans this table front to back and
// returns the first row whose name matches, so every name must be listed
// exactly once; a repeated row can never be reached.
static const StringToOffloadArchSYCLMap StringToArchNamesMap[] = {
    // Intel CPU mapping.
    {"skylake-avx512", SYCLSupportedIntelArchs::SKYLAKEAVX512},
    {"core-avx2", SYCLSupportedIntelArchs::COREAVX2},
    {"corei7-avx", SYCLSupportedIntelArchs::COREI7AVX},
    {"corei7", SYCLSupportedIntelArchs::COREI7},
    {"westmere", SYCLSupportedIntelArchs::WESTMERE},
    {"sandybridge", SYCLSupportedIntelArchs::SANDYBRIDGE},
    {"ivybridge", SYCLSupportedIntelArchs::IVYBRIDGE},
    {"broadwell", SYCLSupportedIntelArchs::BROADWELL},
    {"coffeelake", SYCLSupportedIntelArchs::COFFEELAKE},
    {"alderlake", SYCLSupportedIntelArchs::ALDERLAKE},
    {"skylake", SYCLSupportedIntelArchs::SKYLAKE},
    {"skx", SYCLSupportedIntelArchs::SKX},
    {"cascadelake", SYCLSupportedIntelArchs::CASCADELAKE},
    {"icelake-client", SYCLSupportedIntelArchs::ICELAKECLIENT},
    {"icelake-server", SYCLSupportedIntelArchs::ICELAKESERVER},
    {"sapphirerapids", SYCLSupportedIntelArchs::SAPPHIRERAPIDS},
    {"graniterapids", SYCLSupportedIntelArchs::GRANITERAPIDS},
    // Intel GPU mapping.
    {"bdw", SYCLSupportedIntelArchs::BDW},
    {"skl", SYCLSupportedIntelArchs::SKL},
    {"kbl", SYCLSupportedIntelArchs::KBL},
    {"cfl", SYCLSupportedIntelArchs::CFL},
    {"apl", SYCLSupportedIntelArchs::APL},
    {"bxt", SYCLSupportedIntelArchs::BXT},
    {"glk", SYCLSupportedIntelArchs::GLK},
    {"whl", SYCLSupportedIntelArchs::WHL},
    {"aml", SYCLSupportedIntelArchs::AML},
    {"cml", SYCLSupportedIntelArchs::CML},
    {"icllp", SYCLSupportedIntelArchs::ICLLP},
    {"icl", SYCLSupportedIntelArchs::ICL},
    {"ehl", SYCLSupportedIntelArchs::EHL},
    {"jsl", SYCLSupportedIntelArchs::JSL},
    {"tgllp", SYCLSupportedIntelArchs::TGLLP},
    {"tgl", SYCLSupportedIntelArchs::TGL},
    {"rkl", SYCLSupportedIntelArchs::RKL},
    {"adl_s", SYCLSupportedIntelArchs::ADL_S},
    {"rpl_s", SYCLSupportedIntelArchs::RPL_S},
    {"adl_p", SYCLSupportedIntelArchs::ADL_P},
    {"adl_n", SYCLSupportedIntelArchs::ADL_N},
    {"dg1", SYCLSupportedIntelArchs::DG1},
    {"acm_g10", SYCLSupportedIntelArchs::ACM_G10},
    {"dg2_g10", SYCLSupportedIntelArchs::DG2_G10},
    {"acm_g11", SYCLSupportedIntelArchs::ACM_G11},
    {"dg2_g11", SYCLSupportedIntelArchs::DG2_G11},
    {"acm_g12", SYCLSupportedIntelArchs::ACM_G12},
    {"dg2_g12", SYCLSupportedIntelArchs::DG2_G12},
    {"pvc", SYCLSupportedIntelArchs::PVC},
    {"pvc_vg", SYCLSupportedIntelArchs::PVC_VG},
    {"mtl_u", SYCLSupportedIntelArchs::MTL_U},
    {"mtl_s", SYCLSupportedIntelArchs::MTL_S},
    {"arl_u", SYCLSupportedIntelArchs::ARL_U},
    {"arl_s", SYCLSupportedIntelArchs::ARL_S},
    {"mtl_h", SYCLSupportedIntelArchs::MTL_H},
    {"arl_h", SYCLSupportedIntelArchs::ARL_H},
    {"bmg_g21", SYCLSupportedIntelArchs::BMG_G21},
    {"lnl_m", SYCLSupportedIntelArchs::LNL_M}};

// Translate a user-provided '--offload-arch' value into the SYCL supported
// Intel AOT target it names. The name must match a StringToArchNamesMap entry
// exactly; any other value yields SYCLSupportedIntelArchs::UNKNOWN.
SYCLSupportedIntelArchs
clang::driver::StringToOffloadArchSYCL(llvm::StringRef ArchNameAsString) {
  // Walk the table in order and report the first row with a matching name.
  for (const StringToOffloadArchSYCLMap &Entry : StringToArchNamesMap)
    if (ArchNameAsString == Entry.ArchName)
      return Entry.IntelArch;
  return SYCLSupportedIntelArchs::UNKNOWN;
}

// Convert an Intel GPU '--offload-arch' value into the spir64_gen device name
// accepted by OCLOC (the Intel GPU AOT compiler). Several accepted spellings
// share one device name; a value with no mapping yields an empty StringRef.
StringRef clang::driver::mapIntelGPUArchName(StringRef ArchName) {
  return llvm::StringSwitch<StringRef>(ArchName)
      .Case("bdw", "bdw")
      .Case("skl", "skl")
      .Case("kbl", "kbl")
      .Case("cfl", "cfl")
      // "apl" and "bxt" share the "apl" device name.
      .Cases("apl", "bxt", "apl")
      .Case("glk", "glk")
      .Case("whl", "whl")
      .Case("aml", "aml")
      .Case("cml", "cml")
      // "icllp"/"icl", "ehl"/"jsl" and "tgllp"/"tgl" each share one name.
      .Cases("icllp", "icl", "icllp")
      .Cases("ehl", "jsl", "ehl")
      .Cases("tgllp", "tgl", "tgllp")
      .Case("rkl", "rkl")
      // "adl_s" and "rpl_s" share the "adl_s" device name.
      .Cases("adl_s", "rpl_s", "adl_s")
      .Case("adl_p", "adl_p")
      .Case("adl_n", "adl_n")
      .Case("dg1", "dg1")
      // Every "dg2_*" spelling maps to the matching "acm_*" device name.
      .Cases("acm_g10", "dg2_g10", "acm_g10")
      .Cases("acm_g11", "dg2_g11", "acm_g11")
      .Cases("acm_g12", "dg2_g12", "acm_g12")
      .Case("pvc", "pvc")
      .Case("pvc_vg", "pvc_vg")
      // Four spellings share the "mtl_u" device name.
      .Cases("mtl_u", "mtl_s", "arl_u", "arl_s", "mtl_u")
      .Case("mtl_h", "mtl_h")
      .Case("arl_h", "arl_h")
      .Case("bmg_g21", "bmg_g21")
      .Case("lnl_m", "lnl_m")
      // Unrecognized name: the result is the empty string.
      .Default("");
}

SYCLInstallationDetector::SYCLInstallationDetector(const Driver &D)
: D(D), InstallationCandidates() {
InstallationCandidates.emplace_back(D.Dir + "/..");
Expand Down
Loading

0 comments on commit 6b1d065

Please sign in to comment.