88
99#include " DeviceCompilation.h"
1010#include " ESIMD.h"
11- #include " JITBinaryInfo.h"
12- #include " translation/Translation.h"
1311
14- #include < Driver/ToolChains/AMDGPU.h>
15- #include < Driver/ToolChains/Cuda.h>
16- #include < Driver/ToolChains/LazyDetector.h>
1712#include < clang/Basic/DiagnosticDriver.h>
1813#include < clang/Basic/Version.h>
1914#include < clang/CodeGen/CodeGenAction.h>
2015#include < clang/Driver/Compilation.h>
21- #include < clang/Driver/Driver.h>
2216#include < clang/Driver/Options.h>
2317#include < clang/Frontend/ChainedDiagnosticConsumer.h>
2418#include < clang/Frontend/CompilerInstance.h>
@@ -184,8 +178,7 @@ class RTCToolActionBase : public ToolAction {
184178 assert (!hasExecuted () && " Action should only be invoked on a single file" );
185179
186180 // Create a compiler instance to handle the actual work.
187- CompilerInstance Compiler (std::move (Invocation),
188- std::move (PCHContainerOps));
181+ CompilerInstance Compiler (std::move (Invocation), std::move (PCHContainerOps));
189182 Compiler.setFileManager (Files);
190183 // Suppress summary with number of warnings and errors being printed to
191184 // stdout.
@@ -319,7 +312,7 @@ class LLVMDiagnosticWrapper : public llvm::DiagnosticHandler {
319312} // anonymous namespace
320313
321314static void adjustArgs (const InputArgList &UserArgList,
322- const std::string &DPCPPRoot, BinaryFormat Format,
315+ const std::string &DPCPPRoot,
323316 SmallVectorImpl<std::string> &CommandLine) {
324317 DerivedArgList DAL{UserArgList};
325318 const auto &OptTable = getDriverOptTable ();
@@ -332,23 +325,6 @@ static void adjustArgs(const InputArgList &UserArgList,
332325 // unused argument warning.
333326 DAL.AddFlagArg (nullptr , OptTable.getOption (OPT_Qunused_arguments));
334327
335- if (Format == BinaryFormat::PTX || Format == BinaryFormat::AMDGCN) {
336- auto [CPU, Features] =
337- Translator::getTargetCPUAndFeatureAttrs (nullptr , " " , Format);
338- (void )Features;
339- if (Format == BinaryFormat::AMDGCN) {
340- DAL.AddJoinedArg (nullptr , OptTable.getOption (OPT_fsycl_targets_EQ),
341- " amdgcn-amd-amdhsa" );
342- DAL.AddJoinedArg (nullptr , OptTable.getOption (OPT_Xsycl_backend_EQ),
343- " amdgcn-amd-amdhsa" );
344- DAL.AddJoinedArg (nullptr , OptTable.getOption (OPT_offload_arch_EQ), CPU);
345- } else {
346- DAL.AddJoinedArg (nullptr , OptTable.getOption (OPT_fsycl_targets_EQ),
347- " nvptx64-nvidia-cuda" );
348- DAL.AddFlagArg (nullptr , OptTable.getOption (OPT_Xsycl_backend));
349- DAL.AddJoinedArg (nullptr , OptTable.getOption (OPT_cuda_gpu_arch_EQ), CPU);
350- }
351- }
352328 ArgStringList ASL;
353329 for_each (DAL, [&DAL, &ASL](Arg *A) { A->render (DAL, ASL); });
354330 for_each (UserArgList,
@@ -385,9 +361,10 @@ static void setupTool(ClangTool &Tool, const std::string &DPCPPRoot,
385361 });
386362}
387363
388- Expected<std::string> jit_compiler::calculateHash (
389- InMemoryFile SourceFile, View<InMemoryFile> IncludeFiles,
390- const InputArgList &UserArgList, BinaryFormat Format) {
364+ Expected<std::string>
365+ jit_compiler::calculateHash (InMemoryFile SourceFile,
366+ View<InMemoryFile> IncludeFiles,
367+ const InputArgList &UserArgList) {
391368 TimeTraceScope TTS{" calculateHash" };
392369
393370 const std::string &DPCPPRoot = getDPCPPRoot ();
@@ -396,7 +373,7 @@ Expected<std::string> jit_compiler::calculateHash(
396373 }
397374
398375 SmallVector<std::string> CommandLine;
399- adjustArgs (UserArgList, DPCPPRoot, Format, CommandLine);
376+ adjustArgs (UserArgList, DPCPPRoot, CommandLine);
400377
401378 FixedCompilationDatabase DB{" ." , CommandLine};
402379 ClangTool Tool{DB, {SourceFile.Path }};
@@ -422,10 +399,11 @@ Expected<std::string> jit_compiler::calculateHash(
422399 return createStringError (" Calculating source hash failed" );
423400}
424401
425- Expected<ModuleUPtr> jit_compiler::compileDeviceCode (
426- InMemoryFile SourceFile, View<InMemoryFile> IncludeFiles,
427- const InputArgList &UserArgList, std::string &BuildLog,
428- LLVMContext &Context, BinaryFormat Format) {
402+ Expected<ModuleUPtr>
403+ jit_compiler::compileDeviceCode (InMemoryFile SourceFile,
404+ View<InMemoryFile> IncludeFiles,
405+ const InputArgList &UserArgList,
406+ std::string &BuildLog, LLVMContext &Context) {
429407 TimeTraceScope TTS{" compileDeviceCode" };
430408
431409 const std::string &DPCPPRoot = getDPCPPRoot ();
@@ -434,7 +412,7 @@ Expected<ModuleUPtr> jit_compiler::compileDeviceCode(
434412 }
435413
436414 SmallVector<std::string> CommandLine;
437- adjustArgs (UserArgList, DPCPPRoot, Format, CommandLine);
415+ adjustArgs (UserArgList, DPCPPRoot, CommandLine);
438416
439417 FixedCompilationDatabase DB{" ." , CommandLine};
440418 ClangTool Tool{DB, {SourceFile.Path }};
@@ -452,22 +430,12 @@ Expected<ModuleUPtr> jit_compiler::compileDeviceCode(
452430 return createStringError (BuildLog);
453431}
454432
455- // This function is a simplified copy of the device library selection process
456- // in `clang::driver::tools::SYCL::getDeviceLibraries`, assuming a SPIR-V, or
457- // GPU targets (no AoT, no native CPU). Keep in sync!
433+ // This function is a simplified copy of the device library selection process in
434+ // `clang::driver::tools::SYCL::getDeviceLibraries`, assuming a SPIR-V target
435+ // (no AoT, no third-party GPUs, no native CPU). Keep in sync!
458436static bool getDeviceLibraries (const ArgList &Args,
459437 SmallVectorImpl<std::string> &LibraryList,
460- DiagnosticsEngine &Diags, BinaryFormat Format) {
461- // For CUDA/HIP we only need devicelib, early exit here.
462- if (Format == BinaryFormat::PTX) {
463- LibraryList.push_back (
464- Args.MakeArgString (" devicelib-nvptx64-nvidia-cuda.bc" ));
465- return false ;
466- } else if (Format == BinaryFormat::AMDGCN) {
467- LibraryList.push_back (Args.MakeArgString (" devicelib-amdgcn-amd-amdhsa.bc" ));
468- return false ;
469- }
470-
438+ DiagnosticsEngine &Diags) {
471439 struct DeviceLibOptInfo {
472440 StringRef DeviceLibName;
473441 StringRef DeviceLibOption;
@@ -572,8 +540,7 @@ static Expected<ModuleUPtr> loadBitcodeLibrary(StringRef LibPath,
572540
573541Error jit_compiler::linkDeviceLibraries (llvm::Module &Module,
574542 const InputArgList &UserArgList,
575- std::string &BuildLog,
576- BinaryFormat Format) {
543+ std::string &BuildLog) {
577544 TimeTraceScope TTS{" linkDeviceLibraries" };
578545
579546 const std::string &DPCPPRoot = getDPCPPRoot ();
@@ -588,29 +555,11 @@ Error jit_compiler::linkDeviceLibraries(llvm::Module &Module,
588555 /* ShouldOwnClient=*/ false );
589556
590557 SmallVector<std::string> LibNames;
591- const bool FoundUnknownLib =
592- getDeviceLibraries (UserArgList, LibNames, Diags, Format);
558+ bool FoundUnknownLib = getDeviceLibraries (UserArgList, LibNames, Diags);
593559 if (FoundUnknownLib) {
594560 return createStringError (" Could not determine list of device libraries: %s" ,
595561 BuildLog.c_str ());
596562 }
597- const bool IsCudaHIP =
598- Format == BinaryFormat::PTX || Format == BinaryFormat::AMDGCN;
599- if (IsCudaHIP) {
600- // Based on the OS and the format decide on the version of libspirv.
601- // NOTE: this will be problematic if cross-compiling between OSes.
602- std::string Libclc{" clc/" };
603- Libclc.append (
604- #ifdef _WIN32
605- " remangled-l32-signed_char.libspirv-"
606- #else
607- " remangled-l64-signed_char.libspirv-"
608- #endif
609- );
610- Libclc.append (Format == BinaryFormat::PTX ? " nvptx64-nvidia-cuda.bc"
611- : " amdgcn-amd-amdhsa.bc" );
612- LibNames.push_back (Libclc);
613- }
614563
615564 LLVMContext &Context = Module.getContext ();
616565 for (const std::string &LibName : LibNames) {
@@ -628,58 +577,6 @@ Error jit_compiler::linkDeviceLibraries(llvm::Module &Module,
628577 }
629578 }
630579
631- // For GPU targets we need to link against vendor provided libdevice.
632- if (IsCudaHIP) {
633- Triple T{Module.getTargetTriple ()};
634- Driver D{(Twine (DPCPPRoot) + " /bin/clang++" ).str (), T.getTriple (), Diags};
635- auto [CPU, Features] =
636- Translator::getTargetCPUAndFeatureAttrs (&Module, " " , Format);
637- (void )Features;
638- // Helper lambda to link modules.
639- auto LinkInLib = [&](const StringRef LibDevice) -> Error {
640- ModuleUPtr LibDeviceModule;
641- if (auto Error = loadBitcodeLibrary (LibDevice, Context)
642- .moveInto (LibDeviceModule)) {
643- return Error;
644- }
645- if (Linker::linkModules (Module, std::move (LibDeviceModule),
646- Linker::LinkOnlyNeeded)) {
647- return createStringError (" Unable to link libdevice: %s" ,
648- BuildLog.c_str ());
649- }
650- return Error::success ();
651- };
652- SmallVector<std::string, 12 > LibDeviceFiles;
653- if (Format == BinaryFormat::PTX) {
654- // For NVPTX we can get away with CudaInstallationDetector.
655- LazyDetector<CudaInstallationDetector> CudaInstallation{D, T,
656- UserArgList};
657- auto LibDevice = CudaInstallation->getLibDeviceFile (CPU);
658- if (LibDevice.empty ()) {
659- return createStringError (" Unable to find Cuda libdevice" );
660- }
661- LibDeviceFiles.push_back (LibDevice);
662- } else {
663- // AMDGPU requires entire toolchain in order to provide all common bitcode
664- // libraries.
665- clang::driver::toolchains::ROCMToolChain TC (D, T, UserArgList);
666- auto CommonDeviceLibs = TC.getCommonDeviceLibNames (
667- UserArgList, CPU, Action::OffloadKind::OFK_SYCL, false );
668- if (CommonDeviceLibs.empty ()) {
669- return createStringError (" Unable to find ROCm common device libraries" );
670- }
671- for (auto &Lib : CommonDeviceLibs) {
672- LibDeviceFiles.push_back (Lib.Path );
673- }
674- }
675- for (auto &LibDeviceFile : LibDeviceFiles) {
676- // llvm::Error converts to false on success.
677- if (auto Error = LinkInLib (LibDeviceFile)) {
678- return Error;
679- }
680- }
681- }
682-
683580 return Error::success ();
684581}
685582
0 commit comments