diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index d2c814205b733..4cc95dc93a3a2 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -10568,7 +10568,12 @@ void SYCLPostLink::ConstructJob(Compilation &C, const JobAction &JA, // Add output file table file option assert(Output.isFilename() && "output must be a filename"); - addArgs(CmdArgs, TCArgs, {"-o", Output.getFilename()}); + StringRef Device = JA.getOffloadingArch(); + std::string OutputArg = Output.getFilename(); + if (T.getSubArch() == llvm::Triple::SPIRSubArch_gen && Device.data()) + OutputArg = ("intel_gpu_" + Device + "," + OutputArg).str(); + + addArgs(CmdArgs, TCArgs, {"-o", OutputArg}); const toolchains::SYCLToolChain &TC = static_cast(getToolChain()); diff --git a/clang/test/Driver/sycl-oneapi-gpu-intelgpu.cpp b/clang/test/Driver/sycl-oneapi-gpu-intelgpu.cpp index d2955b428b4ac..ebe41c2a8e3d1 100644 --- a/clang/test/Driver/sycl-oneapi-gpu-intelgpu.cpp +++ b/clang/test/Driver/sycl-oneapi-gpu-intelgpu.cpp @@ -101,7 +101,7 @@ /// -fsycl-targets=spir64_x86_64 should set a specific macro // RUN: %clangxx -c -fsycl -fsycl-targets=spir64_x86_64 -### %s 2>&1 | \ // RUN: FileCheck %s --check-prefix=MACRO_X86_64 -// RUN: %clang_cl -c -fsycl -fsycl-targets=spir64_x86_64 -### %s 2>&1 | \ +// RUN: %clang_cl -c -fsycl -fsycl-targets=spir64_x86_64 -### -- %s 2>&1 | \ // RUN: FileCheck %s --check-prefix=MACRO_X86_64 // MACRO_X86_64: clang{{.*}} "-triple" "spir64_x86_64-unknown-unknown" // MACRO_X86_64: "-D__SYCL_TARGET_INTEL_X86_64__" @@ -111,7 +111,7 @@ /// test for invalid intel arch // RUN: not %clangxx -c -fsycl -fsycl-targets=intel_gpu_bad -### %s 2>&1 | \ // RUN: FileCheck %s --check-prefix=BAD_INPUT -// RUN: not %clang_cl -c -fsycl -fsycl-targets=intel_gpu_bad -### %s 2>&1 | \ +// RUN: not %clang_cl -c -fsycl -fsycl-targets=intel_gpu_bad -### -- %s 2>&1 | \ // RUN: FileCheck %s --check-prefix=BAD_INPUT // BAD_INPUT: error: 
SYCL target is invalid: 'intel_gpu_bad' @@ -233,3 +233,9 @@ // CHECK_TOOLS_BEOPTS_MIX: opencl-aot{{.*}} "-DCPU" // CHECK_TOOLS_BEOPTS_MIX-NOT: "-DDG1" // CHECK_TOOLS_BEOPTS_MIX: ocloc{{.*}} "-device" "skl"{{.*}}"-DSKL2" + +/// Check that target is passed to sycl-post-link for filtering +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc,intel_gpu_dg1 \ +// RUN: -### %s 2>&1 | FileCheck %s --check-prefix=CHECK_TOOLS_FILTER +// CHECK_TOOLS_FILTER: sycl-post-link{{.*}} "-o" "intel_gpu_pvc,{{.*}}" +// CHECK_TOOLS_FILTER: sycl-post-link{{.*}} "-o" "intel_gpu_dg1,{{.*}}" diff --git a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td index aaa55d3686ac5..ff9567c115bbd 100644 --- a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td +++ b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td @@ -143,3 +143,9 @@ def : TargetInfo<"x86_64", [], [], "", "", 1>; //defvar AspectList = [AspectCpu] # AllUSMAspects; //def : TargetInfo<"Test", AspectList, []>; //def : TargetInfo<"Test2", [AspectCpu] # AllUSMAspects, []>; + +// TODO: The aspects listed for the intel_gpu targets right now are incomplete; +// only the fp16/fp64/atomic64 aspects are listed. 
+def : TargetInfo<"intel_gpu_cfl", [AspectFp16, AspectFp64, AspectAtomic64], [8, 16, 32]>; +def : TargetInfo<"intel_gpu_tgllp", [AspectFp16, AspectAtomic64], [8, 16, 32]>; +def : TargetInfo<"intel_gpu_pvc", [AspectFp16, AspectFp64, AspectAtomic64], [16, 32]>; diff --git a/llvm/include/llvm/SYCLLowerIR/ModuleSplitter.h b/llvm/include/llvm/SYCLLowerIR/ModuleSplitter.h index eb09e7528ff49..9ae433cedc668 100644 --- a/llvm/include/llvm/SYCLLowerIR/ModuleSplitter.h +++ b/llvm/include/llvm/SYCLLowerIR/ModuleSplitter.h @@ -13,6 +13,8 @@ #ifndef LLVM_SYCLLOWERIR_MODULE_SPLITTER_H #define LLVM_SYCLLOWERIR_MODULE_SPLITTER_H +#include "SYCLDeviceRequirements.h" + #include "llvm/ADT/SetVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/Function.h" @@ -108,6 +110,7 @@ class ModuleDesc { std::unique_ptr M; EntryPointGroup EntryPoints; bool IsTopLevel = false; + mutable std::optional Reqs; public: struct Properties { @@ -193,6 +196,12 @@ class ModuleDesc { ModuleDesc clone() const; + const SYCLDeviceRequirements &getOrComputeDeviceRequirements() const { + if (!Reqs.has_value()) + Reqs = computeDeviceRequirements(*this); + return *Reqs; + } + #ifndef NDEBUG void verifyESIMDProperty() const; void dump() const; diff --git a/llvm/include/llvm/SYCLLowerIR/SYCLDeviceRequirements.h b/llvm/include/llvm/SYCLLowerIR/SYCLDeviceRequirements.h new file mode 100644 index 0000000000000..7ac19bd0f9f45 --- /dev/null +++ b/llvm/include/llvm/SYCLLowerIR/SYCLDeviceRequirements.h @@ -0,0 +1,46 @@ +//===----- SYCLDeviceRequirements.h - collect data for used aspects ------=-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" + +#include +#include +#include +#include +#include + +namespace llvm { + +class StringRef; + +namespace module_split { +class ModuleDesc; +} +namespace util { +class PropertyValue; +} + +struct SYCLDeviceRequirements { + std::set Aspects; + std::set FixedTarget; + std::optional> ReqdWorkGroupSize; + std::optional> JointMatrix; + std::optional> JointMatrixMad; + std::optional SubGroupSize; + + std::map asMap() const; +}; + +SYCLDeviceRequirements +computeDeviceRequirements(const module_split::ModuleDesc &M); + +} // namespace llvm diff --git a/llvm/lib/SYCLLowerIR/CMakeLists.txt b/llvm/lib/SYCLLowerIR/CMakeLists.txt index 59f409533b10d..1913aebb2306a 100644 --- a/llvm/lib/SYCLLowerIR/CMakeLists.txt +++ b/llvm/lib/SYCLLowerIR/CMakeLists.txt @@ -66,6 +66,7 @@ add_llvm_component_library(LLVMSYCLLowerIR ModuleSplitter.cpp MutatePrintfAddrspace.cpp SYCLAddOptLevelAttribute.cpp + SYCLDeviceRequirements.cpp SYCLPropagateAspectsUsage.cpp SYCLPropagateJointMatrixUsage.cpp SYCLUtils.cpp diff --git a/llvm/lib/SYCLLowerIR/SYCLDeviceRequirements.cpp b/llvm/lib/SYCLLowerIR/SYCLDeviceRequirements.cpp new file mode 100644 index 0000000000000..6c0f1c952030b --- /dev/null +++ b/llvm/lib/SYCLLowerIR/SYCLDeviceRequirements.cpp @@ -0,0 +1,137 @@ +//===----- SYCLDeviceRequirements.cpp - collect data for used aspects ----=-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/SYCLLowerIR/SYCLDeviceRequirements.h" + +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/IR/Module.h" +#include "llvm/SYCLLowerIR/ModuleSplitter.h" +#include "llvm/Support/PropertySetIO.h" + +#include +#include + +using namespace llvm; + +static int64_t ExtractSignedIntegerFromMDNodeOperand(const MDNode *N, + unsigned OpNo) { + Constant *C = cast(N->getOperand(OpNo).get())->getValue(); + return C->getUniqueInteger().getSExtValue(); +} +static uint64_t ExtractUnsignedIntegerFromMDNodeOperand(const MDNode *N, + unsigned OpNo) { + Constant *C = cast(N->getOperand(OpNo).get())->getValue(); + return C->getUniqueInteger().getZExtValue(); +} +static llvm::StringRef ExtractStringFromMDNodeOperand(const MDNode *N, + unsigned OpNo) { + MDString *S = cast(N->getOperand(OpNo).get()); + return S->getString(); +} + +SYCLDeviceRequirements +llvm::computeDeviceRequirements(const module_split::ModuleDesc &MD) { + SYCLDeviceRequirements Reqs; + // Process all functions in the module + for (const Function &F : MD.getModule()) { + if (auto *MDN = F.getMetadata("sycl_used_aspects")) { + for (size_t I = 0, E = MDN->getNumOperands(); I < E; ++I) { + auto Val = ExtractSignedIntegerFromMDNodeOperand(MDN, I); + // Don't put internal aspects (with negative integer value) into the + // requirements, they are used only for device image splitting. 
+ if (Val >= 0) + Reqs.Aspects.insert(Val); + } + } + + if (auto *MDN = F.getMetadata("sycl_fixed_targets")) { + for (size_t I = 0, E = MDN->getNumOperands(); I < E; ++I) { + auto Val = ExtractUnsignedIntegerFromMDNodeOperand(MDN, I); + Reqs.FixedTarget.insert(Val); + } + } + + if (auto *MDN = F.getMetadata("reqd_work_group_size")) { + llvm::SmallVector NewReqdWorkGroupSize; + for (size_t I = 0, E = MDN->getNumOperands(); I < E; ++I) + NewReqdWorkGroupSize.push_back( + ExtractUnsignedIntegerFromMDNodeOperand(MDN, I)); + if (!Reqs.ReqdWorkGroupSize.has_value()) + Reqs.ReqdWorkGroupSize = NewReqdWorkGroupSize; + } + + if (auto *MDN = F.getMetadata("sycl_joint_matrix")) { + auto Val = ExtractStringFromMDNodeOperand(MDN, 0); + if (!Val.empty()) + Reqs.JointMatrix = Val; + } + + if (auto *MDN = F.getMetadata("sycl_joint_matrix_mad")) { + auto Val = ExtractStringFromMDNodeOperand(MDN, 0); + if (!Val.empty()) + Reqs.JointMatrixMad = Val; + } + } + + // Process just the entry points in the module + for (const Function *F : MD.entries()) { + if (auto *MDN = F->getMetadata("intel_reqd_sub_group_size")) { + // There should only be at most one function with + // intel_reqd_sub_group_size metadata when considering the entry + // points of a module, but not necessarily when considering all the + // functions of a module: an entry point with a + // intel_reqd_sub_group_size can call an ESIMD function through + // invoke_esimd, and that function has intel_reqd_sub_group_size=1, + // which is valid. 
+ assert( + MDN->getNumOperands() == 1 && + "intel_reqd_sub_group_size metadata expects exactly one argument!"); + auto MDValue = ExtractUnsignedIntegerFromMDNodeOperand(MDN, 0); + if (!Reqs.SubGroupSize) + Reqs.SubGroupSize = MDValue; + else + assert(*Reqs.SubGroupSize == static_cast(MDValue)); + } + } + return Reqs; +} + +std::map SYCLDeviceRequirements::asMap() const { + std::map Requirements; + + // For all properties except for "aspects", we'll only add the + // value to the map if the corresponding value from + // SYCLDeviceRequirements has a value/is non-empty. + Requirements["aspects"] = + std::vector(Aspects.begin(), Aspects.end()); + + if (!FixedTarget.empty()) + Requirements["fixed_target"] = + std::vector(FixedTarget.begin(), FixedTarget.end()); + + // TODO: Before intel/llvm#10620, the reqd_work_group_size attribute + // stores its values as uint32_t, but this needed to be expanded to + // uint64_t. However, this change did not happen in ABI-breaking + // window, so we attach the required work-group size as the + // reqd_work_group_size_uint64_t attribute. At the next ABI-breaking + // window, this can be changed back to reqd_work_group_size. 
+ if (ReqdWorkGroupSize.has_value()) + Requirements["reqd_work_group_size_uint64_t"] = *ReqdWorkGroupSize; + + if (JointMatrix.has_value()) + Requirements["joint_matrix"] = *JointMatrix; + + if (JointMatrixMad.has_value()) + Requirements["joint_matrix_mad"] = *JointMatrixMad; + + if (SubGroupSize.has_value()) + Requirements["reqd_sub_group_size"] = *SubGroupSize; + + return Requirements; +} diff --git a/llvm/test/tools/sycl-post-link/help.test b/llvm/test/tools/sycl-post-link/help.test index 6d0c9bbf606ae..6d41bc1f4fcac 100644 --- a/llvm/test/tools/sycl-post-link/help.test +++ b/llvm/test/tools/sycl-post-link/help.test @@ -45,7 +45,7 @@ CHECK: sycl-post-link options: CHECK: --device-globals - Lower and generate information about device global variables CHECK: -f - Enable binary output on terminals CHECK: --ir-output-only - Output single IR file -CHECK: -o - Output filename +CHECK: -o - Specifies an output file. Multiple output files can be specified. Additionally, a target may be specified alongside an output file, which has the effect that when module splitting is performed, the modules that are in that output table are filtered so those modules are compatible with the target. 
CHECK: --out-dir= - Directory where files listed in the result file table will be output CHECK: --spec-const= - lower and generate specialization constants information CHECK: =native - lower spec constants to native spirv instructions so that these values could be set at runtime diff --git a/llvm/test/tools/sycl-post-link/multiple-filtered-outputs.ll b/llvm/test/tools/sycl-post-link/multiple-filtered-outputs.ll new file mode 100644 index 0000000000000..1f014410d0a1c --- /dev/null +++ b/llvm/test/tools/sycl-post-link/multiple-filtered-outputs.ll @@ -0,0 +1,201 @@ +; This checks that sycl-post-link can accept multiple -o options, +; with some of the -o options being composed of a (target, filename) pair, +; and that the output tables from inputs with target info have the modules +; that are not compatible with that target filtered out. + +; RUN: sycl-post-link %s -symbols -split=auto \ +; RUN: -o %t.table \ +; RUN: -o intel_gpu_pvc,%t-pvc.table \ +; RUN: -o intel_gpu_tgllp,%t-tgllp.table \ +; RUN: -o intel_gpu_cfl,%t-cfl.table \ +; RUN: -o unrecognized_target,%t-unrecognized.table + +; RUN: FileCheck %s -input-file=%t_0.sym -check-prefix=CHECK-DOUBLE +; RUN: FileCheck %s -input-file=%t_1.sym -check-prefix=CHECK-SG8 +; RUN: FileCheck %s -input-file=%t_2.sym -check-prefix=CHECK-SG64 +; RUN: FileCheck %s -input-file=%t_3.sym -check-prefix=CHECK-SG32 +; RUN: FileCheck %s -input-file=%t_4.sym -check-prefix=CHECK-FLOAT +; RUN: FileCheck %s -input-file=%t_5.sym -check-prefix=CHECK-SG16 + +; RUN: FileCheck %s -input-file=%t.table -check-prefix=CHECK-ALL +; RUN: FileCheck %s -input-file=%t-unrecognized.table -check-prefix=CHECK-ALL +; RUN: FileCheck %s -input-file=%t-pvc.table -check-prefix=CHECK-PVC +; RUN: FileCheck %s -input-file=%t-tgllp.table -check-prefix=CHECK-TGLLP +; RUN: FileCheck %s -input-file=%t-cfl.table -check-prefix=CHECK-CFL + +; CHECK-DOUBLE: double_kernel +; CHECK-FLOAT: float_kernel +; CHECK-SG8: reqd_sub_group_size_kernel_8 +; CHECK-SG16: 
reqd_sub_group_size_kernel_16 +; CHECK-SG32: reqd_sub_group_size_kernel_32 +; CHECK-SG64: reqd_sub_group_size_kernel_64 + +; An output without a target will have no filtering performed on the output table. +; Additionally, an unrecognized target will perform the same. +; CHECK-ALL: _0.sym +; CHECK-ALL-NEXT: _1.sym +; CHECK-ALL-NEXT: _2.sym +; CHECK-ALL-NEXT: _3.sym +; CHECK-ALL-NEXT: _4.sym +; CHECK-ALL-NEXT: _5.sym +; CHECK-ALL-EMPTY: + +; PVC does not support sg8 (=1) or sg64 (=2) +; CHECK-PVC: _0.sym +; CHECK-PVC-NEXT: _3.sym +; CHECK-PVC-NEXT: _4.sym +; CHECK-PVC-NEXT: _5.sym +; CHECK-PVC-EMPTY: + +; TGLLP does not support fp64 (=0) or sg64 (=2) +; CHECK-TGLLP: _1.sym +; CHECK-TGLLP-NEXT: _3.sym +; CHECK-TGLLP-NEXT: _4.sym +; CHECK-TGLLP-NEXT: _5.sym +; CHECK-TGLLP-EMPTY: + +; CFL does not support sg64 (=2) +; CHECK-CFL: _0.sym +; CHECK-CFL-NEXT: _1.sym +; CHECK-CFL-NEXT: _3.sym +; CHECK-CFL-NEXT: _4.sym +; CHECK-CFL-NEXT: _5.sym +; CHECK-CFL-EMPTY: + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-unknown" + +; Function Attrs: mustprogress norecurse nounwind +define weak_odr dso_local spir_kernel void @double_kernel(ptr addrspace(1) noundef align 8 %_arg_out) local_unnamed_addr #0 !srcloc !65 !kernel_arg_buffer_location !66 !sycl_used_aspects !67 !sycl_fixed_targets !68 !sycl_kernel_omit_args !69 { +entry: + %0 = load double, ptr addrspace(1) %_arg_out, align 8, !tbaa !70 + %mul.i = fmul double %0, 2.000000e-01 + store double %mul.i, ptr addrspace(1) %_arg_out, align 8, !tbaa !70 + ret void +} + +; Function Attrs: mustprogress norecurse nounwind +define weak_odr dso_local spir_kernel void @float_kernel(ptr addrspace(1) noundef align 4 %_arg_out) local_unnamed_addr #0 !srcloc !74 !kernel_arg_buffer_location !66 !sycl_fixed_targets !68 !sycl_kernel_omit_args !69 { +entry: + %0 = load float, ptr addrspace(1) %_arg_out, align 4, !tbaa !75 + %mul.i = fmul float 
%0, 0x3FC99999A0000000 + store float %mul.i, ptr addrspace(1) %_arg_out, align 4, !tbaa !75 + ret void +} + +; Function Attrs: mustprogress norecurse nounwind +define weak_odr dso_local spir_kernel void @reqd_sub_group_size_kernel_8() local_unnamed_addr #0 !srcloc !77 !kernel_arg_buffer_location !68 !intel_reqd_sub_group_size !78 !sycl_fixed_targets !68 !sycl_kernel_omit_args !68 { +entry: + ret void +} + +; Function Attrs: mustprogress norecurse nounwind +define weak_odr dso_local spir_kernel void @reqd_sub_group_size_kernel_16() local_unnamed_addr #0 !srcloc !77 !kernel_arg_buffer_location !68 !intel_reqd_sub_group_size !79 !sycl_fixed_targets !68 !sycl_kernel_omit_args !68 { +entry: + ret void +} + +; Function Attrs: mustprogress norecurse nounwind +define weak_odr dso_local spir_kernel void @reqd_sub_group_size_kernel_32() local_unnamed_addr #0 !srcloc !77 !kernel_arg_buffer_location !68 !intel_reqd_sub_group_size !80 !sycl_fixed_targets !68 !sycl_kernel_omit_args !68 { +entry: + ret void +} + +; Function Attrs: mustprogress norecurse nounwind +define weak_odr dso_local spir_kernel void @reqd_sub_group_size_kernel_64() local_unnamed_addr #0 !srcloc !77 !kernel_arg_buffer_location !68 !intel_reqd_sub_group_size !81 !sycl_fixed_targets !68 !sycl_kernel_omit_args !68 { +entry: + ret void +} + +declare dso_local spir_func i32 @_Z18__spirv_ocl_printfPU3AS2Kcz(ptr addrspace(2), ...) 
+ +attributes #0 = { mustprogress norecurse nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="double.cpp" "sycl-optlevel"="3" "uniform-work-group-size"="true" } + +!llvm.module.flags = !{!0, !1} +!opencl.spir.version = !{!2} +!spirv.Source = !{!3} +!sycl_aspects = !{!4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29, !30, !31, !32, !33, !34, !35, !36, !37, !38, !39, !40, !41, !42, !43, !44, !45, !46, !47, !48, !49, !50, !51, !52, !53, !54, !55, !56, !57, !58, !59, !60, !61, !62, !63} +!llvm.ident = !{!64} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"frame-pointer", i32 2} +!2 = !{i32 1, i32 2} +!3 = !{i32 4, i32 100000} +!4 = !{!"cpu", i32 1} +!5 = !{!"gpu", i32 2} +!6 = !{!"accelerator", i32 3} +!7 = !{!"custom", i32 4} +!8 = !{!"fp16", i32 5} +!9 = !{!"fp64", i32 6} +!10 = !{!"image", i32 9} +!11 = !{!"online_compiler", i32 10} +!12 = !{!"online_linker", i32 11} +!13 = !{!"queue_profiling", i32 12} +!14 = !{!"usm_device_allocations", i32 13} +!15 = !{!"usm_host_allocations", i32 14} +!16 = !{!"usm_shared_allocations", i32 15} +!17 = !{!"usm_system_allocations", i32 17} +!18 = !{!"ext_intel_pci_address", i32 18} +!19 = !{!"ext_intel_gpu_eu_count", i32 19} +!20 = !{!"ext_intel_gpu_eu_simd_width", i32 20} +!21 = !{!"ext_intel_gpu_slices", i32 21} +!22 = !{!"ext_intel_gpu_subslices_per_slice", i32 22} +!23 = !{!"ext_intel_gpu_eu_count_per_subslice", i32 23} +!24 = !{!"ext_intel_max_mem_bandwidth", i32 24} +!25 = !{!"ext_intel_mem_channel", i32 25} +!26 = !{!"usm_atomic_host_allocations", i32 26} +!27 = !{!"usm_atomic_shared_allocations", i32 27} +!28 = !{!"atomic64", i32 28} +!29 = !{!"ext_intel_device_info_uuid", i32 29} +!30 = !{!"ext_oneapi_srgb", i32 30} +!31 = !{!"ext_oneapi_native_assert", i32 31} +!32 = !{!"host_debuggable", i32 32} +!33 = !{!"ext_intel_gpu_hw_threads_per_eu", i32 33} +!34 = 
!{!"ext_oneapi_cuda_async_barrier", i32 34} +!35 = !{!"ext_oneapi_bfloat16_math_functions", i32 35} +!36 = !{!"ext_intel_free_memory", i32 36} +!37 = !{!"ext_intel_device_id", i32 37} +!38 = !{!"ext_intel_memory_clock_rate", i32 38} +!39 = !{!"ext_intel_memory_bus_width", i32 39} +!40 = !{!"emulated", i32 40} +!41 = !{!"ext_intel_legacy_image", i32 41} +!42 = !{!"ext_oneapi_bindless_images", i32 42} +!43 = !{!"ext_oneapi_bindless_images_shared_usm", i32 43} +!44 = !{!"ext_oneapi_bindless_images_1d_usm", i32 44} +!45 = !{!"ext_oneapi_bindless_images_2d_usm", i32 45} +!46 = !{!"ext_oneapi_interop_memory_import", i32 46} +!47 = !{!"ext_oneapi_interop_memory_export", i32 47} +!48 = !{!"ext_oneapi_interop_semaphore_import", i32 48} +!49 = !{!"ext_oneapi_interop_semaphore_export", i32 49} +!50 = !{!"ext_oneapi_mipmap", i32 50} +!51 = !{!"ext_oneapi_mipmap_anisotropy", i32 51} +!52 = !{!"ext_oneapi_mipmap_level_reference", i32 52} +!53 = !{!"ext_intel_esimd", i32 53} +!54 = !{!"ext_oneapi_ballot_group", i32 54} +!55 = !{!"ext_oneapi_fixed_size_group", i32 55} +!56 = !{!"ext_oneapi_opportunistic_group", i32 56} +!57 = !{!"ext_oneapi_tangle_group", i32 57} +!58 = !{!"ext_intel_matrix", i32 58} +!59 = !{!"int64_base_atomics", i32 7} +!60 = !{!"int64_extended_atomics", i32 8} +!61 = !{!"usm_system_allocator", i32 17} +!62 = !{!"usm_restricted_shared_allocations", i32 16} +!63 = !{!"host", i32 0} +!64 = !{!"clang version 19.0.0git (/ws/llvm/clang a7f3a637bdd6299831f903bbed9e8d069fea5c86)"} +!65 = !{i32 233} +!66 = !{i32 -1} +!67 = !{i32 6} +!68 = !{} +!69 = !{i1 false} +!70 = !{!71, !71, i64 0} +!71 = !{!"double", !72, i64 0} +!72 = !{!"omnipotent char", !73, i64 0} +!73 = !{!"Simple C++ TBAA"} +!74 = !{i32 364} +!75 = !{!76, !76, i64 0} +!76 = !{!"float", !72, i64 0} +!77 = !{i32 529} +!78 = !{i32 8} +!79 = !{i32 16} +!80 = !{i32 32} +!81 = !{i32 64} diff --git a/llvm/tools/sycl-post-link/CMakeLists.txt b/llvm/tools/sycl-post-link/CMakeLists.txt index 
3905e836aaae8..cfb9b1a27560f 100644 --- a/llvm/tools/sycl-post-link/CMakeLists.txt +++ b/llvm/tools/sycl-post-link/CMakeLists.txt @@ -27,7 +27,6 @@ add_llvm_tool(sycl-post-link SpecConstants.cpp SYCLDeviceLibReqMask.cpp SYCLKernelParamOptInfo.cpp - SYCLDeviceRequirements.cpp ADDITIONAL_HEADER_DIRS ${LLVMGenXIntrinsics_SOURCE_DIR}/GenXIntrinsics/include ${LLVMGenXIntrinsics_BINARY_DIR}/GenXIntrinsics/include diff --git a/llvm/tools/sycl-post-link/SYCLDeviceRequirements.cpp b/llvm/tools/sycl-post-link/SYCLDeviceRequirements.cpp deleted file mode 100644 index 5255ce7bf2a66..0000000000000 --- a/llvm/tools/sycl-post-link/SYCLDeviceRequirements.cpp +++ /dev/null @@ -1,138 +0,0 @@ -//===----- SYCLDeviceRequirements.cpp - collect data for used aspects ----=-==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "SYCLDeviceRequirements.h" - -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/IR/Module.h" -#include "llvm/SYCLLowerIR/ModuleSplitter.h" -#include "llvm/Support/PropertySetIO.h" - -#include -#include - -using namespace llvm; - -void llvm::getSYCLDeviceRequirements( - const module_split::ModuleDesc &MD, - std::map &Requirements) { - auto ExtractSignedIntegerFromMDNodeOperand = [=](const MDNode *N, - unsigned OpNo) -> int64_t { - Constant *C = - cast(N->getOperand(OpNo).get())->getValue(); - return C->getUniqueInteger().getSExtValue(); - }; - - auto ExtractUnsignedIntegerFromMDNodeOperand = - [=](const MDNode *N, unsigned OpNo) -> uint64_t { - Constant *C = - cast(N->getOperand(OpNo).get())->getValue(); - return C->getUniqueInteger().getZExtValue(); - }; - - // { LLVM-IR metadata name , [SYCL/Device requirements] property name }, see: - // 
https://github.com/intel/llvm/blob/sycl/sycl/doc/design/OptionalDeviceFeatures.md#create-the-sycldevice-requirements-property-set - // Scan the module and if the metadata is present fill the corresponing - // property with metadata's aspects - constexpr std::pair ReqdMDs[] = { - {"sycl_used_aspects", "aspects"}, {"sycl_fixed_targets", "fixed_target"}}; - - for (const auto &[MDName, MappedName] : ReqdMDs) { - std::set Values; - for (const Function &F : MD.getModule()) { - if (const MDNode *MDN = F.getMetadata(MDName)) { - for (size_t I = 0, E = MDN->getNumOperands(); I < E; ++I) { - if (std::string(MDName) == "sycl_used_aspects") { - // Don't put internal aspects (with negative integer value) into the - // requirements, they are used only for device image splitting. - auto Val = ExtractSignedIntegerFromMDNodeOperand(MDN, I); - if (Val >= 0) - Values.insert(Val); - } else { - Values.insert(ExtractUnsignedIntegerFromMDNodeOperand(MDN, I)); - } - } - } - } - - // We don't need the "fixed_target" property if it's empty - if (std::string(MDName) == "sycl_fixed_targets" && Values.empty()) - continue; - Requirements[MappedName] = - std::vector(Values.begin(), Values.end()); - } - - std::optional> ReqdWorkGroupSize; - for (const Function &F : MD.getModule()) { - if (const MDNode *MDN = F.getMetadata("reqd_work_group_size")) { - llvm::SmallVector NewReqdWorkGroupSize; - for (size_t I = 0, E = MDN->getNumOperands(); I < E; ++I) - NewReqdWorkGroupSize.push_back( - ExtractUnsignedIntegerFromMDNodeOperand(MDN, I)); - if (!ReqdWorkGroupSize) - ReqdWorkGroupSize = NewReqdWorkGroupSize; - } - } - - // TODO: Before intel/llvm#10620, the reqd_work_group_size attribute - // stores its values as uint32_t, but this needed to be expanded to - // uint64_t. However, this change did not happen in ABI-breaking - // window, so we attach the required work-group size as the - // reqd_work_group_size_uint64_t attribute. 
At the next ABI-breaking - // window, this can be changed back to reqd_work_group_size. - if (ReqdWorkGroupSize) - Requirements["reqd_work_group_size_uint64_t"] = *ReqdWorkGroupSize; - - auto ExtractStringFromMDNodeOperand = - [=](const MDNode *N, unsigned OpNo) -> llvm::SmallString<256> { - MDString *S = cast(N->getOperand(OpNo).get()); - return S->getString(); - }; - - // { LLVM-IR metadata name , [SYCL/Device requirements] property name }, see: - // https://github.com/intel/llvm/blob/sycl/sycl/doc/design/OptionalDeviceFeatures.md#create-the-sycldevice-requirements-property-set - // Scan the module and if the metadata is present fill the corresponing - // property with metadata's aspects - constexpr std::pair MatrixMDs[] = { - {"sycl_joint_matrix", "joint_matrix"}, - {"sycl_joint_matrix_mad", "joint_matrix_mad"}}; - - for (const auto &[MDName, MappedName] : MatrixMDs) { - llvm::SmallString<256> Val; - for (const Function &F : MD.getModule()) - if (const MDNode *MDN = F.getMetadata(MDName)) - Val = ExtractStringFromMDNodeOperand( - MDN, 0); // there is always only one operand - if (Val.empty()) - continue; - Requirements[MappedName] = Val; - } - - // There should only be at most one function with - // intel_reqd_sub_group_size metadata when considering the entry - // points of a module, but not necessarily when considering all the - // functions of a module: an entry point with a - // intel_reqd_sub_group_size can call an ESIMD function through - // invoke_esimd, and that function has intel_reqd_sub_group_size=1, - // which is valid. 
- std::optional SubGroupSize; - for (const Function *F : MD.entries()) { - if (auto *MDN = F->getMetadata("intel_reqd_sub_group_size")) { - assert(MDN->getNumOperands() == 1); - auto MDValue = ExtractUnsignedIntegerFromMDNodeOperand(MDN, 0); - if (!SubGroupSize) - SubGroupSize = MDValue; - else - assert(*SubGroupSize == static_cast(MDValue)); - } - } - // Do not attach reqd_sub_group_size if there is no attached metadata - if (SubGroupSize) - Requirements["reqd_sub_group_size"] = *SubGroupSize; -} diff --git a/llvm/tools/sycl-post-link/SYCLDeviceRequirements.h b/llvm/tools/sycl-post-link/SYCLDeviceRequirements.h deleted file mode 100644 index 5ef5c9aea847e..0000000000000 --- a/llvm/tools/sycl-post-link/SYCLDeviceRequirements.h +++ /dev/null @@ -1,30 +0,0 @@ -//===----- SYCLDeviceRequirements.h - collect data for used aspects ------=-==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include -#include -#include - -namespace llvm { - -class StringRef; - -namespace module_split { -class ModuleDesc; -} -namespace util { -class PropertyValue; -} - -void getSYCLDeviceRequirements( - const module_split::ModuleDesc &M, - std::map &Requirements); - -} // namespace llvm diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index 3040270aea540..135b4068e526f 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -14,11 +14,11 @@ //===----------------------------------------------------------------------===// #include "SYCLDeviceLibReqMask.h" -#include "SYCLDeviceRequirements.h" #include "SYCLKernelParamOptInfo.h" #include "SpecConstants.h" #include "Support.h" +#include "llvm/ADT/SmallSet.h" #include 
"llvm/ADT/StringRef.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/ProfileSummaryInfo.h" @@ -34,12 +34,14 @@ #include "llvm/Linker/Linker.h" #include "llvm/Passes/PassBuilder.h" #include "llvm/SYCLLowerIR/CompileTimePropertiesPass.h" +#include "llvm/SYCLLowerIR/DeviceConfigFile.hpp" #include "llvm/SYCLLowerIR/DeviceGlobals.h" #include "llvm/SYCLLowerIR/ESIMD/ESIMDUtils.h" #include "llvm/SYCLLowerIR/ESIMD/LowerESIMD.h" #include "llvm/SYCLLowerIR/HostPipes.h" #include "llvm/SYCLLowerIR/LowerInvokeSimd.h" #include "llvm/SYCLLowerIR/ModuleSplitter.h" +#include "llvm/SYCLLowerIR/SYCLDeviceRequirements.h" #include "llvm/SYCLLowerIR/SYCLUtils.h" #include "llvm/SYCLLowerIR/SanitizeDeviceGlobal.h" #include "llvm/Support/CommandLine.h" @@ -105,9 +107,32 @@ cl::opt OutputDir{ "Directory where files listed in the result file table will be output"), cl::value_desc("dirname"), cl::cat(PostLinkCat)}; -cl::opt OutputFilename{"o", cl::desc("Output filename"), - cl::value_desc("filename"), cl::init("-"), - cl::cat(PostLinkCat)}; +struct TargetFilenamePair { + std::optional Target; + std::string Filename; +}; + +struct TargetFilenamePairParser : public cl::basic_parser { + using cl::basic_parser::basic_parser; + bool parse(cl::Option &O, StringRef ArgName, StringRef &ArgValue, + TargetFilenamePair &Val) const { + auto [Target, Filename] = ArgValue.split(","); + if (Filename == "") + std::swap(Target, Filename); + Val = {Target.str(), Filename.str()}; + return false; + } +}; + +cl::list OutputFiles{ + "o", + cl::desc( + "Specifies an output file. Multiple output files can be " + "specified. 
Additionally, a target may be specified alongside an " + "output file, which has the effect that when module splitting is " + "performed, the modules that are in that output table are filtered " + "so those modules are compatible with the target."), + cl::value_desc("target filename pair"), cl::cat(PostLinkCat)}; cl::opt Force{"f", cl::desc("Enable binary output on terminals"), cl::cat(PostLinkCat)}; @@ -337,13 +362,14 @@ std::vector getKernelReqdWorkGroupSizeMetadata(const Function &Func) { std::string makeResultFileName(Twine Ext, int I, StringRef Suffix) { const StringRef Dir0 = OutputDir.getNumOccurrences() > 0 ? OutputDir - : sys::path::parent_path(OutputFilename); + : sys::path::parent_path(OutputFiles[0].Filename); const StringRef Sep = sys::path::get_separator(); std::string Dir = Dir0.str(); if (!Dir0.empty() && !Dir0.ends_with(Sep)) Dir += Sep.str(); - return Dir + sys::path::stem(OutputFilename).str() + Suffix.str() + "_" + - std::to_string(I) + Ext.str(); + return (Dir + sys::path::stem(OutputFiles[0].Filename) + Suffix + "_" + + Twine(I) + Ext) + .str(); } void saveModuleIR(Module &M, StringRef OutFilename) { @@ -382,9 +408,8 @@ std::string saveModuleProperties(module_split::ModuleDesc &MD, PropSet.add(PropSetRegTy::SYCL_DEVICELIB_REQ_MASK, RMEntry); } { - std::map Requirements; - getSYCLDeviceRequirements(MD, Requirements); - PropSet.add(PropSetRegTy::SYCL_DEVICE_REQUIREMENTS, Requirements); + PropSet.add(PropSetRegTy::SYCL_DEVICE_REQUIREMENTS, + MD.getOrComputeDeviceRequirements().asMap()); } if (MD.Props.SpecConstsMet) { // extract spec constant maps per each module @@ -937,7 +962,80 @@ handleESIMD(module_split::ModuleDesc &&MDesc, bool &Modified, return Result; } -std::unique_ptr +// Checks if the given target and module are compatible. +// A target and module are compatible if all the optional kernel features +// the module uses are supported by that target (i.e. that module can be +// compiled for that target and then be executed on that target). 
This +// information comes from the device config file (DeviceConfigFile.td). +// For example, the intel_gpu_tgllp target does not support fp64 - therefore, +// a module using fp64 would *not* be compatible with intel_gpu_tgllp. +bool isTargetCompatibleWithModule(const std::optional &Target, + module_split::ModuleDesc &IrMD) { + // When the user does not specify a target, + // (e.g. -o out.table compared to -o intel_gpu_pvc,out-pvc.table) + // Target will have no value and we will not want to perform any filtering, so + // we return true here. + if (!Target.has_value()) + return true; + + // TODO: If a target not found in the device config file is passed, + // to sycl-post-link, then we should probably throw an error. However, + // since not all the information for all the targets is filled out + // right now, we return true, having the affect that unrecognized + // targets have no filtering applied to them. + if (!is_contained(DeviceConfigFile::TargetTable, *Target)) + return true; + + const DeviceConfigFile::TargetInfo &TargetInfo = + DeviceConfigFile::TargetTable[*Target]; + const SYCLDeviceRequirements &ModuleReqs = + IrMD.getOrComputeDeviceRequirements(); + // The device config file data stores the target's supported + // aspects as a vector of the strings, so we need to translate + // the values to a common format. + const NamedMDNode *Node = IrMD.getModule().getNamedMetadata("sycl_aspects"); + if (Node) { + SmallMapVector AspectNameToValue; + for (const MDNode *N : Node->operands()) { + assert(N->getNumOperands() == 2 && + "Each operand of sycl_aspects must be a pair."); + + // The aspect's name is the first operand. + const auto *AspectName = cast(N->getOperand(0)); + + // The aspect's integral value is the second operand. 
+ const auto *AspectCAM = cast(N->getOperand(1)); + const Constant *AspectC = AspectCAM->getValue(); + + AspectNameToValue[AspectName->getString()] = + cast(AspectC)->getSExtValue(); + } + + // Make the set of aspects values the target supports. + SmallSet TargetAspectValueSet; + for (auto Aspect : TargetInfo.aspects) { + auto It = AspectNameToValue.find(Aspect); + assert(It != AspectNameToValue.end() && "Aspect value mapping unknown!"); + TargetAspectValueSet.insert(It->second); + } + + // Now check to see if all the requirements of the input module + // are compatible with the target. + for (auto Aspect : ModuleReqs.Aspects) { + if (!TargetAspectValueSet.contains(Aspect)) + return false; + } + } + + // Check if module sub group size is compatible with the target. + if (ModuleReqs.SubGroupSize.has_value() && + !is_contained(TargetInfo.subGroupSizes, *ModuleReqs.SubGroupSize)) + return false; + + return true; +} + +std::unique_ptr +std::vector> processInputModule(std::unique_ptr M) { // Construct the resulting table which will accumulate all the outputs. SmallVector ColumnTitles{ @@ -949,7 +1047,14 @@ processInputModule(std::unique_ptr M) { Expected> TableE = util::SimpleTable::create(ColumnTitles); CHECK_AND_EXIT(TableE.takeError()); - std::unique_ptr Table = std::move(TableE.get()); + std::vector> Tables; + for (auto OutputFile : OutputFiles) { + std::ignore = OutputFile; + Expected> TableE = + util::SimpleTable::create(ColumnTitles); + CHECK_AND_EXIT(TableE.takeError()); + Tables.push_back(std::move(TableE.get())); + } // Used in output filenames generation. 
int ID = 0; @@ -1049,14 +1154,14 @@ processInputModule(std::unique_ptr M) { "' can't be used"); } MMs.front().cleanup(); - saveModuleIR(MMs.front().getModule(), OutputFilename); - return Table; + saveModuleIR(MMs.front().getModule(), OutputFiles[0].Filename); + return Tables; } // Empty IR file name directs saveModule to generate one and save IR to // it: std::string OutIRFileName = ""; - if (!Modified && (OutputFilename.getNumOccurrences() == 0)) { + if (!Modified && (OutputFiles.getNumOccurrences() == 0)) { assert(!SplitOccurred); OutIRFileName = InputFilename; // ... non-empty means "skip IR writing" errs() << "sycl-post-link NOTE: no modifications to the input LLVM IR " @@ -1064,7 +1169,9 @@ processInputModule(std::unique_ptr M) { } for (module_split::ModuleDesc &IrMD : MMs) { IrPropSymFilenameTriple T = saveModule(IrMD, ID, OutIRFileName); - addTableRow(*Table, T); + for (const auto &[Table, OutputFile] : zip_equal(Tables, OutputFiles)) + if (isTargetCompatibleWithModule(OutputFile.Target, IrMD)) + addTableRow(*Table, T); } ++ID; @@ -1073,13 +1180,15 @@ processInputModule(std::unique_ptr M) { for (size_t i = 0; i != MMsWithDefaultSpecConsts.size(); ++i) { module_split::ModuleDesc &IrMD = MMsWithDefaultSpecConsts[i]; IrPropSymFilenameTriple T = saveModule(IrMD, ID, OutIRFileName); - addTableRow(*Table, T); + for (const auto &[Table, OutputFile] : zip_equal(Tables, OutputFiles)) + if (isTargetCompatibleWithModule(OutputFile.Target, IrMD)) + addTableRow(*Table, T); } ++ID; } } - return Table; + return Tables; } } // namespace @@ -1203,23 +1312,26 @@ int main(int argc, char **argv) { return 1; } - if (OutputFilename.getNumOccurrences() == 0) { - std::string S = + if (OutputFiles.getNumOccurrences() == 0) { + StringRef S = IROutputOnly ? (OutputAssembly ? 
".out.ll" : "out.bc") : ".files"; - OutputFilename = (Twine(sys::path::stem(InputFilename)) + S).str(); + OutputFiles.push_back({{}, (sys::path::stem(InputFilename) + S).str()}); } - std::unique_ptr Table = processInputModule(std::move(M)); + std::vector> Tables = + processInputModule(std::move(M)); // Input module was processed and a single output file was requested. if (IROutputOnly) return 0; - // Emit the resulting table - std::error_code EC; - raw_fd_ostream Out{OutputFilename, EC, sys::fs::OF_None}; - checkError(EC, "error opening file '" + OutputFilename + "'"); - Table->write(Out); + // Emit the resulting tables + for (const auto &[Table, OutputFile] : zip_equal(Tables, OutputFiles)) { + std::error_code EC; + raw_fd_ostream Out{OutputFile.Filename, EC, sys::fs::OF_None}; + checkError(EC, "error opening file '" + OutputFile.Filename + "'"); + Table->write(Out); + } return 0; }