diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 7cac4d787778f..cb756246b8d11 100644 --- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -209,7 +209,7 @@ void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) { TargetMachine &TM = const_cast(MF->getTarget()); NVPTXTargetMachine &nvTM = static_cast(TM); const NVPTXMachineFunctionInfo *MFI = MF->getInfo(); - const char *Sym = MFI->getImageHandleSymbol(Index); + StringRef Sym = MFI->getImageHandleSymbol(Index); StringRef SymName = nvTM.getStrPool().save(Sym); MCOp = GetSymbolRef(OutContext.getOrCreateSymbol(SymName)); } @@ -224,16 +224,13 @@ void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) { return; } - const NVPTXSubtarget &STI = MI->getMF()->getSubtarget(); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); MCOperand MCOp; - if (!STI.hasImageHandles()) { - if (lowerImageHandleOperand(MI, i, MCOp)) { - OutMI.addOperand(MCOp); - continue; - } + if (lowerImageHandleOperand(MI, i, MCOp)) { + OutMI.addOperand(MCOp); + continue; } if (lowerOperand(MO, MCOp)) @@ -1509,13 +1506,14 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { const AttributeList &PAL = F->getAttributes(); const NVPTXSubtarget &STI = TM.getSubtarget(*F); const auto *TLI = cast(STI.getTargetLowering()); + const NVPTXMachineFunctionInfo *MFI = + MF ? MF->getInfo() : nullptr; Function::const_arg_iterator I, E; unsigned paramIndex = 0; bool first = true; bool isKernelFunc = isKernelFunction(*F); bool isABI = (STI.getSmVersion() >= 20); - bool hasImageHandles = STI.hasImageHandles(); if (F->arg_empty() && !F->isVarArg()) { O << "()"; @@ -1533,25 +1531,30 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { first = false; // Handle image/sampler parameters - if (isKernelFunction(*F)) { + if (isKernelFunc) { if (isSampler(*I) || isImage(*I)) { + std::string ParamSym; + raw_string_ostream ParamStr(ParamSym); + ParamStr << F->getName() << "_param_" << paramIndex; + ParamStr.flush(); + bool EmitImagePtr = !MFI || !MFI->checkImageHandleSymbol(ParamSym); if (isImage(*I)) { if (isImageWriteOnly(*I) || isImageReadWrite(*I)) { - if (hasImageHandles) + if (EmitImagePtr) O << "\t.param .u64 .ptr .surfref "; else O << "\t.param .surfref "; O << TLI->getParamName(F, paramIndex); } else { // Default image is read_only - if (hasImageHandles) + if (EmitImagePtr) O << "\t.param .u64 .ptr .texref "; else O << "\t.param .texref "; O << TLI->getParamName(F, paramIndex); } } else { - if (hasImageHandles) + if (EmitImagePtr) O << "\t.param .u64 .ptr .samplerref "; else O << "\t.param .samplerref "; diff --git a/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h b/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h index 77426f7f6da71..6670cb296f216 100644 --- a/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h +++ b/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h @@ -14,13 +14,14 @@ #ifndef LLVM_LIB_TARGET_NVPTX_NVPTXMACHINEFUNCTIONINFO_H #define LLVM_LIB_TARGET_NVPTX_NVPTXMACHINEFUNCTIONINFO_H +#include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineFunction.h" namespace llvm { class NVPTXMachineFunctionInfo : public MachineFunctionInfo { private: - /// Stores a mapping from index to symbol name for removing image handles - /// on Fermi. + /// Stores a mapping from index to symbol name for image handles that are + /// replaced with image references SmallVector ImageHandleList; public: @@ -36,20 +37,27 @@ class NVPTXMachineFunctionInfo : public MachineFunctionInfo { /// Returns the index for the symbol \p Symbol. If the symbol was previously, /// added, the same index is returned. Otherwise, the symbol is added and the /// new index is returned. - unsigned getImageHandleSymbolIndex(const char *Symbol) { + unsigned getImageHandleSymbolIndex(StringRef Symbol) { // Is the symbol already present? for (unsigned i = 0, e = ImageHandleList.size(); i != e; ++i) - if (ImageHandleList[i] == std::string(Symbol)) + if (ImageHandleList[i] == Symbol) return i; // Nope, insert it - ImageHandleList.push_back(Symbol); + ImageHandleList.push_back(Symbol.str()); return ImageHandleList.size()-1; } /// Returns the symbol name at the given index. - const char *getImageHandleSymbol(unsigned Idx) const { + StringRef getImageHandleSymbol(unsigned Idx) const { assert(ImageHandleList.size() > Idx && "Bad index"); - return ImageHandleList[Idx].c_str(); + return ImageHandleList[Idx]; + } + + /// Check if the symbol has a mapping. Having a mapping means the handle is + /// replaced with a reference + bool checkImageHandleSymbol(StringRef Symbol) const { + return ImageHandleList.end() != + std::find(ImageHandleList.begin(), ImageHandleList.end(), Symbol); } }; } diff --git a/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp index f66504b09cb63..d740df59a3dde 100644 --- a/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp @@ -1809,6 +1809,8 @@ findIndexForHandle(MachineOperand &Op, MachineFunction &MF, unsigned &Idx) { MachineInstr &TexHandleDef = *MRI.getVRegDef(Op.getReg()); switch (TexHandleDef.getOpcode()) { + case NVPTX::LD_i64_asi: + case NVPTX::LD_i64_areg_64: case NVPTX::LD_i64_avar: { // The handle is a parameter value being loaded, replace with the // parameter symbol @@ -1830,7 +1832,7 @@ findIndexForHandle(MachineOperand &Op, MachineFunction &MF, unsigned &Idx) { NewSymStr << MF.getName() << "_param_" << Param; InstrsToRemove.insert(&TexHandleDef); - Idx = MFI->getImageHandleSymbolIndex(NewSymStr.str().c_str()); + Idx = MFI->getImageHandleSymbolIndex(NewSymStr.str()); return true; } case NVPTX::texsurf_handles: { @@ -1839,7 +1841,7 @@ findIndexForHandle(MachineOperand &Op, MachineFunction &MF, unsigned &Idx) { const GlobalValue *GV = TexHandleDef.getOperand(1).getGlobal(); assert(GV->hasName() && "Global sampler must be named!"); InstrsToRemove.insert(&TexHandleDef); - Idx = MFI->getImageHandleSymbolIndex(GV->getName().data()); + Idx = MFI->getImageHandleSymbolIndex(GV->getName()); return true; } case NVPTX::nvvm_move_i64: diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp index b953c173afa69..e5d680c19d921 100644 --- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp @@ -55,7 +55,7 @@ NVPTXSubtarget::NVPTXSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const NVPTXTargetMachine &TM) : NVPTXGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), PTXVersion(0), - FullSmVersion(200), SmVersion(getSmVersion()), TM(TM), + FullSmVersion(200), SmVersion(getSmVersion()), TLInfo(TM, initializeSubtargetDependencies(CPU, FS)) { TSInfo = std::make_unique(); } @@ -66,16 +66,6 @@ const SelectionDAGTargetInfo *NVPTXSubtarget::getSelectionDAGInfo() const { return TSInfo.get(); } -bool NVPTXSubtarget::hasImageHandles() const { - // Enable handles for Kepler+, where CUDA supports indirect surfaces and - // textures - if (TM.getDrvInterface() == NVPTX::CUDA) - return (SmVersion >= 30); - - // Disabled, otherwise - return false; -} - bool NVPTXSubtarget::allowFP16Math() const { return hasFP16Math() && NoF16Math == false; } diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h index 4ae1204881e6c..997ef1b5810ff 100644 --- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h +++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h @@ -42,7 +42,6 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo { // FullSmVersion. unsigned int SmVersion; - const NVPTXTargetMachine &TM; NVPTXInstrInfo InstrInfo; NVPTXTargetLowering TLInfo; std::unique_ptr TSInfo; @@ -82,7 +81,6 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo { bool hasClusters() const { return SmVersion >= 90 && PTXVersion >= 78; } bool hasLDG() const { return SmVersion >= 32; } bool hasHWROT32() const { return SmVersion >= 32; } - bool hasImageHandles() const; bool hasFP16Math() const { return SmVersion >= 53; } bool hasBF16Math() const { return SmVersion >= 80; } bool allowFP16Math() const; diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 7a3ea204d0531..d259dbbb72c0a 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -440,14 +440,10 @@ void NVPTXPassConfig::addIRPasses() { } bool NVPTXPassConfig::addInstSelector() { - const NVPTXSubtarget &ST = *getTM().getSubtargetImpl(); - addPass(createLowerAggrCopies()); addPass(createAllocaHoisting()); addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel())); - - if (!ST.hasImageHandles()) - addPass(createNVPTXReplaceImageHandlesPass()); + addPass(createNVPTXReplaceImageHandlesPass()); return false; } diff --git a/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll b/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll index 2d038238e2d03..7a7904a2f0425 100644 --- a/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll +++ b/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll @@ -1,5 +1,6 @@ -; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefix=SM20 -; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_30 -verify-machineinstrs | FileCheck %s --check-prefix=SM30 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_30 -verify-machineinstrs | FileCheck %s ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_30 -verify-machineinstrs | %ptxas-verify %} @@ -9,38 +10,48 @@ declare i32 @llvm.nvvm.suld.1d.i32.trap(i64, i32) declare i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1)) -; SM20-LABEL: .entry foo -; SM30-LABEL: .entry foo define ptx_kernel void @foo(i64 %img, ptr %red, i32 %idx) { -; SM20: ld.param.u64 %rd[[SURFREG:[0-9]+]], [foo_param_0]; -; SM20: suld.b.1d.b32.trap {%r[[RED:[0-9]+]]}, [%rd[[SURFREG]], {%r{{[0-9]+}}}] -; SM30: ld.param.u64 %rd[[SURFREG:[0-9]+]], [foo_param_0]; -; SM30: suld.b.1d.b32.trap {%r[[RED:[0-9]+]]}, [%rd[[SURFREG]], {%r{{[0-9]+}}}] +; CHECK-LABEL: foo( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-NEXT: .reg .f32 %f<2>; +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u64 %rd1, [foo_param_0]; +; CHECK-NEXT: ld.param.u64 %rd2, [foo_param_1]; +; CHECK-NEXT: cvta.to.global.u64 %rd3, %rd2; +; CHECK-NEXT: ld.param.u32 %r1, [foo_param_2]; +; CHECK-NEXT: suld.b.1d.b32.trap {%r2}, [%rd1, {%r1}]; +; CHECK-NEXT: cvt.rn.f32.s32 %f1, %r2; +; CHECK-NEXT: st.global.f32 [%rd3], %f1; +; CHECK-NEXT: ret; %val = tail call i32 @llvm.nvvm.suld.1d.i32.trap(i64 %img, i32 %idx) -; SM20: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]] -; SM30: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]] %ret = sitofp i32 %val to float -; SM20: st.global.f32 [%rd{{[0-9]+}}], %f[[REDF]] -; SM30: st.global.f32 [%rd{{[0-9]+}}], %f[[REDF]] store float %ret, ptr %red ret void } @surf0 = internal addrspace(1) global i64 0, align 8 -; SM20-LABEL: .entry bar -; SM30-LABEL: .entry bar define ptx_kernel void @bar(ptr %red, i32 %idx) { -; SM30: mov.u64 %rd[[SURFHANDLE:[0-9]+]], surf0 +; CHECK-LABEL: bar( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-NEXT: .reg .f32 %f<2>; +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u64 %rd1, [bar_param_0]; +; CHECK-NEXT: cvta.to.global.u64 %rd2, %rd1; +; CHECK-NEXT: ld.param.u32 %r1, [bar_param_1]; +; CHECK-NEXT: suld.b.1d.b32.trap {%r2}, [surf0, {%r1}]; +; CHECK-NEXT: cvt.rn.f32.s32 %f1, %r2; +; CHECK-NEXT: st.global.f32 [%rd2], %f1; +; CHECK-NEXT: ret; %surfHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @surf0) -; SM20: suld.b.1d.b32.trap {%r[[RED:[0-9]+]]}, [surf0, {%r{{[0-9]+}}}] -; SM30: suld.b.1d.b32.trap {%r[[RED:[0-9]+]]}, [%rd[[SURFHANDLE]], {%r{{[0-9]+}}}] %val = tail call i32 @llvm.nvvm.suld.1d.i32.trap(i64 %surfHandle, i32 %idx) -; SM20: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]] -; SM30: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]] %ret = sitofp i32 %val to float -; SM20: st.global.f32 [%rd{{[0-9]+}}], %f[[REDF]] -; SM30: st.global.f32 [%rd{{[0-9]+}}], %f[[REDF]] store float %ret, ptr %red ret void } diff --git a/llvm/test/CodeGen/NVPTX/surf-tex.py b/llvm/test/CodeGen/NVPTX/surf-tex.py index ba4b4787dce55..90d67666f1ed6 100644 --- a/llvm/test/CodeGen/NVPTX/surf-tex.py +++ b/llvm/test/CodeGen/NVPTX/surf-tex.py @@ -1,12 +1,12 @@ # RUN: %python %s --target=cuda --tests=suld,sust,tex,tld4 --gen-list=%t.list > %t-cuda.ll -# RUN: llc -mcpu=sm_60 -mattr=+ptx43 %t-cuda.ll -verify-machineinstrs -o - | FileCheck %t-cuda.ll --check-prefixes=CHECK,CHECK-CUDA +# RUN: llc -mcpu=sm_60 -mattr=+ptx43 %t-cuda.ll -verify-machineinstrs -o - | FileCheck %t-cuda.ll # RUN: %if ptxas %{ llc -mcpu=sm_60 -mattr=+ptx43 %t-cuda.ll -verify-machineinstrs -o - | %ptxas-verify %} # We only need to run this second time for texture tests, because # there is a difference between unified and non-unified intrinsics. # # RUN: %python %s --target=nvcl --tests=suld,sust,tex,tld4 --gen-list-append --gen-list=%t.list > %t-nvcl.ll -# RUN: llc %t-nvcl.ll -verify-machineinstrs -o - | FileCheck %t-nvcl.ll --check-prefixes=CHECK,CHECK-NVCL +# RUN: llc %t-nvcl.ll -verify-machineinstrs -o - | FileCheck %t-nvcl.ll # RUN: %if ptxas %{ llc %t-nvcl.ll -verify-machineinstrs -o - | %ptxas-verify %} # Verify that all instructions and intrinsics defined in TableGen @@ -264,9 +264,7 @@ def gen_suld_tests(target, global_surf): ret void } ; CHECK-LABEL: .entry ${test_name}_global - ; CHECK-CUDA: mov.u64 [[REG${reg_id}:%.*]], ${global_surf} - ; CHECK-CUDA: ${instruction} ${reg_ret}, [[[REG${reg_id}]], ${reg_access}] - ; CHECK-NVCL: ${instruction} ${reg_ret}, [${global_surf}, ${reg_access}] + ; CHECK: ${instruction} ${reg_ret}, [${global_surf}, ${reg_access}] define ptx_kernel void @${test_name}_global(${retty}* %ret, ${access}) { %gs = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_surf}) %val = tail call ${retty} @${intrinsic}(i64 %gs, ${access}) @@ -309,7 +307,6 @@ def gen_suld_tests(target, global_surf): "reg_ret": get_ptx_vec_reg(vec, dtype), "reg_surf": get_ptx_surface(target), "reg_access": get_ptx_surface_access(geom), - "reg_id": get_table_gen_id(), } gen_test(template, params) generated_items.append((params["intrinsic"], params["instruction"])) @@ -359,9 +356,7 @@ def gen_sust_tests(target, global_surf): ret void } ; CHECK-LABEL: .entry ${test_name}_global - ; CHECK-CUDA: mov.u64 [[REG${reg_id}:%.*]], ${global_surf} - ; CHECK-CUDA: ${instruction} [[[REG${reg_id}]], ${reg_access}], ${reg_value} - ; CHECK-NVCL: ${instruction} [${global_surf}, ${reg_access}], ${reg_value} + ; CHECK: ${instruction} [${global_surf}, ${reg_access}], ${reg_value} define ptx_kernel void @${test_name}_global(${value}, ${access}) { %gs = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_surf}) tail call void @${intrinsic}(i64 %gs, ${access}, ${value}) @@ -415,7 +410,6 @@ def gen_sust_tests(target, global_surf): "reg_value": get_ptx_vec_reg(vec, ctype), "reg_surf": get_ptx_surface(target), "reg_access": get_ptx_surface_access(geom), - "reg_id": get_table_gen_id(), } gen_test(template, params) generated_items.append((params["intrinsic"], params["instruction"])) @@ -611,9 +605,7 @@ def gen_tex_tests(target, global_tex, global_sampler): ret void } ; CHECK-LABEL: .entry ${test_name}_global - ; CHECK-CUDA: mov.u64 [[REG${reg_id}:%.*]], ${global_tex} - ; CHECK-CUDA: ${instruction} ${ptx_ret}, [[[REG${reg_id}]], ${ptx_global_sampler} ${ptx_access}] - ; CHECK-NVCL: ${instruction} ${ptx_ret}, [${global_tex}, ${ptx_global_sampler} ${ptx_access}] + ; CHECK: ${instruction} ${ptx_ret}, [${global_tex}, ${ptx_global_sampler} ${ptx_access}] define ptx_kernel void @${test_name}_global(${retty}* %ret, ${access}) { %gt = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_tex}) ${get_sampler_handle} @@ -697,7 +689,6 @@ def gen_tex_tests(target, global_tex, global_sampler): "ptx_tex": get_ptx_texture(target), "ptx_access": get_ptx_texture_access(geom, ctype), "ptx_global_sampler": get_ptx_global_sampler(target, global_sampler), - "reg_id": get_table_gen_id(), } gen_test(template, params) generated_items.append((params["intrinsic"], params["instruction"])) @@ -798,9 +789,7 @@ def gen_tld4_tests(target, global_tex, global_sampler): ret void } ; CHECK-LABEL: .entry ${test_name}_global - ; CHECK-CUDA: mov.u64 [[REG${reg_id}:%.*]], ${global_tex} - ; CHECK-CUDA: ${instruction} ${ptx_ret}, [[[REG${reg_id}]], ${ptx_global_sampler} ${ptx_access}] - ; CHECK-NVCL: ${instruction} ${ptx_ret}, [${global_tex}, ${ptx_global_sampler} ${ptx_access}] + ; CHECK: ${instruction} ${ptx_ret}, [${global_tex}, ${ptx_global_sampler} ${ptx_access}] define ptx_kernel void @${test_name}_global(${retty}* %ret, ${access}) { %gt = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_tex}) ${get_sampler_handle} @@ -846,7 +835,6 @@ def gen_tld4_tests(target, global_tex, global_sampler): "ptx_tex": get_ptx_texture(target), "ptx_access": get_ptx_tld4_access(geom), "ptx_global_sampler": get_ptx_global_sampler(target, global_sampler), - "reg_id": get_table_gen_id(), } gen_test(template, params) generated_items.append((params["intrinsic"], params["instruction"])) diff --git a/llvm/test/CodeGen/NVPTX/surf-write-cuda.ll b/llvm/test/CodeGen/NVPTX/surf-write-cuda.ll index d051e5a746b73..5dc44cb1925b0 100644 --- a/llvm/test/CodeGen/NVPTX/surf-write-cuda.ll +++ b/llvm/test/CodeGen/NVPTX/surf-write-cuda.ll @@ -1,5 +1,6 @@ -; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefix=SM20 -; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_30 -verify-machineinstrs | FileCheck %s --check-prefix=SM30 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_30 -verify-machineinstrs | FileCheck %s ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_30 -verify-machineinstrs | %ptxas-verify %} @@ -9,13 +10,18 @@ declare void @llvm.nvvm.sust.b.1d.i32.trap(i64, i32, i32) declare i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1)) -; SM20-LABEL: .entry foo -; SM30-LABEL: .entry foo define ptx_kernel void @foo(i64 %img, i32 %val, i32 %idx) { -; SM20: ld.param.u64 %rd[[SURFREG:[0-9]+]], [foo_param_0]; -; SM20: sust.b.1d.b32.trap [%rd[[SURFREG]], {%r{{[0-9]+}}}], {%r{{[0-9]+}}} -; SM30: ld.param.u64 %rd[[SURFREG:[0-9]+]], [foo_param_0]; -; SM30: sust.b.1d.b32.trap [%rd[[SURFREG]], {%r{{[0-9]+}}}], {%r{{[0-9]+}}} +; CHECK-LABEL: foo( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u64 %rd1, [foo_param_0]; +; CHECK-NEXT: ld.param.u32 %r1, [foo_param_1]; +; CHECK-NEXT: ld.param.u32 %r2, [foo_param_2]; +; CHECK-NEXT: sust.b.1d.b32.trap [%rd1, {%r2}], {%r1}; +; CHECK-NEXT: ret; tail call void @llvm.nvvm.sust.b.1d.i32.trap(i64 %img, i32 %idx, i32 %val) ret void } @@ -24,14 +30,18 @@ define ptx_kernel void @foo(i64 %img, i32 %val, i32 %idx) { @surf0 = internal addrspace(1) global i64 0, align 8 - -; SM20-LABEL: .entry bar -; SM30-LABEL: .entry bar define ptx_kernel void @bar(i32 %val, i32 %idx) { -; SM30: mov.u64 %rd[[SURFHANDLE:[0-9]+]], surf0 +; CHECK-LABEL: bar( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r1, [bar_param_0]; +; CHECK-NEXT: ld.param.u32 %r2, [bar_param_1]; +; CHECK-NEXT: sust.b.1d.b32.trap [surf0, {%r2}], {%r1}; +; CHECK-NEXT: ret; %surfHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @surf0) -; SM20: sust.b.1d.b32.trap [surf0, {%r{{[0-9]+}}}], {%r{{[0-9]+}}} -; SM30: sust.b.1d.b32.trap [%rd[[SURFREG]], {%r{{[0-9]+}}}], {%r{{[0-9]+}}} tail call void @llvm.nvvm.sust.b.1d.i32.trap(i64 %surfHandle, i32 %idx, i32 %val) ret void } diff --git a/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll b/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll index 42a46f89d04b5..61837bde82ece 100644 --- a/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll +++ b/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll @@ -1,5 +1,6 @@ -; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefix=SM20 -; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_30 -verify-machineinstrs | FileCheck %s --check-prefix=SM30 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_30 -verify-machineinstrs | FileCheck %s ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_30 -verify-machineinstrs | %ptxas-verify %} @@ -9,17 +10,23 @@ target triple = "nvptx-unknown-cuda" declare { float, float, float, float } @llvm.nvvm.tex.unified.1d.v4f32.s32(i64, i32) declare i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1)) -; SM20-LABEL: .entry foo -; SM30-LABEL: .entry foo define ptx_kernel void @foo(i64 %img, ptr %red, i32 %idx) { -; SM20: ld.param.u64 %rd[[TEXREG:[0-9]+]], [foo_param_0]; -; SM20: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [%rd[[TEXREG]], {%r{{[0-9]+}}}] -; SM30: ld.param.u64 %rd[[TEXREG:[0-9]+]], [foo_param_0]; -; SM30: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [%rd[[TEXREG]], {%r{{[0-9]+}}}] +; CHECK-LABEL: foo( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .f32 %f<5>; +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u64 %rd1, [foo_param_0]; +; CHECK-NEXT: ld.param.u64 %rd2, [foo_param_1]; +; CHECK-NEXT: cvta.to.global.u64 %rd3, %rd2; +; CHECK-NEXT: ld.param.u32 %r1, [foo_param_2]; +; CHECK-NEXT: tex.1d.v4.f32.s32 {%f1, %f2, %f3, %f4}, [%rd1, {%r1}]; +; CHECK-NEXT: st.global.f32 [%rd3], %f1; +; CHECK-NEXT: ret; %val = tail call { float, float, float, float } @llvm.nvvm.tex.unified.1d.v4f32.s32(i64 %img, i32 %idx) %ret = extractvalue { float, float, float, float } %val, 0 -; SM20: st.global.f32 [%rd{{[0-9]+}}], %f[[RED]] -; SM30: st.global.f32 [%rd{{[0-9]+}}], %f[[RED]] store float %ret, ptr %red ret void } @@ -27,44 +34,61 @@ define ptx_kernel void @foo(i64 %img, ptr %red, i32 %idx) { @tex0 = internal addrspace(1) global i64 0, align 8 -; SM20-LABEL: .entry bar -; SM30-LABEL: .entry bar define ptx_kernel void @bar(ptr %red, i32 %idx) { -; SM30: mov.u64 %rd[[TEXHANDLE:[0-9]+]], tex0 +; CHECK-LABEL: bar( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .f32 %f<5>; +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u64 %rd1, [bar_param_0]; +; CHECK-NEXT: cvta.to.global.u64 %rd2, %rd1; +; CHECK-NEXT: ld.param.u32 %r1, [bar_param_1]; +; CHECK-NEXT: tex.1d.v4.f32.s32 {%f1, %f2, %f3, %f4}, [tex0, {%r1}]; +; CHECK-NEXT: st.global.f32 [%rd2], %f1; +; CHECK-NEXT: ret; %texHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @tex0) -; SM20: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [tex0, {%r{{[0-9]+}}}] -; SM30: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [%rd[[TEXHANDLE]], {%r{{[0-9]+}}}] %val = tail call { float, float, float, float } @llvm.nvvm.tex.unified.1d.v4f32.s32(i64 %texHandle, i32 %idx) %ret = extractvalue { float, float, float, float } %val, 0 -; SM20: st.global.f32 [%rd{{[0-9]+}}], %f[[RED]] -; SM30: st.global.f32 [%rd{{[0-9]+}}], %f[[RED]] store float %ret, ptr %red ret void } declare float @texfunc(i64) -; SM20-LABEL: .entry baz -; SM30-LABEL: .entry baz define ptx_kernel void @baz(ptr %red, i32 %idx) { -; SM30: mov.u64 %rd[[TEXHANDLE:[0-9]+]], tex0 +; CHECK-LABEL: baz( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .f32 %f<8>; +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u64 %rd1, [baz_param_0]; +; CHECK-NEXT: cvta.to.global.u64 %rd2, %rd1; +; CHECK-NEXT: ld.param.u32 %r1, [baz_param_1]; +; CHECK-NEXT: mov.u64 %rd3, tex0; +; CHECK-NEXT: tex.1d.v4.f32.s32 {%f1, %f2, %f3, %f4}, [tex0, {%r1}]; +; CHECK-NEXT: { // callseq 0, 0 +; CHECK-NEXT: .param .b64 param0; +; CHECK-NEXT: st.param.b64 [param0], %rd3; +; CHECK-NEXT: .param .b32 retval0; +; CHECK-NEXT: call.uni (retval0), +; CHECK-NEXT: texfunc, +; CHECK-NEXT: ( +; CHECK-NEXT: param0 +; CHECK-NEXT: ); +; CHECK-NEXT: ld.param.f32 %f5, [retval0]; +; CHECK-NEXT: } // callseq 0 +; CHECK-NEXT: add.rn.f32 %f7, %f1, %f5; +; CHECK-NEXT: st.global.f32 [%rd2], %f7; +; CHECK-NEXT: ret; %texHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @tex0) -; SM20: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [tex0, {%r{{[0-9]+}}}] -; SM30: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [%rd[[TEXHANDLE]], {%r{{[0-9]+}}}] %val = tail call { float, float, float, float } @llvm.nvvm.tex.unified.1d.v4f32.s32(i64 %texHandle, i32 %idx) %ret = extractvalue { float, float, float, float } %val, 0 -; SM20: call.uni ([[RETVAL:.*]]), -; SM30: call.uni ([[RETVAL:.*]]), -; SM20: texfunc, -; SM30: texfunc, %texcall = tail call float @texfunc(i64 %texHandle) -; SM20: ld.param.f32 %f[[TEXCALL:[0-9]+]], [[[RETVAL]]] -; SM30: ld.param.f32 %f[[TEXCALL:[0-9]+]], [[[RETVAL]]] -; SM20: add.rn.f32 %f[[RET2:[0-9]+]], %f[[RED]], %f[[TEXCALL]] -; SM30: add.rn.f32 %f[[RET2:[0-9]+]], %f[[RED]], %f[[TEXCALL]] %ret2 = fadd float %ret, %texcall -; SM20: st.global.f32 [%rd{{[0-9]+}}], %f[[RET2]] -; SM30: st.global.f32 [%rd{{[0-9]+}}], %f[[RET2]] store float %ret2, ptr %red ret void } diff --git a/llvm/test/CodeGen/NVPTX/texsurf-queries.ll b/llvm/test/CodeGen/NVPTX/texsurf-queries.ll index a096e9c0aab7a..c9f9ccca82c6f 100644 --- a/llvm/test/CodeGen/NVPTX/texsurf-queries.ll +++ b/llvm/test/CodeGen/NVPTX/texsurf-queries.ll @@ -1,5 +1,6 @@ -; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefix=SM20 -; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_30 -verify-machineinstrs | FileCheck %s --check-prefix=SM30 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_30 -verify-machineinstrs | FileCheck %s ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_30 -verify-machineinstrs | %ptxas-verify %} @@ -15,85 +16,125 @@ declare i32 @llvm.nvvm.suq.height(i64) declare i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1)) -; SM20-LABEL: @t0 -; SM30-LABEL: @t0 define i32 @t0(i64 %texHandle) { -; SM20: txq.width.b32 -; SM30: txq.width.b32 +; CHECK-LABEL: t0( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u64 %rd1, [t0_param_0]; +; CHECK-NEXT: txq.width.b32 %r1, [%rd1]; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %width = tail call i32 @llvm.nvvm.txq.width(i64 %texHandle) ret i32 %width } -; SM20-LABEL: @t1 -; SM30-LABEL: @t1 define i32 @t1() { -; SM30: mov.u64 %rd[[HANDLE:[0-9]+]], tex0 +; CHECK-LABEL: t1( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: txq.width.b32 %r1, [tex0]; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %texHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @tex0) -; SM20: txq.width.b32 %r{{[0-9]+}}, [tex0] -; SM30: txq.width.b32 %r{{[0-9]+}}, [%rd[[HANDLE:[0-9]+]]] %width = tail call i32 @llvm.nvvm.txq.width(i64 %texHandle) ret i32 %width } -; SM20-LABEL: @t2 -; SM30-LABEL: @t2 define i32 @t2(i64 %texHandle) { -; SM20: txq.height.b32 -; SM30: txq.height.b32 +; CHECK-LABEL: t2( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u64 %rd1, [t2_param_0]; +; CHECK-NEXT: txq.height.b32 %r1, [%rd1]; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %height = tail call i32 @llvm.nvvm.txq.height(i64 %texHandle) ret i32 %height } -; SM20-LABEL: @t3 -; SM30-LABEL: @t3 define i32 @t3() { -; SM30: mov.u64 %rd[[HANDLE:[0-9]+]], tex0 +; CHECK-LABEL: t3( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: txq.height.b32 %r1, [tex0]; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %texHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @tex0) -; SM20: txq.height.b32 %r{{[0-9]+}}, [tex0] -; SM30: txq.height.b32 %r{{[0-9]+}}, [%rd[[HANDLE:[0-9]+]]] %height = tail call i32 @llvm.nvvm.txq.height(i64 %texHandle) ret i32 %height } -; SM20-LABEL: @s0 -; SM30-LABEL: @s0 define i32 @s0(i64 %surfHandle) { -; SM20: suq.width.b32 -; SM30: suq.width.b32 +; CHECK-LABEL: s0( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u64 %rd1, [s0_param_0]; +; CHECK-NEXT: suq.width.b32 %r1, [%rd1]; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %width = tail call i32 @llvm.nvvm.suq.width(i64 %surfHandle) ret i32 %width } -; SM20-LABEL: @s1 -; SM30-LABEL: @s1 define i32 @s1() { -; SM30: mov.u64 %rd[[HANDLE:[0-9]+]], surf0 +; CHECK-LABEL: s1( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: suq.width.b32 %r1, [surf0]; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %surfHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @surf0) -; SM20: suq.width.b32 %r{{[0-9]+}}, [surf0] -; SM30: suq.width.b32 %r{{[0-9]+}}, [%rd[[HANDLE:[0-9]+]]] %width = tail call i32 @llvm.nvvm.suq.width(i64 %surfHandle) ret i32 %width } -; SM20-LABEL: @s2 -; SM30-LABEL: @s2 define i32 @s2(i64 %surfHandle) { -; SM20: suq.height.b32 -; SM30: suq.height.b32 +; CHECK-LABEL: s2( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u64 %rd1, [s2_param_0]; +; CHECK-NEXT: suq.height.b32 %r1, [%rd1]; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %height = tail call i32 @llvm.nvvm.suq.height(i64 %surfHandle) ret i32 %height } -; SM20-LABEL: @s3 -; SM30-LABEL: @s3 define i32 @s3() { -; SM30: mov.u64 %rd[[HANDLE:[0-9]+]], surf0 +; CHECK-LABEL: s3( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: suq.height.b32 %r1, [surf0]; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %surfHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @surf0) -; SM20: suq.height.b32 %r{{[0-9]+}}, [surf0] -; SM30: suq.height.b32 %r{{[0-9]+}}, [%rd[[HANDLE:[0-9]+]]] %height = tail call i32 @llvm.nvvm.suq.height(i64 %surfHandle) ret i32 %height }