Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reland f9c8c01: [NVPTX] Aggressively try to replace image handles with references (#119730) #17592

Open
wants to merge 2 commits into
base: sycl
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 15 additions & 12 deletions llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) {
TargetMachine &TM = const_cast<TargetMachine &>(MF->getTarget());
NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine &>(TM);
const NVPTXMachineFunctionInfo *MFI = MF->getInfo<NVPTXMachineFunctionInfo>();
const char *Sym = MFI->getImageHandleSymbol(Index);
StringRef Sym = MFI->getImageHandleSymbol(Index);
StringRef SymName = nvTM.getStrPool().save(Sym);
MCOp = GetSymbolRef(OutContext.getOrCreateSymbol(SymName));
}
Expand All @@ -224,16 +224,13 @@ void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
return;
}

const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);

MCOperand MCOp;
if (!STI.hasImageHandles()) {
if (lowerImageHandleOperand(MI, i, MCOp)) {
OutMI.addOperand(MCOp);
continue;
}
if (lowerImageHandleOperand(MI, i, MCOp)) {
OutMI.addOperand(MCOp);
continue;
}

if (lowerOperand(MO, MCOp))
Expand Down Expand Up @@ -1509,13 +1506,14 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
const AttributeList &PAL = F->getAttributes();
const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());
const NVPTXMachineFunctionInfo *MFI =
MF ? MF->getInfo<NVPTXMachineFunctionInfo>() : nullptr;

Function::const_arg_iterator I, E;
unsigned paramIndex = 0;
bool first = true;
bool isKernelFunc = isKernelFunction(*F);
bool isABI = (STI.getSmVersion() >= 20);
bool hasImageHandles = STI.hasImageHandles();

if (F->arg_empty() && !F->isVarArg()) {
O << "()";
Expand All @@ -1533,25 +1531,30 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
first = false;

// Handle image/sampler parameters
if (isKernelFunction(*F)) {
if (isKernelFunc) {
if (isSampler(*I) || isImage(*I)) {
std::string ParamSym;
raw_string_ostream ParamStr(ParamSym);
ParamStr << F->getName() << "_param_" << paramIndex;
ParamStr.flush();
bool EmitImagePtr = !MFI || !MFI->checkImageHandleSymbol(ParamSym);
if (isImage(*I)) {
if (isImageWriteOnly(*I) || isImageReadWrite(*I)) {
if (hasImageHandles)
if (EmitImagePtr)
O << "\t.param .u64 .ptr .surfref ";
else
O << "\t.param .surfref ";
O << TLI->getParamName(F, paramIndex);
}
else { // Default image is read_only
if (hasImageHandles)
if (EmitImagePtr)
O << "\t.param .u64 .ptr .texref ";
else
O << "\t.param .texref ";
O << TLI->getParamName(F, paramIndex);
}
} else {
if (hasImageHandles)
if (EmitImagePtr)
O << "\t.param .u64 .ptr .samplerref ";
else
O << "\t.param .samplerref ";
Expand Down
22 changes: 15 additions & 7 deletions llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,14 @@
#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXMACHINEFUNCTIONINFO_H
#define LLVM_LIB_TARGET_NVPTX_NVPTXMACHINEFUNCTIONINFO_H

#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineFunction.h"

namespace llvm {
class NVPTXMachineFunctionInfo : public MachineFunctionInfo {
private:
/// Stores a mapping from index to symbol name for removing image handles
/// on Fermi.
/// Stores a mapping from index to symbol name for image handles that are
/// replaced with image references
SmallVector<std::string, 8> ImageHandleList;

public:
Expand All @@ -36,20 +37,27 @@ class NVPTXMachineFunctionInfo : public MachineFunctionInfo {
/// Returns the index for the symbol \p Symbol. If the symbol was previously
/// added, the same index is returned. Otherwise, the symbol is added and the
/// new index is returned.
unsigned getImageHandleSymbolIndex(const char *Symbol) {
unsigned getImageHandleSymbolIndex(StringRef Symbol) {
// Is the symbol already present?
for (unsigned i = 0, e = ImageHandleList.size(); i != e; ++i)
if (ImageHandleList[i] == std::string(Symbol))
if (ImageHandleList[i] == Symbol)
return i;
// Nope, insert it
ImageHandleList.push_back(Symbol);
ImageHandleList.push_back(Symbol.str());
return ImageHandleList.size()-1;
}

/// Returns the symbol name at the given index.
const char *getImageHandleSymbol(unsigned Idx) const {
StringRef getImageHandleSymbol(unsigned Idx) const {
assert(ImageHandleList.size() > Idx && "Bad index");
return ImageHandleList[Idx].c_str();
return ImageHandleList[Idx];
}

/// Reports whether \p Symbol is present in ImageHandleList. A symbol that is
/// present has a mapping, which means its handle is replaced with a reference.
bool checkImageHandleSymbol(StringRef Symbol) const {
  // Linear scan; each stored name is compared against the requested symbol.
  for (const std::string &Entry : ImageHandleList)
    if (Entry == Symbol)
      return true;
  return false;
}
};
}
Expand Down
6 changes: 4 additions & 2 deletions llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1809,6 +1809,8 @@ findIndexForHandle(MachineOperand &Op, MachineFunction &MF, unsigned &Idx) {
MachineInstr &TexHandleDef = *MRI.getVRegDef(Op.getReg());

switch (TexHandleDef.getOpcode()) {
case NVPTX::LD_i64_asi:
case NVPTX::LD_i64_areg_64:
case NVPTX::LD_i64_avar: {
// The handle is a parameter value being loaded, replace with the
// parameter symbol
Expand All @@ -1830,7 +1832,7 @@ findIndexForHandle(MachineOperand &Op, MachineFunction &MF, unsigned &Idx) {
NewSymStr << MF.getName() << "_param_" << Param;

InstrsToRemove.insert(&TexHandleDef);
Idx = MFI->getImageHandleSymbolIndex(NewSymStr.str().c_str());
Idx = MFI->getImageHandleSymbolIndex(NewSymStr.str());
return true;
}
case NVPTX::texsurf_handles: {
Expand All @@ -1839,7 +1841,7 @@ findIndexForHandle(MachineOperand &Op, MachineFunction &MF, unsigned &Idx) {
const GlobalValue *GV = TexHandleDef.getOperand(1).getGlobal();
assert(GV->hasName() && "Global sampler must be named!");
InstrsToRemove.insert(&TexHandleDef);
Idx = MFI->getImageHandleSymbolIndex(GV->getName().data());
Idx = MFI->getImageHandleSymbolIndex(GV->getName());
return true;
}
case NVPTX::nvvm_move_i64:
Expand Down
12 changes: 1 addition & 11 deletions llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ NVPTXSubtarget::NVPTXSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS,
const NVPTXTargetMachine &TM)
: NVPTXGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), PTXVersion(0),
FullSmVersion(200), SmVersion(getSmVersion()), TM(TM),
FullSmVersion(200), SmVersion(getSmVersion()),
TLInfo(TM, initializeSubtargetDependencies(CPU, FS)) {
TSInfo = std::make_unique<NVPTXSelectionDAGInfo>();
}
Expand All @@ -66,16 +66,6 @@ const SelectionDAGTargetInfo *NVPTXSubtarget::getSelectionDAGInfo() const {
return TSInfo.get();
}

bool NVPTXSubtarget::hasImageHandles() const {
// Enable handles for Kepler+, where CUDA supports indirect surfaces and
// textures
if (TM.getDrvInterface() == NVPTX::CUDA)
return (SmVersion >= 30);

// Disabled, otherwise
return false;
}

// FP16 math is allowed only when the subtarget reports FP16 math support and
// the NoF16Math setting is not enabled.
bool NVPTXSubtarget::allowFP16Math() const {
  if (!hasFP16Math())
    return false;
  return !NoF16Math;
}
Expand Down
2 changes: 0 additions & 2 deletions llvm/lib/Target/NVPTX/NVPTXSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
// FullSmVersion.
unsigned int SmVersion;

const NVPTXTargetMachine &TM;
NVPTXInstrInfo InstrInfo;
NVPTXTargetLowering TLInfo;
std::unique_ptr<const SelectionDAGTargetInfo> TSInfo;
Expand Down Expand Up @@ -82,7 +81,6 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
bool hasClusters() const { return SmVersion >= 90 && PTXVersion >= 78; }
bool hasLDG() const { return SmVersion >= 32; }
bool hasHWROT32() const { return SmVersion >= 32; }
bool hasImageHandles() const;
bool hasFP16Math() const { return SmVersion >= 53; }
bool hasBF16Math() const { return SmVersion >= 80; }
bool allowFP16Math() const;
Expand Down
6 changes: 1 addition & 5 deletions llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -440,14 +440,10 @@ void NVPTXPassConfig::addIRPasses() {
}

bool NVPTXPassConfig::addInstSelector() {
const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl();

addPass(createLowerAggrCopies());
addPass(createAllocaHoisting());
addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel()));

if (!ST.hasImageHandles())
addPass(createNVPTXReplaceImageHandlesPass());
addPass(createNVPTXReplaceImageHandlesPass());

return false;
}
Expand Down
53 changes: 32 additions & 21 deletions llvm/test/CodeGen/NVPTX/surf-read-cuda.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefix=SM20
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_30 -verify-machineinstrs | FileCheck %s --check-prefix=SM30
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_30 -verify-machineinstrs | FileCheck %s
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_30 -verify-machineinstrs | %ptxas-verify %}

Expand All @@ -9,38 +10,48 @@ declare i32 @llvm.nvvm.suld.1d.i32.trap(i64, i32)
declare i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1))


; SM20-LABEL: .entry foo
; SM30-LABEL: .entry foo
define ptx_kernel void @foo(i64 %img, ptr %red, i32 %idx) {
; SM20: ld.param.u64 %rd[[SURFREG:[0-9]+]], [foo_param_0];
; SM20: suld.b.1d.b32.trap {%r[[RED:[0-9]+]]}, [%rd[[SURFREG]], {%r{{[0-9]+}}}]
; SM30: ld.param.u64 %rd[[SURFREG:[0-9]+]], [foo_param_0];
; SM30: suld.b.1d.b32.trap {%r[[RED:[0-9]+]]}, [%rd[[SURFREG]], {%r{{[0-9]+}}}]
; CHECK-LABEL: foo(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .f32 %f<2>;
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [foo_param_0];
; CHECK-NEXT: ld.param.u64 %rd2, [foo_param_1];
; CHECK-NEXT: cvta.to.global.u64 %rd3, %rd2;
; CHECK-NEXT: ld.param.u32 %r1, [foo_param_2];
; CHECK-NEXT: suld.b.1d.b32.trap {%r2}, [%rd1, {%r1}];
; CHECK-NEXT: cvt.rn.f32.s32 %f1, %r2;
; CHECK-NEXT: st.global.f32 [%rd3], %f1;
; CHECK-NEXT: ret;
%val = tail call i32 @llvm.nvvm.suld.1d.i32.trap(i64 %img, i32 %idx)
; SM20: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
; SM30: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
%ret = sitofp i32 %val to float
; SM20: st.global.f32 [%rd{{[0-9]+}}], %f[[REDF]]
; SM30: st.global.f32 [%rd{{[0-9]+}}], %f[[REDF]]
store float %ret, ptr %red
ret void
}

@surf0 = internal addrspace(1) global i64 0, align 8

; SM20-LABEL: .entry bar
; SM30-LABEL: .entry bar
define ptx_kernel void @bar(ptr %red, i32 %idx) {
; SM30: mov.u64 %rd[[SURFHANDLE:[0-9]+]], surf0
; CHECK-LABEL: bar(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .f32 %f<2>;
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [bar_param_0];
; CHECK-NEXT: cvta.to.global.u64 %rd2, %rd1;
; CHECK-NEXT: ld.param.u32 %r1, [bar_param_1];
; CHECK-NEXT: suld.b.1d.b32.trap {%r2}, [surf0, {%r1}];
; CHECK-NEXT: cvt.rn.f32.s32 %f1, %r2;
; CHECK-NEXT: st.global.f32 [%rd2], %f1;
; CHECK-NEXT: ret;
%surfHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @surf0)
; SM20: suld.b.1d.b32.trap {%r[[RED:[0-9]+]]}, [surf0, {%r{{[0-9]+}}}]
; SM30: suld.b.1d.b32.trap {%r[[RED:[0-9]+]]}, [%rd[[SURFHANDLE]], {%r{{[0-9]+}}}]
%val = tail call i32 @llvm.nvvm.suld.1d.i32.trap(i64 %surfHandle, i32 %idx)
; SM20: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
; SM30: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
%ret = sitofp i32 %val to float
; SM20: st.global.f32 [%rd{{[0-9]+}}], %f[[REDF]]
; SM30: st.global.f32 [%rd{{[0-9]+}}], %f[[REDF]]
store float %ret, ptr %red
ret void
}
Expand Down
24 changes: 6 additions & 18 deletions llvm/test/CodeGen/NVPTX/surf-tex.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
# RUN: %python %s --target=cuda --tests=suld,sust,tex,tld4 --gen-list=%t.list > %t-cuda.ll
# RUN: llc -mcpu=sm_60 -mattr=+ptx43 %t-cuda.ll -verify-machineinstrs -o - | FileCheck %t-cuda.ll --check-prefixes=CHECK,CHECK-CUDA
# RUN: llc -mcpu=sm_60 -mattr=+ptx43 %t-cuda.ll -verify-machineinstrs -o - | FileCheck %t-cuda.ll
# RUN: %if ptxas %{ llc -mcpu=sm_60 -mattr=+ptx43 %t-cuda.ll -verify-machineinstrs -o - | %ptxas-verify %}

# We only need to run this a second time for texture tests, because
# there is a difference between unified and non-unified intrinsics.
#
# RUN: %python %s --target=nvcl --tests=suld,sust,tex,tld4 --gen-list-append --gen-list=%t.list > %t-nvcl.ll
# RUN: llc %t-nvcl.ll -verify-machineinstrs -o - | FileCheck %t-nvcl.ll --check-prefixes=CHECK,CHECK-NVCL
# RUN: llc %t-nvcl.ll -verify-machineinstrs -o - | FileCheck %t-nvcl.ll
# RUN: %if ptxas %{ llc %t-nvcl.ll -verify-machineinstrs -o - | %ptxas-verify %}

# Verify that all instructions and intrinsics defined in TableGen
Expand Down Expand Up @@ -264,9 +264,7 @@ def gen_suld_tests(target, global_surf):
ret void
}
; CHECK-LABEL: .entry ${test_name}_global
; CHECK-CUDA: mov.u64 [[REG${reg_id}:%.*]], ${global_surf}
; CHECK-CUDA: ${instruction} ${reg_ret}, [[[REG${reg_id}]], ${reg_access}]
; CHECK-NVCL: ${instruction} ${reg_ret}, [${global_surf}, ${reg_access}]
; CHECK: ${instruction} ${reg_ret}, [${global_surf}, ${reg_access}]
define ptx_kernel void @${test_name}_global(${retty}* %ret, ${access}) {
%gs = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_surf})
%val = tail call ${retty} @${intrinsic}(i64 %gs, ${access})
Expand Down Expand Up @@ -309,7 +307,6 @@ def gen_suld_tests(target, global_surf):
"reg_ret": get_ptx_vec_reg(vec, dtype),
"reg_surf": get_ptx_surface(target),
"reg_access": get_ptx_surface_access(geom),
"reg_id": get_table_gen_id(),
}
gen_test(template, params)
generated_items.append((params["intrinsic"], params["instruction"]))
Expand Down Expand Up @@ -359,9 +356,7 @@ def gen_sust_tests(target, global_surf):
ret void
}
; CHECK-LABEL: .entry ${test_name}_global
; CHECK-CUDA: mov.u64 [[REG${reg_id}:%.*]], ${global_surf}
; CHECK-CUDA: ${instruction} [[[REG${reg_id}]], ${reg_access}], ${reg_value}
; CHECK-NVCL: ${instruction} [${global_surf}, ${reg_access}], ${reg_value}
; CHECK: ${instruction} [${global_surf}, ${reg_access}], ${reg_value}
define ptx_kernel void @${test_name}_global(${value}, ${access}) {
%gs = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_surf})
tail call void @${intrinsic}(i64 %gs, ${access}, ${value})
Expand Down Expand Up @@ -415,7 +410,6 @@ def gen_sust_tests(target, global_surf):
"reg_value": get_ptx_vec_reg(vec, ctype),
"reg_surf": get_ptx_surface(target),
"reg_access": get_ptx_surface_access(geom),
"reg_id": get_table_gen_id(),
}
gen_test(template, params)
generated_items.append((params["intrinsic"], params["instruction"]))
Expand Down Expand Up @@ -611,9 +605,7 @@ def gen_tex_tests(target, global_tex, global_sampler):
ret void
}
; CHECK-LABEL: .entry ${test_name}_global
; CHECK-CUDA: mov.u64 [[REG${reg_id}:%.*]], ${global_tex}
; CHECK-CUDA: ${instruction} ${ptx_ret}, [[[REG${reg_id}]], ${ptx_global_sampler} ${ptx_access}]
; CHECK-NVCL: ${instruction} ${ptx_ret}, [${global_tex}, ${ptx_global_sampler} ${ptx_access}]
; CHECK: ${instruction} ${ptx_ret}, [${global_tex}, ${ptx_global_sampler} ${ptx_access}]
define ptx_kernel void @${test_name}_global(${retty}* %ret, ${access}) {
%gt = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_tex})
${get_sampler_handle}
Expand Down Expand Up @@ -697,7 +689,6 @@ def gen_tex_tests(target, global_tex, global_sampler):
"ptx_tex": get_ptx_texture(target),
"ptx_access": get_ptx_texture_access(geom, ctype),
"ptx_global_sampler": get_ptx_global_sampler(target, global_sampler),
"reg_id": get_table_gen_id(),
}
gen_test(template, params)
generated_items.append((params["intrinsic"], params["instruction"]))
Expand Down Expand Up @@ -798,9 +789,7 @@ def gen_tld4_tests(target, global_tex, global_sampler):
ret void
}
; CHECK-LABEL: .entry ${test_name}_global
; CHECK-CUDA: mov.u64 [[REG${reg_id}:%.*]], ${global_tex}
; CHECK-CUDA: ${instruction} ${ptx_ret}, [[[REG${reg_id}]], ${ptx_global_sampler} ${ptx_access}]
; CHECK-NVCL: ${instruction} ${ptx_ret}, [${global_tex}, ${ptx_global_sampler} ${ptx_access}]
; CHECK: ${instruction} ${ptx_ret}, [${global_tex}, ${ptx_global_sampler} ${ptx_access}]
define ptx_kernel void @${test_name}_global(${retty}* %ret, ${access}) {
%gt = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_tex})
${get_sampler_handle}
Expand Down Expand Up @@ -846,7 +835,6 @@ def gen_tld4_tests(target, global_tex, global_sampler):
"ptx_tex": get_ptx_texture(target),
"ptx_access": get_ptx_tld4_access(geom),
"ptx_global_sampler": get_ptx_global_sampler(target, global_sampler),
"reg_id": get_table_gen_id(),
}
gen_test(template, params)
generated_items.append((params["intrinsic"], params["instruction"]))
Expand Down
Loading
Loading