-
Notifications
You must be signed in to change notification settings - Fork 14.5k
[AMDGPU] Add NoaliasAddrSpace to AAMDnodes #149247
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-llvm-ir @llvm/pr-subscribers-backend-amdgpu Author: None (Shoreshen) ChangesThis is the following PR of #136553 which calculate NoaliasAddrSpace. This PR carries the info calculated into MIR by adding it into AAMDnodes Full diff: https://github.com/llvm/llvm-project/pull/149247.diff 6 Files Affected:
diff --git a/llvm/include/llvm/IR/Metadata.h b/llvm/include/llvm/IR/Metadata.h
index 2de26c0c1f7c7..959003eff860e 100644
--- a/llvm/include/llvm/IR/Metadata.h
+++ b/llvm/include/llvm/IR/Metadata.h
@@ -759,18 +759,18 @@ class MDString : public Metadata {
/// memory access used by the alias-analysis infrastructure.
struct AAMDNodes {
explicit AAMDNodes() = default;
- explicit AAMDNodes(MDNode *T, MDNode *TS, MDNode *S, MDNode *N)
- : TBAA(T), TBAAStruct(TS), Scope(S), NoAlias(N) {}
+ explicit AAMDNodes(MDNode *T, MDNode *TS, MDNode *S, MDNode *N, MDNode *NAS)
+ : TBAA(T), TBAAStruct(TS), Scope(S), NoAlias(N), NoAliasAddrSpace(NAS) {}
bool operator==(const AAMDNodes &A) const {
return TBAA == A.TBAA && TBAAStruct == A.TBAAStruct && Scope == A.Scope &&
- NoAlias == A.NoAlias;
+ NoAlias == A.NoAlias && NoAliasAddrSpace == A.NoAliasAddrSpace;
}
bool operator!=(const AAMDNodes &A) const { return !(*this == A); }
explicit operator bool() const {
- return TBAA || TBAAStruct || Scope || NoAlias;
+ return TBAA || TBAAStruct || Scope || NoAlias || NoAliasAddrSpace;
}
/// The tag for type-based alias analysis.
@@ -785,6 +785,9 @@ struct AAMDNodes {
/// The tag specifying the noalias scope.
MDNode *NoAlias = nullptr;
+ /// The tag specifying the noalias address space scope.
+ MDNode *NoAliasAddrSpace = nullptr;
+
// Shift tbaa Metadata node to start off bytes later
LLVM_ABI static MDNode *shiftTBAA(MDNode *M, size_t off);
@@ -806,6 +809,8 @@ struct AAMDNodes {
Result.TBAAStruct = Other.TBAAStruct == TBAAStruct ? TBAAStruct : nullptr;
Result.Scope = Other.Scope == Scope ? Scope : nullptr;
Result.NoAlias = Other.NoAlias == NoAlias ? NoAlias : nullptr;
+ Result.NoAliasAddrSpace =
+ Other.NoAliasAddrSpace == NoAliasAddrSpace ? NoAliasAddrSpace : nullptr;
return Result;
}
@@ -818,6 +823,7 @@ struct AAMDNodes {
TBAAStruct ? shiftTBAAStruct(TBAAStruct, Offset) : nullptr;
Result.Scope = Scope;
Result.NoAlias = NoAlias;
+ Result.NoAliasAddrSpace = NoAliasAddrSpace;
return Result;
}
@@ -833,6 +839,7 @@ struct AAMDNodes {
Result.TBAAStruct = TBAAStruct;
Result.Scope = Scope;
Result.NoAlias = NoAlias;
+ Result.NoAliasAddrSpace = NoAliasAddrSpace;
return Result;
}
@@ -860,12 +867,12 @@ struct AAMDNodes {
template<>
struct DenseMapInfo<AAMDNodes> {
static inline AAMDNodes getEmptyKey() {
- return AAMDNodes(DenseMapInfo<MDNode *>::getEmptyKey(),
- nullptr, nullptr, nullptr);
+ return AAMDNodes(DenseMapInfo<MDNode *>::getEmptyKey(), nullptr, nullptr,
+ nullptr, nullptr);
}
static inline AAMDNodes getTombstoneKey() {
- return AAMDNodes(DenseMapInfo<MDNode *>::getTombstoneKey(),
+ return AAMDNodes(DenseMapInfo<MDNode *>::getTombstoneKey(), nullptr,
nullptr, nullptr, nullptr);
}
@@ -873,7 +880,8 @@ struct DenseMapInfo<AAMDNodes> {
return DenseMapInfo<MDNode *>::getHashValue(Val.TBAA) ^
DenseMapInfo<MDNode *>::getHashValue(Val.TBAAStruct) ^
DenseMapInfo<MDNode *>::getHashValue(Val.Scope) ^
- DenseMapInfo<MDNode *>::getHashValue(Val.NoAlias);
+ DenseMapInfo<MDNode *>::getHashValue(Val.NoAlias) ^
+ DenseMapInfo<MDNode *>::getHashValue(Val.NoAliasAddrSpace);
}
static bool isEqual(const AAMDNodes &LHS, const AAMDNodes &RHS) {
diff --git a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
index c871070eb037e..7025b8354564a 100644
--- a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -525,6 +525,8 @@ AAMDNodes AAMDNodes::merge(const AAMDNodes &Other) const {
Result.TBAAStruct = nullptr;
Result.Scope = MDNode::getMostGenericAliasScope(Scope, Other.Scope);
Result.NoAlias = MDNode::intersect(NoAlias, Other.NoAlias);
+ Result.NoAliasAddrSpace = MDNode::getMostGenericNoaliasAddrspace(
+ NoAliasAddrSpace, Other.NoAliasAddrSpace);
return Result;
}
@@ -533,6 +535,8 @@ AAMDNodes AAMDNodes::concat(const AAMDNodes &Other) const {
Result.TBAA = Result.TBAAStruct = nullptr;
Result.Scope = MDNode::getMostGenericAliasScope(Scope, Other.Scope);
Result.NoAlias = MDNode::intersect(NoAlias, Other.NoAlias);
+ Result.NoAliasAddrSpace = MDNode::getMostGenericNoaliasAddrspace(
+ NoAliasAddrSpace, Other.NoAliasAddrSpace);
return Result;
}
diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp
index 0d251697f2567..c612f8de7b50b 100644
--- a/llvm/lib/CodeGen/MachineOperand.cpp
+++ b/llvm/lib/CodeGen/MachineOperand.cpp
@@ -1273,6 +1273,10 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
OS << ", !noalias ";
AAInfo.NoAlias->printAsOperand(OS, MST);
}
+ if (AAInfo.NoAliasAddrSpace) {
+ OS << ", !noalias.addrspace ";
+ AAInfo.NoAliasAddrSpace->printAsOperand(OS, MST);
+ }
if (getRanges()) {
OS << ", !range ";
getRanges()->printAsOperand(OS, MST);
diff --git a/llvm/lib/IR/Metadata.cpp b/llvm/lib/IR/Metadata.cpp
index f0448b06e7e82..0961b3eaf56e3 100644
--- a/llvm/lib/IR/Metadata.cpp
+++ b/llvm/lib/IR/Metadata.cpp
@@ -1778,6 +1778,7 @@ AAMDNodes Instruction::getAAMetadata() const {
Result.TBAAStruct = Info.lookup(LLVMContext::MD_tbaa_struct);
Result.Scope = Info.lookup(LLVMContext::MD_alias_scope);
Result.NoAlias = Info.lookup(LLVMContext::MD_noalias);
+ Result.NoAliasAddrSpace = Info.lookup(LLVMContext::MD_noalias_addrspace);
}
return Result;
}
@@ -1787,6 +1788,7 @@ void Instruction::setAAMetadata(const AAMDNodes &N) {
setMetadata(LLVMContext::MD_tbaa_struct, N.TBAAStruct);
setMetadata(LLVMContext::MD_alias_scope, N.Scope);
setMetadata(LLVMContext::MD_noalias, N.NoAlias);
+ setMetadata(LLVMContext::MD_noalias_addrspace, N.NoAliasAddrSpace);
}
void Instruction::setNoSanitizeMetadata() {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll
index 340e293cda7b5..6ee0c74863770 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll
@@ -12,7 +12,7 @@ define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_intrinsic(ptr %ptr, float %da
; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX942-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr)
+ ; GFX942-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, !noalias.addrspace !0)
; GFX942-NEXT: S_ENDPGM 0
;
; GFX11-LABEL: name: flat_atomic_fadd_f32_no_rtn_intrinsic
@@ -23,7 +23,7 @@ define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_intrinsic(ptr %ptr, float %da
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX11-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr)
+ ; GFX11-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, !noalias.addrspace !0)
; GFX11-NEXT: S_ENDPGM 0
%ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr %ptr, float %data)
ret void
@@ -38,7 +38,7 @@ define amdgpu_ps float @flat_atomic_fadd_f32_rtn_intrinsic(ptr %ptr, float %data
; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX942-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr)
+ ; GFX942-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, !noalias.addrspace !0)
; GFX942-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]]
; GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
;
@@ -50,7 +50,7 @@ define amdgpu_ps float @flat_atomic_fadd_f32_rtn_intrinsic(ptr %ptr, float %data
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr)
+ ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, !noalias.addrspace !0)
; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]]
; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
%ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr %ptr, float %data)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll
index c82ae2fbcbbdc..bf3697924c22c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll
@@ -13,7 +13,7 @@ define amdgpu_ps void @flat_atomic_fadd_f64_no_rtn_atomicrmw(ptr %ptr, double %d
; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: FLAT_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr)
+ ; GFX90A_GFX942-NEXT: FLAT_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, !noalias.addrspace !0)
; GFX90A_GFX942-NEXT: S_ENDPGM 0
%ret = atomicrmw fadd ptr %ptr, double %data syncscope("wavefront") monotonic, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
ret void
@@ -30,7 +30,7 @@ define amdgpu_ps double @flat_atomic_fadd_f64_rtn_atomicrmw(ptr %ptr, double %da
; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr)
+ ; GFX90A_GFX942-NEXT: [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, !noalias.addrspace !0)
; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub0
; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub1
; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Pull Request Overview
This PR adds NoaliasAddrSpace metadata support to AAMDNodes, extending the existing alias analysis infrastructure to carry noalias address space information through the MIR (Machine Intermediate Representation). This follows a previous PR that calculated NoaliasAddrSpace.
- Updates the AAMDNodes structure to include NoAliasAddrSpace field
- Integrates NoAliasAddrSpace metadata handling in instruction metadata operations
- Updates test expectations to reflect the new !noalias.addrspace metadata in generated MIR
Reviewed Changes
Copilot reviewed 6 out of 6 changed files in this pull request and generated 1 comment.
Show a summary per file
File | Description |
---|---|
llvm/include/llvm/IR/Metadata.h | Adds NoAliasAddrSpace field to AAMDNodes struct and updates related operations |
llvm/lib/IR/Metadata.cpp | Updates instruction metadata getters/setters to handle NoAliasAddrSpace |
llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp | Updates merge and concat operations to handle NoAliasAddrSpace metadata |
llvm/lib/CodeGen/MachineOperand.cpp | Adds printing support for !noalias.addrspace metadata in machine operands |
llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll | Updates test expectations to include !noalias.addrspace metadata |
llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll | Updates test expectations to include !noalias.addrspace metadata |
@@ -759,18 +759,18 @@ class MDString : public Metadata { | |||
/// memory access used by the alias-analysis infrastructure. | |||
struct AAMDNodes { | |||
explicit AAMDNodes() = default; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The constructor signature change is a breaking change that removes the 4-parameter constructor. Consider providing both constructors for backward compatibility, with the 4-parameter version defaulting NoAliasAddrSpace to nullptr.
explicit AAMDNodes() = default; | |
explicit AAMDNodes() = default; | |
explicit AAMDNodes(MDNode *T, MDNode *TS, MDNode *S, MDNode *N) | |
: AAMDNodes(T, TS, S, N, nullptr) {} |
Copilot uses AI. Check for mistakes.
This is the following PR of #136553 which calculate NoaliasAddrSpace.
This PR carries the info calculated into MIR by adding it into AAMDnodes