From 604988c69b59c61bf35526d945f8a7fa8e9af434 Mon Sep 17 00:00:00 2001 From: Robert Imschweiler <robert.imschweiler@amd.com> Date: Thu, 27 Mar 2025 17:54:38 -0500 Subject: [PATCH 1/3] [GlobalISel]: G_UNMERGE_VALUES for vectors with different element sizes This commit adds support for using different source and destination vector element sizes for G_UNMERGE_VALUES, e.g.: `%1:_(<2 x s8>), %2:_(<2 x s8>) = G_UNMERGE_VALUES %0:_(<2 x s16>)` This LLVM defect was identified via the AMD Fuzzing project. --- .../CodeGen/GlobalISel/LegalizerHelper.cpp | 32 ++-- llvm/lib/CodeGen/MachineVerifier.cpp | 10 +- .../AMDGPU/GlobalISel/insertelement.ll | 55 +++++++ .../GlobalISel/legalize-unmerge-values.mir | 155 ++++++++++++++++++ 4 files changed, 236 insertions(+), 16 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index a9f80860124fb..4fcad22587f66 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -8281,9 +8281,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) { const unsigned NumDst = MI.getNumOperands() - 1; Register SrcReg = MI.getOperand(NumDst).getReg(); - Register Dst0Reg = MI.getOperand(0).getReg(); - LLT DstTy = MRI.getType(Dst0Reg); - if (DstTy.isPointer()) + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + if (DstTy.getScalarType().isPointer()) return UnableToLegalize; // TODO SrcReg = coerceToScalar(SrcReg); @@ -8293,14 +8292,25 @@ LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) { // Expand scalarizing unmerge as bitcast to integer and shift. 
LLT IntTy = MRI.getType(SrcReg); - MIRBuilder.buildTrunc(Dst0Reg, SrcReg); - - const unsigned DstSize = DstTy.getSizeInBits(); - unsigned Offset = DstSize; - for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) { - auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset); - auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt); - MIRBuilder.buildTrunc(MI.getOperand(I), Shift); + const unsigned DstSize = DstTy.getScalarSizeInBits(); + SmallVector<Register> VectorElems; + Register Shift; + for (unsigned I = 0, Offset = 0; I != NumDst; Offset += DstSize) { + if (Offset) { + auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset); + Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt).getReg(0); + } else { + Shift = SrcReg; + } + if (DstTy.isVector()) { + VectorElems.emplace_back(MIRBuilder.buildTrunc(DstTy.getScalarType(), Shift).getReg(0)); + if (VectorElems.size() == DstTy.getNumElements()) { + MIRBuilder.buildBuildVector(MI.getOperand(I++), VectorElems); + VectorElems.clear(); + } + } else { + MIRBuilder.buildTrunc(MI.getOperand(I++), Shift); + } } MI.eraseFromParent(); diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index a7dbceb88c4c8..7cbf41038f6e4 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -1510,11 +1510,11 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { LLT SrcTy = MRI->getType(MI->getOperand(NumDsts).getReg()); if (DstTy.isVector()) { - // This case is the converse of G_CONCAT_VECTORS. 
- if (!SrcTy.isVector() || - (SrcTy.getScalarType() != DstTy.getScalarType() && - !SrcTy.isPointerVector()) || - SrcTy.isScalableVector() != DstTy.isScalableVector() || + // This case is the converse of G_CONCAT_VECTORS, but relaxed since + // G_UNMERGE_VALUES can handle src and dst vectors with different + // element sizes: + // %1:_(<2 x s8>), %2:_(<2 x s8>) = G_UNMERGE_VALUES %0:_(<2 x s16>) + if (SrcTy.isScalableVector() != DstTy.isScalableVector() || SrcTy.getSizeInBits() != NumDsts * DstTy.getSizeInBits()) report("G_UNMERGE_VALUES source operand does not match vector " "destination operands", diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll index 5eca04c02a9f9..96889f7a957b2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll @@ -6508,3 +6508,58 @@ entry: %insert = insertelement <5 x double> %vec, double %val, i32 %idx ret <5 x double> %insert } + +; Found by fuzzer, reduced with llvm-reduce. 
+define amdgpu_kernel void @insert_very_small_from_very_large(<32 x i16> %L3, ptr %ptr) { +; GPRIDX-LABEL: insert_very_small_from_very_large: +; GPRIDX: ; %bb.0: ; %bb +; GPRIDX-NEXT: s_load_dwordx16 s[12:27], s[8:9], 0x0 +; GPRIDX-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x40 +; GPRIDX-NEXT: s_waitcnt lgkmcnt(0) +; GPRIDX-NEXT: s_lshr_b32 s2, s12, 1 +; GPRIDX-NEXT: s_and_b32 s2, s2, 1 +; GPRIDX-NEXT: s_lshl_b32 s2, s2, 1 +; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 +; GPRIDX-NEXT: v_mov_b32_e32 v2, s2 +; GPRIDX-NEXT: v_mov_b32_e32 v1, s1 +; GPRIDX-NEXT: flat_store_byte v[0:1], v2 +; GPRIDX-NEXT: s_endpgm +; +; GFX10-LABEL: insert_very_small_from_very_large: +; GFX10: ; %bb.0: ; %bb +; GFX10-NEXT: s_clause 0x1 +; GFX10-NEXT: s_load_dwordx16 s[12:27], s[8:9], 0x0 +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x40 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_lshr_b32 s2, s12, 1 +; GFX10-NEXT: v_mov_b32_e32 v0, s0 +; GFX10-NEXT: s_and_b32 s2, s2, 1 +; GFX10-NEXT: v_mov_b32_e32 v1, s1 +; GFX10-NEXT: s_lshl_b32 s2, s2, 1 +; GFX10-NEXT: v_mov_b32_e32 v2, s2 +; GFX10-NEXT: flat_store_byte v[0:1], v2 +; GFX10-NEXT: s_endpgm +; +; GFX11-LABEL: insert_very_small_from_very_large: +; GFX11: ; %bb.0: ; %bb +; GFX11-NEXT: s_clause 0x1 +; GFX11-NEXT: s_load_b512 s[8:23], s[4:5], 0x0 +; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x40 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: s_lshr_b32 s2, s8, 1 +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_and_b32 s2, s2, 1 +; GFX11-NEXT: v_mov_b32_e32 v1, s1 +; GFX11-NEXT: s_lshl_b32 s2, s2, 1 +; GFX11-NEXT: v_mov_b32_e32 v2, s2 +; GFX11-NEXT: flat_store_b8 v[0:1], v2 +; GFX11-NEXT: s_endpgm +bb: + %0 = bitcast <32 x i16> %L3 to i512 + %1 = trunc i512 %0 to i8 + %2 = trunc i8 %1 to i2 + %3 = bitcast i2 %2 to <2 x i1> + %I = insertelement <2 x i1> %3, i1 false, i32 0 + store <2 x i1> %I, ptr %ptr, align 1 + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir index c231aa8334d45..3500df7c99b6e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir @@ -96,6 +96,41 @@ body: | $vgpr1 = COPY %4 ... +--- +name: test_unmerge_v2s8_v2s16 +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: test_unmerge_v2s8_v2s16 + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C1]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s8>), %2:_(<2 x s8>) = G_UNMERGE_VALUES %0:_(<2 x s16>) + %3:_(<2 x s16>) = G_ANYEXT %1 + %4:_(<2 x s16>) = G_ANYEXT %2 + $vgpr0 = COPY %3 + $vgpr1 = COPY %4 +... 
+ --- name: test_unmerge_s16_v3s16 body: | @@ -120,6 +155,50 @@ body: | $vgpr2 = COPY %6 ... +--- +name: test_unmerge_v2s8_v3s16 +body: | + bb.0: + ; CHECK-LABEL: name: test_unmerge_v2s8_v3s16 + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C1]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C3]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST 
[[OR2]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = COPY [[BITCAST4]](<2 x s16>) + ; CHECK-NEXT: $vgpr2 = COPY [[BITCAST5]](<2 x s16>) + %0:_(<3 x s16>) = G_IMPLICIT_DEF + %1:_(<2 x s8>), %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0 + %4:_(<2 x s16>) = G_ANYEXT %1 + %5:_(<2 x s16>) = G_ANYEXT %2 + %6:_(<2 x s16>) = G_ANYEXT %3 + $vgpr0 = COPY %4 + $vgpr1 = COPY %5 + $vgpr2 = COPY %6 +... + --- name: test_unmerge_s16_v4s16 @@ -191,6 +270,62 @@ body: | $vgpr5 = COPY %12 ... +--- +name: test_unmerge_v4s8_v6s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2 + ; CHECK-LABEL: name: test_unmerge_v4s8_v6s16 + ; CHECK: liveins: $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32) + ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8), [[UV16:%[0-9]+]]:_(s8), [[UV17:%[0-9]+]]:_(s8), [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8), [[UV20:%[0-9]+]]:_(s8), [[UV21:%[0-9]+]]:_(s8), [[UV22:%[0-9]+]]:_(s8), [[UV23:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV16]](s8) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV17]](s8) + ; 
CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV18]](s8) + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV19]](s8) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32) + ; CHECK-NEXT: [[UV24:%[0-9]+]]:_(s8), [[UV25:%[0-9]+]]:_(s8), [[UV26:%[0-9]+]]:_(s8), [[UV27:%[0-9]+]]:_(s8), [[UV28:%[0-9]+]]:_(s8), [[UV29:%[0-9]+]]:_(s8), [[UV30:%[0-9]+]]:_(s8), [[UV31:%[0-9]+]]:_(s8), [[UV32:%[0-9]+]]:_(s8), [[UV33:%[0-9]+]]:_(s8), [[UV34:%[0-9]+]]:_(s8), [[UV35:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[UV32]](s8) + ; CHECK-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[UV33]](s8) + ; CHECK-NEXT: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[UV34]](s8) + ; CHECK-NEXT: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[UV35]](s8) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT8]](s32), [[ANYEXT9]](s32), [[ANYEXT10]](s32), [[ANYEXT11]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR1]](<4 x s32>) + ; CHECK-NEXT: $vgpr8_vgpr9_vgpr10_vgpr11 = COPY [[BUILD_VECTOR2]](<4 x s32>) + %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<4 x s8>), %2:_(<4 x s8>), %3:_(<4 x s8>) = G_UNMERGE_VALUES %0 + %4:_(<4 x s32>) = G_ANYEXT %1 + %5:_(<4 x s32>) = G_ANYEXT %2 + %6:_(<4 x s32>) = G_ANYEXT %3 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4 + $vgpr4_vgpr5_vgpr6_vgpr7 = COPY %5 + $vgpr8_vgpr9_vgpr10_vgpr11 = COPY %6 +... 
+ +--- +name: test_unmerge_v3s32_v6s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2 + ; CHECK-LABEL: name: test_unmerge_v3s32_v6s16 + ; CHECK: liveins: $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[UV]](<3 x s32>) + %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x s32>) = G_UNMERGE_VALUES %0 + $vgpr0_vgpr1_vgpr2 = COPY %1 +... + --- name: test_unmerge_s8_s16 @@ -1090,3 +1225,23 @@ body: | $vgpr9_vgpr10_vgpr11 = COPY %8 ... + +--- +name: test_unmerge_v3s32_v12s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + + ; CHECK-LABEL: name: test_unmerge_v3s32_v12s16 + ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<12 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[COPY]](<12 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[UV]](<3 x s32>) + ; CHECK-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[UV1]](<3 x s32>) + %0:_(<12 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + %1:_(<3 x s32>), %2:_(<3 x s32>) = G_UNMERGE_VALUES %0 + $vgpr0_vgpr1_vgpr2 = COPY %1 + $vgpr3_vgpr4_vgpr5 = COPY %2 + +... 
From 733758961f93ee7d261b18c2284d0c16f8cb8e57 Mon Sep 17 00:00:00 2001 From: Robert Imschweiler <robert.imschweiler@amd.com> Date: Thu, 27 Mar 2025 18:02:22 -0500 Subject: [PATCH 2/3] fix formatting --- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 4fcad22587f66..6350ae063e859 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -8303,7 +8303,8 @@ LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) { Shift = SrcReg; } if (DstTy.isVector()) { - VectorElems.emplace_back(MIRBuilder.buildTrunc(DstTy.getScalarType(), Shift).getReg(0)); + VectorElems.emplace_back( + MIRBuilder.buildTrunc(DstTy.getScalarType(), Shift).getReg(0)); if (VectorElems.size() == DstTy.getNumElements()) { MIRBuilder.buildBuildVector(MI.getOperand(I++), VectorElems); VectorElems.clear(); From 8a72c64a936391cd2eb07ef7032f6a72a1d0d59d Mon Sep 17 00:00:00 2001 From: Robert Imschweiler <robert.imschweiler@amd.com> Date: Mon, 7 Apr 2025 04:22:17 -0500 Subject: [PATCH 3/3] use named values in test --- llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll index 96889f7a957b2..650bd3cb47244 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll @@ -6555,11 +6555,11 @@ define amdgpu_kernel void @insert_very_small_from_very_large(<32 x i16> %L3, ptr ; GFX11-NEXT: flat_store_b8 v[0:1], v2 ; GFX11-NEXT: s_endpgm bb: - %0 = bitcast <32 x i16> %L3 to i512 - %1 = trunc i512 %0 to i8 - %2 = trunc i8 %1 to i2 - %3 = bitcast i2 %2 to <2 x i1> - %I = insertelement <2 x i1> %3, i1 false, i32 0 - store <2 x i1> %I, ptr %ptr, align 
1 + %a = bitcast <32 x i16> %L3 to i512 + %b = trunc i512 %a to i8 + %c = trunc i8 %b to i2 + %d = bitcast i2 %c to <2 x i1> + %insert = insertelement <2 x i1> %d, i1 false, i32 0 + store <2 x i1> %insert, ptr %ptr, align 1 ret void }