Skip to content

Commit cf3242f

Browse files
authored
[InstCombine] Pull shuffles out of binops with splatted ops (#137948)
Given a binary op on a splatted vector and a splatted constant, InstCombine will normally pull the shuffle out in `InstCombinerImpl::foldVectorBinop`: ```llvm define <4 x i32> @f(i32 %x) { %x.insert = insertelement <4 x i32> poison, i32 %x, i64 0 %x.splat = shufflevector <4 x i32> %x.insert, <4 x i32> poison, <4 x i32> zeroinitializer %res = add <4 x i32> %x.splat, splat (i32 42) ret <4 x i32> %res } ``` ```llvm define <4 x i32> @f(i32 %x) { %x.insert = insertelement <4 x i32> poison, i32 %x, i64 0 %1 = add <4 x i32> %x.insert, <i32 42, i32 poison, i32 poison, i32 poison> %res = shufflevector <4 x i32> %1, <4 x i32> poison, <4 x i32> zeroinitializer ret <4 x i32> %res } ``` However, this currently only operates on fixed-length vectors. Splats of scalable vectors don't currently have their shuffle pulled out, e.g.: ```llvm define <vscale x 4 x i32> @f(i32 %x) { %x.insert = insertelement <vscale x 4 x i32> poison, i32 %x, i64 0 %x.splat = shufflevector <vscale x 4 x i32> %x.insert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer %res = add <vscale x 4 x i32> %x.splat, splat (i32 42) ret <vscale x 4 x i32> %res } ``` Having this canonical form with the shuffle pulled out is important as VectorCombine relies on it in order to scalarize binary ops in `scalarizeBinopOrCmp`, which would prevent the need for #137786. 
This also brings it in line with scalable binary ops with two non-constant operands: https://godbolt.org/z/M9f7ebzca This adds a combine just after the fixed-length version, but restricted to splats at index 0 so that it also handles the scalable case. So whilst the existing combine looks like: `Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask)` This patch adds: `Op(shuffle(V1, 0), (splat C)) -> shuffle(Op(V1, (splat C)), 0)` I think this could be generalized to other splat indexes that aren't zero, but I think it would be dead code since only fixed-length vectors can have non-zero shuffle indices, which would be covered by the existing combine.
1 parent 9981afc commit cf3242f

File tree

3 files changed

+59
-2
lines changed

3 files changed

+59
-2
lines changed

llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2271,6 +2271,27 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
22712271
}
22722272
}
22732273

2274+
// Similar to the combine above, but handles the case for scalable vectors
2275+
// where both shuffle(V1, 0) and C are splats.
2276+
//
2277+
// Op(shuffle(V1, 0), (splat C)) -> shuffle(Op(V1, (splat C)), 0)
2278+
if (isa<ScalableVectorType>(Inst.getType()) &&
2279+
match(&Inst, m_c_BinOp(m_OneUse(m_Shuffle(m_Value(V1), m_Poison(),
2280+
m_ZeroMask())),
2281+
m_ImmConstant(C)))) {
2282+
if (Constant *Splat = C->getSplatValue()) {
2283+
bool ConstOp1 = isa<Constant>(RHS);
2284+
VectorType *V1Ty = cast<VectorType>(V1->getType());
2285+
Constant *NewC = ConstantVector::getSplat(V1Ty->getElementCount(), Splat);
2286+
2287+
Value *NewLHS = ConstOp1 ? V1 : NewC;
2288+
Value *NewRHS = ConstOp1 ? NewC : V1;
2289+
VectorType *VTy = cast<VectorType>(Inst.getType());
2290+
SmallVector<int> Mask(VTy->getElementCount().getKnownMinValue(), 0);
2291+
return createBinOpShuffle(NewLHS, NewRHS, Mask);
2292+
}
2293+
}
2294+
22742295
// Try to reassociate to sink a splat shuffle after a binary operation.
22752296
if (Inst.isAssociative() && Inst.isCommutative()) {
22762297
// Canonicalize shuffle operand as LHS.

llvm/test/Transforms/InstCombine/getelementptr.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -282,8 +282,8 @@ define <2 x i1> @test13_fixed_scalable(i64 %X, ptr %P, <2 x i64> %y) nounwind {
282282
define <vscale x 2 x i1> @test13_scalable_scalable(i64 %X, ptr %P, <vscale x 2 x i64> %y) nounwind {
283283
; CHECK-LABEL: @test13_scalable_scalable(
284284
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[X:%.*]], i64 0
285-
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
286-
; CHECK-NEXT: [[A_IDX:%.*]] = shl nsw <vscale x 2 x i64> [[DOTSPLAT]], splat (i64 3)
285+
; CHECK-NEXT: [[TMP3:%.*]] = shl nsw <vscale x 2 x i64> [[DOTSPLATINSERT]], splat (i64 3)
286+
; CHECK-NEXT: [[A_IDX:%.*]] = shufflevector <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
287287
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
288288
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP1]], 4
289289
; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP2]], i64 0

llvm/test/Transforms/InstCombine/vec_shuffle-inseltpoison.ll

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1789,3 +1789,39 @@ define <4 x i32> @PR46872(<4 x i32> %x) {
17891789
ret <4 x i32> %a
17901790
}
17911791

1792+
define <vscale x 4 x i32> @scalable_splat_binop_constant_rhs(<vscale x 4 x i32> %x) {
1793+
; CHECK-LABEL: @scalable_splat_binop_constant_rhs(
1794+
; CHECK-NEXT: [[R1:%.*]] = add <vscale x 4 x i32> [[R:%.*]], splat (i32 42)
1795+
; CHECK-NEXT: [[R2:%.*]] = shufflevector <vscale x 4 x i32> [[R1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
1796+
; CHECK-NEXT: ret <vscale x 4 x i32> [[R2]]
1797+
;
1798+
1799+
%splatx = shufflevector <vscale x 4 x i32> %x, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
1800+
%r = add <vscale x 4 x i32> %splatx, splat (i32 42)
1801+
ret <vscale x 4 x i32> %r
1802+
}
1803+
1804+
define <vscale x 4 x float> @scalable_splat_binop_constant_lhs(<vscale x 4 x float> %x) {
1805+
; CHECK-LABEL: @scalable_splat_binop_constant_lhs(
1806+
; CHECK-NEXT: [[R1:%.*]] = fadd <vscale x 4 x float> [[R:%.*]], splat (float 4.200000e+01)
1807+
; CHECK-NEXT: [[R2:%.*]] = shufflevector <vscale x 4 x float> [[R1]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
1808+
; CHECK-NEXT: ret <vscale x 4 x float> [[R2]]
1809+
;
1810+
1811+
%splatx = shufflevector <vscale x 4 x float> %x, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
1812+
%r = fadd <vscale x 4 x float> splat (float 42.0), %splatx
1813+
ret <vscale x 4 x float> %r
1814+
}
1815+
1816+
; Negative test - shouldn't pull shuffle out as udiv isn't safe to speculate.
1817+
define <vscale x 4 x i32> @scalable_splat_binop_constant_ub(<vscale x 4 x i32> %x) {
1818+
; CHECK-LABEL: @scalable_splat_binop_constant_ub(
1819+
; CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <vscale x 4 x i32> [[X:%.*]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
1820+
; CHECK-NEXT: [[R:%.*]] = udiv <vscale x 4 x i32> splat (i32 42), [[SPLATX]]
1821+
; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
1822+
;
1823+
1824+
%splatx = shufflevector <vscale x 4 x i32> %x, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
1825+
%r = udiv <vscale x 4 x i32> splat (i32 42), %splatx
1826+
ret <vscale x 4 x i32> %r
1827+
}

0 commit comments

Comments
 (0)