Skip to content

Commit b83311f

Browse files
fda0igcbot
authored and committed
Fix GenISA_Int4VectorPack (clear top 4 input bits)
The original GenISA_Int4VectorPack implementation assumed that the upper 4 bits of each input byte would always be zero. Add `and` instructions that clear the upper 4 bits before packing.
1 parent 4074fd7 commit b83311f

File tree

3 files changed

+16
-16
lines changed

3 files changed

+16
-16
lines changed

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 6 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -21328,21 +21328,20 @@ void EmitPass::emitInt4VectorPack(llvm::GenIntrinsicInst *GII) {
2132821328
}
2132921329

2133021330
CVariable *immFour = m_currShader->ImmToVariable(4, ISA_TYPE_UB);
21331+
CVariable *immMask4Bit = m_currShader->ImmToVariable(0b1111, ISA_TYPE_UB);
2133121332

2133221333
for (int rowOffset = 0; rowOffset < dstNum; rowOffset += execSize) {
21333-
CVariable *row0 = m_currShader->GetNewAlias(cinput, ISA_TYPE_UB, rowOffset * 2, execSize);
2133421334
CVariable *dst = m_currShader->GetNewAlias(m_destination, ISA_TYPE_UB, rowOffset, execSize);
21335+
CVariable *row0 = m_currShader->GetNewAlias(cinput, ISA_TYPE_UB, rowOffset * 2, execSize);
21336+
m_encoder->And(dst, row0, immMask4Bit);
2133521337

2133621338
// There is an edge case where we pack <3 x i8> into <2 x i8>.
2133721339
// The missing 4th i8 cannot be loaded.
2133821340
if (rowOffset * 2 + execSize < cinput->GetNumberElement()) {
2133921341
CVariable *row1 = m_currShader->GetNewAlias(cinput, ISA_TYPE_UB, rowOffset * 2 + execSize, execSize);
21340-
CVariable *shl4 = m_currShader->GetNewVariable(row1, "shl4");
21341-
m_encoder->Shl(shl4, row1, immFour);
21342-
m_encoder->Or(dst, row0, shl4);
21343-
} else {
21344-
// Due to missing last element pass row0 directly to dst.
21345-
m_encoder->Copy(dst, row0);
21342+
CVariable *row1Shl4 = m_currShader->GetNewVariable(row1, "row1Shl4");
21343+
m_encoder->Shl(row1Shl4, row1, immFour);
21344+
m_encoder->Or(dst, dst, row1Shl4);
2134621345
}
2134721346
}
2134821347

IGC/Compiler/tests/EmitVISAPass/emit-int4-vector/pack-3-to-2-uniform.ll

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -31,11 +31,10 @@ entry:
3131
%i1 = insertelement <3 x i8> %i0, i8 %l1, i32 1
3232
%i2 = insertelement <3 x i8> %i1, i8 %l2, i32 2
3333

34-
; CHECK: shl (M1_NM, 1) [[SHIFTED1:[A-z0-9]*]](0,0)<1> [[LOADED:[A-z0-9]*]](0,1)<0;1,0> 0x4:ub
35-
; CHECK: or (M1_NM, 1) [[PACKED:[A-z0-9]*]](0,0)<1> [[LOADED]](0,0)<0;1,0> [[SHIFTED1]](0,0)<0;1,0>
36-
; CHECK-NOT: shl
37-
; CHECK-NOT: or
38-
; CHECK: mov (M1_NM, 1) [[PACKED]](0,1)<1> [[LOADED]](0,2)<0;1,0>
34+
; CHECK: and (M1_NM, 1) [[PACKED:[A-z0-9]+]](0,0)<1> [[LOADED:[A-z0-9]+]](0,0)<0;1,0> 0xf:ub
35+
; CHECK: shl (M1_NM, 1) [[SHIFTED:[A-z0-9]+]](0,0)<1> [[LOADED]](0,1)<0;1,0> 0x4:ub
36+
; CHECK: or (M1_NM, 1) [[PACKED]](0,0)<1> [[PACKED]](0,0)<0;1,0> [[SHIFTED]](0,0)<0;1,0>
37+
; CHECK: and (M1_NM, 1) [[PACKED]](0,1)<1> [[LOADED]](0,2)<0;1,0> 0xf:ub
3938
%packed = call <2 x i8> @llvm.genx.GenISA.Int4VectorPack.v2i8.v3i8(<3 x i8> %i2)
4039

4140
; Store the packed result

IGC/Compiler/tests/EmitVISAPass/emit-int4-vector/pack-4-to-2.ll

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -33,10 +33,12 @@ entry:
3333
%loaded = load i32, i32 addrspace(1)* %srcOffsetted, align 4
3434
%loadedCast = bitcast i32 %loaded to <4 x i8>
3535

36-
; CHECK: shl (M1, 16) [[SHIFTED1:[A-z0-9]*]](0,0)<1> [[LOADED:[A-z0-9]*]](0,16)<1;1,0> 0x4:ub
37-
; CHECK: or (M1, 16) [[PACKED:[A-z0-9]*]](0,0)<1> [[LOADED]](0,0)<1;1,0> [[SHIFTED1]](0,0)<1;1,0>
38-
; CHECK: shl (M1, 16) [[SHIFTED2:[A-z0-9]*]](0,0)<1> [[LOADED]](0,48)<1;1,0> 0x4:ub
39-
; CHECK: or (M1, 16) [[PACKED]](0,16)<1> [[LOADED]](0,32)<1;1,0> [[SHIFTED2]](0,0)<1;1,0>
36+
; CHECK: and (M1, 16) [[PACKED:[A-z0-9]+]](0,0)<1> [[LOADED:[A-z0-9]+]](0,0)<1;1,0> 0xf:ub
37+
; CHECK: shl (M1, 16) [[SHIFTED1:[A-z0-9]+]](0,0)<1> [[LOADED]](0,16)<1;1,0> 0x4:ub
38+
; CHECK: or (M1, 16) [[PACKED]](0,0)<1> [[PACKED]](0,0)<1;1,0> [[SHIFTED1]](0,0)<1;1,0>
39+
; CHECK: and (M1, 16) [[PACKED]](0,16)<1> [[LOADED]](0,32)<1;1,0> 0xf:ub
40+
; CHECK: shl (M1, 16) [[SHIFTED2:[A-z0-9]+]](0,0)<1> [[LOADED]](0,48)<1;1,0> 0x4:ub
41+
; CHECK: or (M1, 16) [[PACKED]](0,16)<1> [[PACKED]](0,16)<1;1,0> [[SHIFTED2]](0,0)<1;1,0>
4042
%packed = call <2 x i8> @llvm.genx.GenISA.Int4VectorPack.v2i8.v4i8(<4 x i8> %loadedCast)
4143

4244
; Store the packed result

0 commit comments

Comments
 (0)