Skip to content

Commit d775e45

Browse files
jszt1igcbot
authored and committed
Revisit MismatchDetected for LowerGEPForPrivMem
MismatchDetected resolved an assert in the PrivateMemoryResolution pass that was caused by mismatched widths. LowerGEPForPrivMem handles that case itself, but when an alloca exceeds the allowed size it leaves the mismatched widths for the later PrivateMemoryResolution pass, causing the assert to fail. This commit also extends mismatch detection to cover struct-of-array and struct-of-vector cases.
1 parent 2ff5130 commit d775e45

File tree

3 files changed

+35
-6
lines changed

3 files changed

+35
-6
lines changed

IGC/Compiler/CISACodeGen/LowerGEPForPrivMem.cpp

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -282,7 +282,7 @@ StatusPrivArr2Reg LowerGEPForPrivMem::CheckIfAllocaPromotable(llvm::AllocaInst *
282282

283283
allowedAllocaSizeInBytes = (allowedAllocaSizeInBytes * 8) / SIMDSize;
284284
}
285-
SOALayoutChecker checker(*pAlloca, m_ctx->type == ShaderType::OPENCL_SHADER);
285+
SOALayoutChecker checker(*pAlloca, m_ctx->type == ShaderType::OPENCL_SHADER, true);
286286
SOALayoutInfo SOAInfo = checker.getOrGatherInfo();
287287
if (!SOAInfo.canUseSOALayout) {
288288
return StatusPrivArr2Reg::CannotUseSOALayout;
@@ -357,7 +357,7 @@ StatusPrivArr2Reg LowerGEPForPrivMem::CheckIfAllocaPromotable(llvm::AllocaInst *
357357
return StatusPrivArr2Reg::OK;
358358
}
359359

360-
SOALayoutChecker::SOALayoutChecker(AllocaInst &allocaToCheck, bool isOCL) : allocaRef(allocaToCheck) {
360+
SOALayoutChecker::SOALayoutChecker(AllocaInst &allocaToCheck, bool isOCL, bool mismatchedWidthsSupport) : allocaRef(allocaToCheck), mismatchedWidthsSupport(mismatchedWidthsSupport) {
361361
auto F = allocaToCheck.getParent()->getParent();
362362
pDL = &F->getParent()->getDataLayout();
363363
newAlgoControl = IGC_GET_FLAG_VALUE(EnablePrivMemNewSOATranspose);
@@ -571,9 +571,12 @@ bool IGC::SOALayoutChecker::MismatchDetected(Instruction &I) {
571571
return false;
572572

573573
Type *allocaTy = allocaRef.getAllocatedType();
574-
bool allocaIsVecOrArr = allocaTy->isVectorTy() || allocaTy->isArrayTy();
574+
bool allocaIsVecOrArrOrStruct = allocaTy->isVectorTy() || allocaTy->isArrayTy() || allocaTy->isStructTy();
575575

576-
if (!allocaIsVecOrArr)
576+
if (!allocaIsVecOrArrOrStruct)
577+
return false;
578+
579+
if(mismatchedWidthsSupport)
577580
return false;
578581

579582
auto DL = I.getParent()->getParent()->getParent()->getDataLayout();
@@ -590,15 +593,26 @@ bool IGC::SOALayoutChecker::MismatchDetected(Instruction &I) {
590593
allocaTy = arrTy->getElementType();
591594
} else if (auto *vec = dyn_cast<IGCLLVM::FixedVectorType>(allocaTy)) {
592595
allocaTy = vec->getElementType();
596+
} else if (auto *strct = dyn_cast<StructType>(allocaTy)){
597+
if (auto *arrTy = dyn_cast<ArrayType>(strct->getStructElementType(0))) {
598+
allocaTy = arrTy->getElementType();
599+
} else if (auto *vec = dyn_cast<IGCLLVM::FixedVectorType>(strct->getStructElementType(0))){
600+
allocaTy = vec->getElementType();
601+
}
593602
}
594603

595604
if (auto *arrTy = dyn_cast<ArrayType>(pUserTy)) {
596605
pUserTy = arrTy->getElementType();
597606
} else if (auto *vec = dyn_cast<IGCLLVM::FixedVectorType>(pUserTy)) {
598607
pUserTy = vec->getElementType();
608+
} else if (auto *strct = dyn_cast<StructType>(pUserTy)){
609+
if (auto *arrTy = dyn_cast<ArrayType>(strct->getStructElementType(0))) {
610+
pUserTy = arrTy->getElementType();
611+
} else if (auto *vec = dyn_cast<IGCLLVM::FixedVectorType>(strct->getStructElementType(0))){
612+
pUserTy = vec->getElementType();
613+
}
599614
}
600615
}
601-
602616
auto allocaSize = DL.getTypeAllocSize(allocaTy);
603617
auto vecTySize = DL.getTypeAllocSize(pUserTy);
604618

IGC/Compiler/CISACodeGen/LowerGEPForPrivMem.hpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ class SOALayoutChecker : public llvm::InstVisitor<SOALayoutChecker, bool> {
7070
friend llvm::InstVisitor<SOALayoutChecker, bool>;
7171

7272
// isOCL is for testing, it will be removed once testing is done.
73-
SOALayoutChecker(llvm::AllocaInst &allocaToCheck, bool isOCL);
73+
SOALayoutChecker(llvm::AllocaInst &allocaToCheck, bool isOCL, bool mismatchedWidthsSupport=false);
7474
SOALayoutChecker() = delete;
7575
~SOALayoutChecker() = default;
7676
SOALayoutChecker(SOALayoutChecker &) = delete;
@@ -87,6 +87,10 @@ class SOALayoutChecker : public llvm::InstVisitor<SOALayoutChecker, bool> {
8787
const llvm::DataLayout *pDL;
8888
std::unique_ptr<SOALayoutInfo> pInfo;
8989

90+
// If mismatched widths reach PrivateMemoryResolution, it should turn off optimization
91+
// But if they reach LowerGEPForPrivMemPass they get resolved properly
92+
bool mismatchedWidthsSupport;
93+
9094
// ===== fields for new algo =====
9195
// todo: combine the new and old together
9296
//

IGC/Compiler/tests/PrivateMemoryResolution/SOA_promotion/soa-mismatch-detection.ll

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
1818
target triple = "spir64-unknown-unknown"
1919

2020
%"struct.ispc::vec_t" = type { i32, i32, i32 }
21+
%g = type { [512 x i32] }
2122

2223
; Function Attrs: nofree nosync nounwind
2324
define spir_kernel void @test(ptr nocapture writeonly %d, <8 x i32> %r0, <8 x i32> %payloadHeader, <3 x i32> %enqueuedLocalSize, i16 %localIdX, i16 %localIdY, i16 %localIdZ, ptr nocapture readnone %privateBase) {
@@ -55,6 +56,16 @@ exit:
5556
store <4 x i32> zeroinitializer, ptr %offset.i.i.i.i, align 4
5657
%offset_gep = getelementptr i8, ptr %offset.i.i.i.i, i32 16
5758
store i32 0, ptr %offset_gep, align 4
59+
; This case is valid because float and i32 have 32 bits
60+
; CHECK: insertelement <2 x float> {{.*}}, float {{.*}}, i32 0
61+
; CHECK: insertelement <2 x float> {{.*}}, float {{.*}}, i32 1
62+
%st = alloca %g
63+
%load3 = load <2 x float>, ptr %st
64+
65+
; This case is not valid because i32 and i8 have different sizes
66+
; CHECK: %load4 = load <2 x i8>, ptr %st2
67+
%st2 = alloca %g
68+
%load4 = load <2 x i8>, ptr %st2
5869

5970
ret void
6071
}

0 commit comments

Comments
 (0)