Skip to content

Commit 453b7cf

Browse files
mshelego authored and igcbot committed
[IGC VC] Support different GRF sizes in GenXDepressurizer
Dynamically set optimization thresholds for depressurizer based on the number and size of GRF registers
1 parent 8eb77fc commit 453b7cf

File tree

4 files changed

+216
-18
lines changed

4 files changed

+216
-18
lines changed
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
/*========================== begin_copyright_notice ============================
2+
3+
Copyright (C) 2025 Intel Corporation
4+
5+
SPDX-License-Identifier: MIT
6+
7+
============================= end_copyright_notice ===========================*/
8+
9+
#ifndef VC_UTILS_GENX_GRFSIZE_H
10+
#define VC_UTILS_GENX_GRFSIZE_H
11+
12+
#include "vc/Support/BackendConfig.h"
13+
#include "vc/Utils/GenX/KernelInfo.h"
14+
15+
namespace vc {
16+
17+
// Resolves the GRF count (stored in NumGRF) requested for a kernel.
//
// Sources are consulted in increasing priority; a later source overrides an
// earlier one:
//   1. auto large-GRF mode in the backend config -> 0
//   2. a non-zero GRF size in the backend config -> that value
//   3. a GRF size present in the kernel metadata -> that value, but only when
//      it is 0 or ST->isValidGRFSize() accepts it; any other value is ignored
//      and the result of steps 1-2 is kept.
//
// BC is dereferenced unconditionally; ST is dereferenced only when the kernel
// metadata carries a non-zero value. Neither pointer is checked for null here.
//
// Returns -1 when no source specifies a value, otherwise the selected value.
inline int getGRFSize(const llvm::GenXBackendConfig *BC,
18+
const llvm::GenXSubtarget *ST, const KernelMetadata &KM) {
19+
// -1 is the "nothing specified" result.
int NumGRF = -1;
20+
// Set by compile option.
21+
if (BC->isAutoLargeGRFMode())
22+
NumGRF = 0;
23+
// An explicit non-zero size from the config wins over auto mode.
if (BC->getGRFSize())
24+
NumGRF = BC->getGRFSize();
25+
// Set by kernel metadata.
// KM.getGRFSize() is tested for presence and then dereferenced, i.e. it is
// used as an optional-like value.
26+
if (KM.getGRFSize()) {
27+
unsigned NumGRFPerKernel = *KM.getGRFSize();
28+
// 0 is accepted without consulting the subtarget (presumably the same
// "auto" meaning as above -- TODO confirm); other values must be valid
// for the subtarget or they are silently dropped.
if (NumGRFPerKernel == 0 || ST->isValidGRFSize(NumGRFPerKernel))
29+
NumGRF = NumGRFPerKernel;
30+
}
31+
return NumGRF;
32+
}
33+
34+
} // namespace vc
35+
36+
#endif // VC_UTILS_GENX_GRFSIZE_H

IGC/VectorCompiler/lib/GenXCodeGen/GenXCisaBuilder.cpp

Lines changed: 3 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*========================== begin_copyright_notice ============================
22
3-
Copyright (C) 2019-2024 Intel Corporation
3+
Copyright (C) 2019-2025 Intel Corporation
44
55
SPDX-License-Identifier: MIT
66
@@ -40,6 +40,7 @@ SPDX-License-Identifier: MIT
4040
#include "vc/Support/GenXDiagnostic.h"
4141
#include "vc/Support/ShaderDump.h"
4242
#include "vc/Utils/GenX/GlobalVariable.h"
43+
#include "vc/Utils/GenX/GRFSize.h"
4344
#include "vc/Utils/GenX/Intrinsics.h"
4445
#include "vc/Utils/GenX/IntrinsicsWrapper.h"
4546
#include "vc/Utils/GenX/KernelInfo.h"
@@ -949,19 +950,7 @@ static void addKernelAttrsFromMetadata(VISAKernel &Kernel,
949950
Kernel.AddKernelAttribute("NBarrierCnt", sizeof(BarrierCnt), &BarrierCnt);
950951
}
951952

952-
int NumGRF = -1;
953-
// Set by compile option.
954-
if (BC->isAutoLargeGRFMode())
955-
NumGRF = 0;
956-
if (BC->getGRFSize())
957-
NumGRF = BC->getGRFSize();
958-
// Set by kernel metadata.
959-
if (KM.getGRFSize()) {
960-
unsigned NumGRFPerKernel = *KM.getGRFSize();
961-
if (NumGRFPerKernel == 0 || Subtarget->isValidGRFSize(NumGRFPerKernel))
962-
NumGRF = NumGRFPerKernel;
963-
}
964-
953+
int NumGRF = vc::getGRFSize(BC, Subtarget, KM);
965954
if (NumGRF != -1)
966955
Kernel.AddKernelAttribute("NumGRF", sizeof(NumGRF), &NumGRF);
967956
}

IGC/VectorCompiler/lib/GenXCodeGen/GenXDepressurizer.cpp

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*========================== begin_copyright_notice ============================
22
3-
Copyright (C) 2017-2023 Intel Corporation
3+
Copyright (C) 2017-2025 Intel Corporation
44
55
SPDX-License-Identifier: MIT
66
@@ -109,11 +109,16 @@ SPDX-License-Identifier: MIT
109109
#include "GenXIntrinsics.h"
110110
#include "GenXLiveness.h"
111111
#include "GenXModule.h"
112+
#include "GenXTargetMachine.h"
112113
#include "GenXUtil.h"
113114

115+
#include "vc/Utils/GenX/GRFSize.h"
116+
114117
#include "llvm/ADT/SmallSet.h"
115118
#include "llvm/ADT/Statistic.h"
116119
#include "llvm/Analysis/LoopInfo.h"
120+
#include "llvm/CodeGen/TargetPassConfig.h"
121+
#include "llvm/InitializePasses.h"
117122
#include "llvm/IR/BasicBlock.h"
118123
#include "llvm/IR/Dominators.h"
119124
#include "llvm/IR/Function.h"
@@ -330,10 +335,15 @@ struct SinkCandidate {
330335
// GenX depressurizer pass
331336
class GenXDepressurizer : public FGPassImplInterface,
332337
public IDMixin<GenXDepressurizer> {
333-
enum { FlagThreshold = 6, AddrThreshold = 32, GRFThreshold = 2560,
334-
FlagGRFTolerance = 3840 };
338+
const unsigned FlagThreshold = 6;
339+
const unsigned AddrThreshold = 32;
340+
unsigned GRFThreshold = 0;
341+
unsigned FlagGRFTolerance = 0;
335342
bool Modified = false;
336343
GenXGroupBaling *Baling = nullptr;
344+
const GenXBackendConfig *BC = nullptr;
345+
const GenXSubtarget *ST = nullptr;
346+
const vc::KernelMetadata *KM = nullptr;
337347
DominatorTree *DT = nullptr;
338348
LoopInfoBase<BasicBlock, Loop> *LI = nullptr;
339349
PseudoCFG *PCFG = nullptr;
@@ -385,6 +395,8 @@ using GenXDepressurizerWrapper = FunctionGroupWrapperPass<GenXDepressurizer>;
385395
}
386396
INITIALIZE_PASS_BEGIN(GenXDepressurizerWrapper, "GenXDepressurizerWrapper",
387397
"GenXDepressurizerWrapper", false, false)
398+
INITIALIZE_PASS_DEPENDENCY(GenXBackendConfig)
399+
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
388400
INITIALIZE_PASS_DEPENDENCY(DominatorTreeGroupWrapperPassWrapper)
389401
INITIALIZE_PASS_DEPENDENCY(GenXLivenessWrapper)
390402
INITIALIZE_PASS_DEPENDENCY(GenXGroupBalingWrapper)
@@ -399,10 +411,14 @@ ModulePass *llvm::createGenXDepressurizerWrapperPass() {
399411
// Declares which analyses this pass needs and which ones it keeps valid.
// GenXBackendConfig and TargetPassConfig are required so that
// runOnFunctionGroup can read the backend configuration and reach the
// subtarget through the target machine.
void GenXDepressurizer::getAnalysisUsage(AnalysisUsage &AU) {
400412
// Analyses that must be available before the pass runs.
AU.addRequired<DominatorTreeGroupWrapperPass>();
401413
AU.addRequired<GenXGroupBaling>();
414+
AU.addRequired<GenXBackendConfig>();
415+
AU.addRequired<TargetPassConfig>();
402416
// Analyses declared as still valid after the pass has run.
AU.addPreserved<DominatorTreeGroupWrapperPass>();
403417
AU.addPreserved<GenXModule>();
404418
AU.addPreserved<GenXLiveness>();
405419
AU.addPreserved<GenXGroupBaling>();
420+
AU.addPreserved<GenXBackendConfig>();
421+
AU.addPreserved<TargetPassConfig>();
406422
AU.addPreserved<FunctionGroupAnalysis>();
407423
// The pass declares that it does not change the control-flow graph.
AU.setPreservesCFG();
408424
}
@@ -418,6 +434,20 @@ bool GenXDepressurizer::runOnFunctionGroup(FunctionGroup &FG) {
418434
Modified = false;
419435
SunkCount = 0;
420436
Baling = &getAnalysis<GenXGroupBaling>();
437+
BC = &getAnalysis<GenXBackendConfig>();
438+
ST = &getAnalysis<TargetPassConfig>()
439+
.getTM<GenXTargetMachine>()
440+
.getGenXSubtarget();
441+
vc::KernelMetadata KM(FG.getHead());
442+
// Minimal general register size is 32 bytes and GRF can consist of at least
443+
// 32 registers. Thresholds should be set according to the actual GRF size.
444+
unsigned RegSizeFactor = ST->getGRFByteSize() / 32;
445+
int GRFSize = vc::getGRFSize(BC, ST, KM);
446+
unsigned RegNumFactor = (GRFSize > 0 ? GRFSize : 128) / 32;
447+
// Historically the general register pressure threshold was set to 2560 for
448+
// 128*32 byte GRF case and the flag tolerance threshold was set to 1.5x of it.
449+
GRFThreshold = 640 * RegSizeFactor * RegNumFactor;
450+
FlagGRFTolerance = GRFThreshold * 3 / 2;
421451
// Process functions in the function group in reverse order, so we know the
422452
// max pressure in a subroutine when we see a call to it.
423453
for (auto fgi = FG.rbegin(), fge = FG.rend(); fgi != fge; ++fgi) {
@@ -882,7 +912,8 @@ void GenXDepressurizer::attemptSinking(Instruction *InsertBefore,
882912
bool IsFlag = Liveness::isFlag(Inst);
883913
bool IsAddr = Liveness::isAddr(Inst);
884914
if (!IsFlag && !IsAddr &&
885-
Inst->getType()->getPrimitiveSizeInBits() < 32 * 8) {
915+
Inst->getType()->getPrimitiveSizeInBits() <
916+
ST->getGRFByteSize() * genx::ByteBits) {
886917
// don't bother with anything smaller than a GRF unless it is a flag
887918
continue;
888919
}
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2025 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
;
9+
; RUN: %opt %use_old_pass_manager% -GenXModule -GenXLiveRangesWrapper -GenXDepressurizerWrapper -march=genx64 -mtriple=spir64-unknown-unknown -mcpu=XeHPG -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-XeHPG
10+
; RUN: %opt %use_old_pass_manager% -GenXModule -GenXLiveRangesWrapper -GenXDepressurizerWrapper -march=genx64 -mtriple=spir64-unknown-unknown -mcpu=XeHPC -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-XeHPC
11+
; RUN: %opt %use_old_pass_manager% -GenXModule -GenXLiveRangesWrapper -GenXDepressurizerWrapper -march=genx64 -mtriple=spir64-unknown-unknown -mcpu=Xe2 -vc-grf-size=64 -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-Xe2-64
12+
; RUN: %opt %use_old_pass_manager% -GenXModule -GenXLiveRangesWrapper -GenXDepressurizerWrapper -march=genx64 -mtriple=spir64-unknown-unknown -mcpu=Xe2 -vc-grf-size=128 -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-Xe2-128
13+
; RUN: %opt %use_old_pass_manager% -GenXModule -GenXLiveRangesWrapper -GenXDepressurizerWrapper -march=genx64 -mtriple=spir64-unknown-unknown -mcpu=Xe2 -vc-grf-size=256 -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-Xe2-256
14+
15+
; CHECK-LABEL: @test1
16+
; COM: Baseline without any artificial register pressure: the only vector
; COM: values are the fpext result and the loop-carried accumulator.
; COM: Every run configuration expects the fpext to stay in the entry block,
; COM: i.e. nothing is sunk into the loop.
define dllexport void @test1(<16 x half> %arg) #0 {
17+
entry:
18+
%fp = fpext <16 x half> %arg to <16 x float>
19+
; CHECK: br label %loop
20+
br label %loop
21+
22+
loop:
23+
%i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
24+
%res = phi <16 x float> [ zeroinitializer, %entry ], [ %res.next, %loop ]
25+
%i.next = add i32 %i, 1
26+
; CHECK-XeHPG-NOT: fpext
27+
; CHECK-XeHPC-NOT: fpext
28+
; CHECK-Xe2-64-NOT: fpext
29+
; CHECK-Xe2-128-NOT: fpext
30+
; CHECK-Xe2-256-NOT: fpext
31+
%res.next = fadd <16 x float> %res, %fp
32+
%cmp = icmp ult i32 %i.next, 100
33+
br i1 %cmp, label %loop, label %end
34+
35+
end:
36+
ret void
37+
}
38+
39+
; CHECK-LABEL: @test2
40+
; COM: %pressure (1024 x i32 = 4096 bytes) is passed in and returned after the
; COM: loop, so it is live across the whole loop.
; COM: Expected: the fpext is sunk into the loop for XeHPG and for Xe2 with
; COM: -vc-grf-size=64 only; the other configurations leave it in the entry
; COM: block. Presumably 4096 bytes exceeds the depressurizer threshold only
; COM: for the two smallest register files -- the threshold is derived from
; COM: GRF byte size and GRF count in GenXDepressurizer; confirm there.
define dllexport <1024 x i32> @test2(<1024 x i32> %pressure, <16 x half> %arg) #0 {
41+
entry:
42+
%fp = fpext <16 x half> %arg to <16 x float>
43+
; CHECK: br label %loop
44+
br label %loop
45+
46+
loop:
47+
%i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
48+
%res = phi <16 x float> [ zeroinitializer, %entry ], [ %res.next, %loop ]
49+
%i.next = add i32 %i, 1
50+
; CHECK-XeHPG: fpext
51+
; CHECK-XeHPC-NOT: fpext
52+
; CHECK-Xe2-64: fpext
53+
; CHECK-Xe2-128-NOT: fpext
54+
; CHECK-Xe2-256-NOT: fpext
55+
%res.next = fadd <16 x float> %res, %fp
56+
%cmp = icmp ult i32 %i.next, 100
57+
br i1 %cmp, label %loop, label %end
58+
59+
end:
60+
ret <1024 x i32> %pressure
61+
}
62+
63+
; CHECK-LABEL: @test3
64+
; COM: Same shape as the previous test with twice the pressure:
; COM: %pressure is 2048 x i32 = 8192 bytes, live across the loop.
; COM: Expected: the fpext is sunk into the loop for every configuration
; COM: except Xe2 with -vc-grf-size=256, which keeps it in the entry block.
define dllexport <2048 x i32> @test3(<2048 x i32> %pressure, <16 x half> %arg) #0 {
65+
entry:
66+
%fp = fpext <16 x half> %arg to <16 x float>
67+
; CHECK: br label %loop
68+
br label %loop
69+
70+
loop:
71+
%i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
72+
%res = phi <16 x float> [ zeroinitializer, %entry ], [ %res.next, %loop ]
73+
%i.next = add i32 %i, 1
74+
; CHECK-XeHPG: fpext
75+
; CHECK-XeHPC: fpext
76+
; CHECK-Xe2-64: fpext
77+
; CHECK-Xe2-128: fpext
78+
; CHECK-Xe2-256-NOT: fpext
79+
%res.next = fadd <16 x float> %res, %fp
80+
%cmp = icmp ult i32 %i.next, 100
81+
br i1 %cmp, label %loop, label %end
82+
83+
end:
84+
ret <2048 x i32> %pressure
85+
}
86+
87+
; CHECK-LABEL: @test4
88+
; COM: Largest pressure case: %pressure is 4096 x i32 = 16384 bytes, live
; COM: across the loop.
; COM: Expected: the fpext is sunk into the loop for every configuration,
; COM: including Xe2 with -vc-grf-size=256.
define dllexport <4096 x i32> @test4(<4096 x i32> %pressure, <16 x half> %arg) #0 {
89+
entry:
90+
%fp = fpext <16 x half> %arg to <16 x float>
91+
; CHECK: br label %loop
92+
br label %loop
93+
94+
loop:
95+
%i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
96+
%res = phi <16 x float> [ zeroinitializer, %entry ], [ %res.next, %loop ]
97+
%i.next = add i32 %i, 1
98+
; CHECK-XeHPG: fpext
99+
; CHECK-XeHPC: fpext
100+
; CHECK-Xe2-64: fpext
101+
; CHECK-Xe2-128: fpext
102+
; CHECK-Xe2-256: fpext
103+
%res.next = fadd <16 x float> %res, %fp
104+
%cmp = icmp ult i32 %i.next, 100
105+
br i1 %cmp, label %loop, label %end
106+
107+
end:
108+
ret <4096 x i32> %pressure
109+
}
110+
111+
; COM: Register pressure is beyond threshold for all platforms
112+
; COM: Check that instructions smaller than one register are not moved into the loop
113+
; CHECK-LABEL: @test5
114+
; COM: Two sinking candidates of different width under 16384 bytes of pressure:
; COM: %fp.4 is <4 x float> (16 bytes) and %fp.8 is <8 x float> (32 bytes).
; COM: Expected: only XeHPG sinks the 8-wide fpext into the loop; the 4-wide
; COM: one is never sunk, and the other configurations sink neither.
; COM: This matches the size cut-off in GenXDepressurizer::attemptSinking,
; COM: which compares the value size against ST->getGRFByteSize();
; COM: presumably XeHPG has a 32-byte GRF and the other targets a larger
; COM: one -- confirm against the subtarget definitions.
define dllexport <4096 x i32> @test5(<4096 x i32> %pressure, <4 x half> %arg.4, <8 x half> %arg.8) #0 {
115+
entry:
116+
%fp.4 = fpext <4 x half> %arg.4 to <4 x float>
117+
%fp.8 = fpext <8 x half> %arg.8 to <8 x float>
118+
; CHECK: br label %loop
119+
br label %loop
120+
121+
loop:
122+
%i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
123+
%res.4 = phi <4 x float> [ zeroinitializer, %entry ], [ %res.next.4, %loop ]
124+
%res.8 = phi <8 x float> [ zeroinitializer, %entry ], [ %res.next.8, %loop ]
125+
; CHECK-XeHPG-NOT: fpext <4 x half>
126+
; CHECK-XeHPG: fpext <8 x half>
127+
; CHECK-XeHPC-NOT: fpext
128+
; CHECK-Xe2-64-NOT: fpext
129+
; CHECK-Xe2-128-NOT: fpext
130+
; CHECK-Xe2-256-NOT: fpext
131+
%res.next.4 = fadd <4 x float> %res.4, %fp.4
132+
%res.next.8 = fadd <8 x float> %res.8, %fp.8
133+
%i.next = add i32 %i, 1
134+
%cmp = icmp ult i32 %i.next, 100
135+
br i1 %cmp, label %loop, label %end
136+
137+
end:
138+
ret <4096 x i32> %pressure
139+
}
140+
141+
attributes #0 = { "CMGenxMain" }
142+

0 commit comments

Comments
 (0)