-
Notifications
You must be signed in to change notification settings - Fork 14.5k
[NFC][AMDGPU] Add an IR test for v_cvt_f16_bf8
#149627
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
This was left during the upstream. Co-authored-by: Mekhanoshin, Stanislav <[email protected]>
This stack of pull requests is managed by Graphite. Learn more about stacking. |
@llvm/pr-subscribers-backend-amdgpu Author: Shilei Tian (shiltian) Changes: This was left during the upstream. Co-authored-by: Mekhanoshin, Stanislav <[email protected]> Full diff: https://github.com/llvm/llvm-project/pull/149627.diff (1 file affected):
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.f16.fp8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.f16.fp8.ll
index 25889ded91681..95653148b09f4 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.f16.fp8.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.f16.fp8.ll
@@ -9,6 +9,172 @@ declare half @llvm.amdgcn.cvt.f16.fp8(i32, i32)
declare <2 x half> @llvm.amdgcn.cvt.pk.f16.bf8(i16)
declare <2 x half> @llvm.amdgcn.cvt.pk.f16.fp8(i16)
+; Exercises llvm.amdgcn.cvt.f16.bf8 with a constant second operand of 0 and
+; extends the half result to float. All four check configurations expect the
+; e32 form of v_cvt_f16_bf8 with no byte_sel modifier, followed by
+; v_cvt_f32_f16; the "real16" variants name the 16-bit half register v0.l.
+define amdgpu_ps float @test_cvt_f16_bf8_byte0(i32 %a) {
+; GFX1250-SDAG-REAL16-LABEL: test_cvt_f16_bf8_byte0:
+; GFX1250-SDAG-REAL16: ; %bb.0:
+; GFX1250-SDAG-REAL16-NEXT: v_cvt_f16_bf8_e32 v0.l, v0
+; GFX1250-SDAG-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l
+; GFX1250-SDAG-REAL16-NEXT: ; return to shader part epilog
+;
+; GFX1250-SDAG-FAKE16-LABEL: test_cvt_f16_bf8_byte0:
+; GFX1250-SDAG-FAKE16: ; %bb.0:
+; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f16_bf8_e32 v0, v0
+; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX1250-GISEL-REAL16-LABEL: test_cvt_f16_bf8_byte0:
+; GFX1250-GISEL-REAL16: ; %bb.0:
+; GFX1250-GISEL-REAL16-NEXT: v_cvt_f16_bf8_e32 v0.l, v0
+; GFX1250-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l
+; GFX1250-GISEL-REAL16-NEXT: ; return to shader part epilog
+;
+; GFX1250-GISEL-FAKE16-LABEL: test_cvt_f16_bf8_byte0:
+; GFX1250-GISEL-FAKE16: ; %bb.0:
+; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f16_bf8_e32 v0, v0
+; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX1250-GISEL-FAKE16-NEXT: ; return to shader part epilog
+ ; The constant 0 selector corresponds to the absence of byte_sel in the
+ ; expected instructions above (presumably byte 0 of %a - confirm against
+ ; the intrinsic definition).
+ %cvt = tail call half @llvm.amdgcn.cvt.f16.bf8(i32 %a, i32 0)
+ %ret = fpext half %cvt to float
+ ret float %ret
+}
+
+; Exercises llvm.amdgcn.cvt.f16.bf8 with a constant second operand of 1 and
+; extends the half result to float. All four check configurations expect the
+; e64 form of v_cvt_f16_bf8 carrying byte_sel:1, followed by v_cvt_f32_f16.
+define amdgpu_ps float @test_cvt_f16_bf8_byte1(i32 %a) {
+; GFX1250-SDAG-REAL16-LABEL: test_cvt_f16_bf8_byte1:
+; GFX1250-SDAG-REAL16: ; %bb.0:
+; GFX1250-SDAG-REAL16-NEXT: v_cvt_f16_bf8_e64 v0.l, v0 byte_sel:1
+; GFX1250-SDAG-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l
+; GFX1250-SDAG-REAL16-NEXT: ; return to shader part epilog
+;
+; GFX1250-SDAG-FAKE16-LABEL: test_cvt_f16_bf8_byte1:
+; GFX1250-SDAG-FAKE16: ; %bb.0:
+; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f16_bf8_e64 v0, v0 byte_sel:1
+; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX1250-GISEL-REAL16-LABEL: test_cvt_f16_bf8_byte1:
+; GFX1250-GISEL-REAL16: ; %bb.0:
+; GFX1250-GISEL-REAL16-NEXT: v_cvt_f16_bf8_e64 v0.l, v0 byte_sel:1
+; GFX1250-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l
+; GFX1250-GISEL-REAL16-NEXT: ; return to shader part epilog
+;
+; GFX1250-GISEL-FAKE16-LABEL: test_cvt_f16_bf8_byte1:
+; GFX1250-GISEL-FAKE16: ; %bb.0:
+; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f16_bf8_e64 v0, v0 byte_sel:1
+; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX1250-GISEL-FAKE16-NEXT: ; return to shader part epilog
+ ; The constant 1 selector is what surfaces as byte_sel:1 in the expected
+ ; instructions above.
+ %cvt = tail call half @llvm.amdgcn.cvt.f16.bf8(i32 %a, i32 1)
+ %ret = fpext half %cvt to float
+ ret float %ret
+}
+
+; Exercises llvm.amdgcn.cvt.f16.bf8 with a constant second operand of 2 and
+; extends the half result to float. All four check configurations expect the
+; e64 form of v_cvt_f16_bf8 carrying byte_sel:2, followed by v_cvt_f32_f16.
+define amdgpu_ps float @test_cvt_f16_bf8_byte2(i32 %a) {
+; GFX1250-SDAG-REAL16-LABEL: test_cvt_f16_bf8_byte2:
+; GFX1250-SDAG-REAL16: ; %bb.0:
+; GFX1250-SDAG-REAL16-NEXT: v_cvt_f16_bf8_e64 v0.l, v0 byte_sel:2
+; GFX1250-SDAG-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l
+; GFX1250-SDAG-REAL16-NEXT: ; return to shader part epilog
+;
+; GFX1250-SDAG-FAKE16-LABEL: test_cvt_f16_bf8_byte2:
+; GFX1250-SDAG-FAKE16: ; %bb.0:
+; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f16_bf8_e64 v0, v0 byte_sel:2
+; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX1250-GISEL-REAL16-LABEL: test_cvt_f16_bf8_byte2:
+; GFX1250-GISEL-REAL16: ; %bb.0:
+; GFX1250-GISEL-REAL16-NEXT: v_cvt_f16_bf8_e64 v0.l, v0 byte_sel:2
+; GFX1250-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l
+; GFX1250-GISEL-REAL16-NEXT: ; return to shader part epilog
+;
+; GFX1250-GISEL-FAKE16-LABEL: test_cvt_f16_bf8_byte2:
+; GFX1250-GISEL-FAKE16: ; %bb.0:
+; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f16_bf8_e64 v0, v0 byte_sel:2
+; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX1250-GISEL-FAKE16-NEXT: ; return to shader part epilog
+ ; The constant 2 selector is what surfaces as byte_sel:2 in the expected
+ ; instructions above.
+ %cvt = tail call half @llvm.amdgcn.cvt.f16.bf8(i32 %a, i32 2)
+ %ret = fpext half %cvt to float
+ ret float %ret
+}
+
+; Exercises llvm.amdgcn.cvt.f16.bf8 with a constant second operand of 3 and
+; extends the half result to float. All four check configurations expect the
+; e64 form of v_cvt_f16_bf8 carrying byte_sel:3, followed by v_cvt_f32_f16.
+define amdgpu_ps float @test_cvt_f16_bf8_byte3(i32 %a) {
+; GFX1250-SDAG-REAL16-LABEL: test_cvt_f16_bf8_byte3:
+; GFX1250-SDAG-REAL16: ; %bb.0:
+; GFX1250-SDAG-REAL16-NEXT: v_cvt_f16_bf8_e64 v0.l, v0 byte_sel:3
+; GFX1250-SDAG-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l
+; GFX1250-SDAG-REAL16-NEXT: ; return to shader part epilog
+;
+; GFX1250-SDAG-FAKE16-LABEL: test_cvt_f16_bf8_byte3:
+; GFX1250-SDAG-FAKE16: ; %bb.0:
+; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f16_bf8_e64 v0, v0 byte_sel:3
+; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX1250-GISEL-REAL16-LABEL: test_cvt_f16_bf8_byte3:
+; GFX1250-GISEL-REAL16: ; %bb.0:
+; GFX1250-GISEL-REAL16-NEXT: v_cvt_f16_bf8_e64 v0.l, v0 byte_sel:3
+; GFX1250-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l
+; GFX1250-GISEL-REAL16-NEXT: ; return to shader part epilog
+;
+; GFX1250-GISEL-FAKE16-LABEL: test_cvt_f16_bf8_byte3:
+; GFX1250-GISEL-FAKE16: ; %bb.0:
+; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f16_bf8_e64 v0, v0 byte_sel:3
+; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX1250-GISEL-FAKE16-NEXT: ; return to shader part epilog
+ ; The constant 3 selector is what surfaces as byte_sel:3 in the expected
+ ; instructions above.
+ %cvt = tail call half @llvm.amdgcn.cvt.f16.bf8(i32 %a, i32 3)
+ %ret = fpext half %cvt to float
+ ret float %ret
+}
+
+; Exercises llvm.amdgcn.cvt.f16.bf8 with a constant second operand of 3 when
+; the half result lands in the high element of a <2 x half>: element 0 is set
+; to 0.0, element 1 to the converted value, and the vector is returned bitcast
+; to float. The expected lowering differs per configuration: a direct write to
+; v0.h plus a v_mov_b16 zeroing v0.l, a v_perm_b32, or a v_lshl_or_b32 by 16.
+define amdgpu_ps float @test_cvt_f16_bf8_byte3_hi(i32 %a) {
+; GFX1250-SDAG-REAL16-LABEL: test_cvt_f16_bf8_byte3_hi:
+; GFX1250-SDAG-REAL16: ; %bb.0:
+; GFX1250-SDAG-REAL16-NEXT: v_cvt_f16_bf8_e64 v0.h, v0 byte_sel:3
+; GFX1250-SDAG-REAL16-NEXT: v_mov_b16_e32 v0.l, 0
+; GFX1250-SDAG-REAL16-NEXT: ; return to shader part epilog
+;
+; GFX1250-SDAG-FAKE16-LABEL: test_cvt_f16_bf8_byte3_hi:
+; GFX1250-SDAG-FAKE16: ; %bb.0:
+; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f16_bf8_e64 v0, v0 byte_sel:3
+; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-FAKE16-NEXT: v_perm_b32 v0, v0, 0, 0x5040100
+; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX1250-GISEL-REAL16-LABEL: test_cvt_f16_bf8_byte3_hi:
+; GFX1250-GISEL-REAL16: ; %bb.0:
+; GFX1250-GISEL-REAL16-NEXT: v_cvt_f16_bf8_e64 v0.l, v0 byte_sel:3
+; GFX1250-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-REAL16-NEXT: v_lshl_or_b32 v0, v0, 16, 0
+; GFX1250-GISEL-REAL16-NEXT: ; return to shader part epilog
+;
+; GFX1250-GISEL-FAKE16-LABEL: test_cvt_f16_bf8_byte3_hi:
+; GFX1250-GISEL-FAKE16: ; %bb.0:
+; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f16_bf8_e64 v0, v0 byte_sel:3
+; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-FAKE16-NEXT: v_lshl_or_b32 v0, v0, 16, 0
+; GFX1250-GISEL-FAKE16-NEXT: ; return to shader part epilog
+ %cvt = tail call half @llvm.amdgcn.cvt.f16.bf8(i32 %a, i32 3)
+ ; Start from poison rather than the deprecated undef: both lanes of the
+ ; vector are overwritten below, so the placeholder value is never observed.
+ %ins.0 = insertelement <2 x half> poison, half 0.0, i32 0
+ %ins.1 = insertelement <2 x half> %ins.0, half %cvt, i32 1
+ %ret = bitcast <2 x half> %ins.1 to float
+ ret float %ret
+}
+
define amdgpu_ps float @test_cvt_f16_fp8_byte0(i32 %a) {
; GFX1250-SDAG-REAL16-LABEL: test_cvt_f16_fp8_byte0:
; GFX1250-SDAG-REAL16: ; %bb.0:
|
You can test this locally with the following command: git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)' 'HEAD~1' HEAD llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.f16.fp8.ll The following files introduce new uses of undef: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.f16.fp8.ll
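Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields `undef`. In tests, avoid using `undef`; if an operand with an unimportant value is needed, add a new argument to the function and use that instead. For example, this is considered a bad practice: define void @fn() {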
...
br i1 undef, ...
} Please use the following instead: define void @fn(i1 %cond) {
...
br i1 %cond, ...
} Please refer to the Undefined Behavior Manual for more information. |
This was left during the upstream.
Co-authored-by: Mekhanoshin, Stanislav [email protected]