[X86] Use X86FixupInstTunings to select between (V)MOVSS/D and (V)BLENDPS/D #143312
base: main
Conversation
@llvm/pr-subscribers-backend-x86
Author: 黃國庭 (houngkoungting)
Changes: Fix #142588. The switch-case instructions were identified with the help of GPT.
Patch is 106.90 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/143312.diff
55 Files Affected:
diff --git a/llvm/lib/Target/X86/X86FixupInstTuning.cpp b/llvm/lib/Target/X86/X86FixupInstTuning.cpp
index 6bb7600dedcac..748ebcc8a5569 100644
--- a/llvm/lib/Target/X86/X86FixupInstTuning.cpp
+++ b/llvm/lib/Target/X86/X86FixupInstTuning.cpp
@@ -221,8 +221,41 @@ bool X86FixupInstTuningPass::processInstruction(
auto ProcessUNPCKPS = [&](unsigned NewOpc) -> bool {
return ProcessUNPCKToIntDomain(NewOpc);
};
+
+ auto ProcessBLENDToMOV = [&](unsigned MovOpc) -> bool {
+ if (!MI.getOperand(NumOperands - 1).isImm() ||
+ MI.getOperand(NumOperands - 1).getImm() != 1)
+ return false;
+
+ bool Force = MF.getFunction().hasOptSize();
+ if (!Force && !NewOpcPreferable(MovOpc))
+ return false;
+ MI.setDesc(TII->get(MovOpc));
+ MI.removeOperand(NumOperands - 1);
+ return true;
+ };
switch (Opc) {
+ case X86::VBLENDPSrri:
+ case X86::VBLENDPSYrri:
+ case X86::VBLENDMPSZ128rrkz:
+ case X86::VBLENDMPSZ256rrkz:
+ case X86::VBLENDMPSZrrkz: {
+ int Imm = MI.getOperand(NumOperands - 1).getImm();
+ if (Imm != 1)
+ return false;
+ return ProcessBLENDToMOV(X86::VMOVSSrr);
+ }
+ case X86::VBLENDPDrri:
+ case X86::VBLENDPDYrri:
+ case X86::VBLENDMPDZ128rrkz:
+ case X86::VBLENDMPDZ256rrkz:
+ case X86::VBLENDMPDZrrkz: {
+ int Imm = MI.getOperand(NumOperands - 1).getImm();
+ if (Imm != 1)
+ return false;
+ return ProcessBLENDToMOV(X86::VMOVSDrr);
+ }
case X86::VPERMILPDri:
return ProcessVPERMILPDri(X86::VSHUFPDrri);
case X86::VPERMILPDYri:
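For context on the hunk above: the BLENDPS/BLENDPD immediate selects, for each lane, whether the result element comes from the second source (bit set) or the first source (bit clear). An immediate of exactly 1 therefore takes only lane 0 from the second source, which is precisely the register-to-register (V)MOVSS/(V)MOVSD behavior that the new ProcessBLENDToMOV lambda rewrites to. Below is a minimal standalone C++ model of that semantics; it is an illustrative sketch, not part of the patch or of LLVM.

```cpp
// Illustrative per-lane model of (V)BLENDPS dst, src1, src2, imm8.
// Bit i of the immediate set means lane i of the result comes from Src2;
// otherwise it comes from Src1. Imm == 1 matches the register form of
// (V)MOVSS: lane 0 from the second source, remaining lanes from the first.
#include <array>

std::array<float, 4> blendps(const std::array<float, 4> &Src1,
                             const std::array<float, 4> &Src2, unsigned Imm) {
  std::array<float, 4> Dst;
  for (unsigned I = 0; I < 4; ++I)
    Dst[I] = (Imm & (1u << I)) ? Src2[I] : Src1[I]; // bit set: take Src2 lane
  return Dst;
}
// blendps(A, B, /*Imm=*/1) == {B[0], A[1], A[2], A[3]}, i.e. movss semantics.
// Any other immediate mixes more than the low lane and is not a plain move.
```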
diff --git a/llvm/test/CodeGen/X86/2012-01-12-extract-sv.ll b/llvm/test/CodeGen/X86/2012-01-12-extract-sv.ll
index 254a53fcac4de..65273870c3dfb 100644
--- a/llvm/test/CodeGen/X86/2012-01-12-extract-sv.ll
+++ b/llvm/test/CodeGen/X86/2012-01-12-extract-sv.ll
@@ -11,7 +11,7 @@ define void @endless_loop() {
; AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3,4,5,6],ymm1[7]
; AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
+; AVX1-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
; AVX1-NEXT: vmovaps %ymm0, (%eax)
; AVX1-NEXT: vmovaps %ymm1, (%eax)
; AVX1-NEXT: vzeroupper
@@ -21,7 +21,7 @@ define void @endless_loop() {
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vbroadcastss (%eax), %xmm0
; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vblendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
+; AVX2-NEXT: vmovss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
; AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3,4,5,6],ymm0[7]
diff --git a/llvm/test/CodeGen/X86/avx-insertelt.ll b/llvm/test/CodeGen/X86/avx-insertelt.ll
index 18ca01290c914..81f3058f19579 100644
--- a/llvm/test/CodeGen/X86/avx-insertelt.ll
+++ b/llvm/test/CodeGen/X86/avx-insertelt.ll
@@ -8,7 +8,7 @@ define <8 x float> @insert_f32_firstelt_of_low_subvector(<8 x float> %x, float %
; ALL-LABEL: insert_f32_firstelt_of_low_subvector:
; ALL: # %bb.0:
; ALL-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
-; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6,7]
+; ALL-NEXT: vmovss {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
; ALL-NEXT: retq
%i0 = insertelement <8 x float> %x, float %s, i32 0
ret <8 x float> %i0
@@ -94,7 +94,7 @@ define <8 x float> @insert_f32_firstelt_of_high_subvector(<8 x float> %x, float
; AVX-LABEL: insert_f32_firstelt_of_high_subvector:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
+; AVX-NEXT: vmovss {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT: retq
;
@@ -202,9 +202,9 @@ define <4 x i64> @insert_i64_firstelt_of_high_subvector(<4 x i64> %x, i64 %s) {
define <8 x float> @insert_f32_firstelts(<8 x float> %x, float %s) {
; AVX-LABEL: insert_f32_firstelts:
; AVX: # %bb.0:
-; AVX-NEXT: vblendps {{.*#+}} xmm2 = xmm1[0],xmm0[1,2,3]
+; AVX-NEXT: vmovss {{.*#+}} xmm2 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
index a8574c0b7516c..30bf1a261f4b7 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
@@ -1843,7 +1843,7 @@ define <2 x double> @test_mm_cvtu64_sd(<2 x double> %__A, i64 %__B) {
; X86-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-NEXT: vshufpd {{.*#+}} xmm2 = xmm1[1,0]
; X86-NEXT: vaddsd %xmm1, %xmm2, %xmm1
-; X86-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; X86-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X86-NEXT: retl
;
; X64-LABEL: test_mm_cvtu64_sd:
@@ -1891,7 +1891,7 @@ define <4 x float> @test_mm_cvtu64_ss(<4 x float> %__A, i64 %__B) {
; X86-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; X86-NEXT: fstps {{[0-9]+}}(%esp)
; X86-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X86-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; X86-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: .cfi_def_cfa %esp, 4
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
index c1ef500d9d3de..aae48aba93be6 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
@@ -10483,7 +10483,7 @@ define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ss_rm(<4 x float> %x0, <4 x
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ss_rm:
; CHECK: ## %bb.0:
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
-; CHECK-NEXT: vblendps $1, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01]
+; CHECK-NEXT: vmovss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x10,0xc1]
; CHECK-NEXT: ## xmm0 = xmm1[0],xmm0[1,2,3]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%q = load float, ptr %ptr_b
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
index 926af4e9957af..f9b5994a18d36 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
@@ -6505,7 +6505,7 @@ define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ss_rm(<4 x float> %x0, <4 x
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ss_rm:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; CHECK-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; CHECK-NEXT: ret{{[l|q]}}
%q = load float, ptr %ptr_b
%vecinit.i = insertelement <4 x float> undef, float %q, i32 0
diff --git a/llvm/test/CodeGen/X86/avx512copy-intrinsics.ll b/llvm/test/CodeGen/X86/avx512copy-intrinsics.ll
index a7ca23792e6fe..a2af7df44010e 100644
--- a/llvm/test/CodeGen/X86/avx512copy-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512copy-intrinsics.ll
@@ -11,7 +11,7 @@ define <4 x i32> @test_mm_move_epi32(<4 x i32> %a0) nounwind {
; NOAVX512MOVZXC-LABEL: test_mm_move_epi32:
; NOAVX512MOVZXC: # %bb.0:
; NOAVX512MOVZXC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf0,0x57,0xc9]
-; NOAVX512MOVZXC-NEXT: vblendps $1, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x01]
+; NOAVX512MOVZXC-NEXT: vmovss %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf2,0x10,0xc0]
; NOAVX512MOVZXC-NEXT: # xmm0 = xmm0[0],xmm1[1,2,3]
; NOAVX512MOVZXC-NEXT: retq # encoding: [0xc3]
%res = shufflevector <4 x i32> %a0, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
diff --git a/llvm/test/CodeGen/X86/build-vector-512.ll b/llvm/test/CodeGen/X86/build-vector-512.ll
index 789196c5e4848..69d17fe3ab69f 100644
--- a/llvm/test/CodeGen/X86/build-vector-512.ll
+++ b/llvm/test/CodeGen/X86/build-vector-512.ll
@@ -578,7 +578,7 @@ define <16 x float> @test_buildvector_16f32_2_var(float %a0, float %a1) {
; AVX-32-NEXT: vpmovsxbd {{.*#+}} xmm1 = [0,17,0,0]
; AVX-32-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX-32-NEXT: vpermi2ps %zmm0, %zmm2, %zmm1
-; AVX-32-NEXT: vblendps {{.*#+}} xmm3 = xmm2[0],xmm0[1,2,3]
+; AVX-32-NEXT: vmovss {{.*#+}} xmm3 = xmm2[0],xmm0[1,2,3]
; AVX-32-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX-32-NEXT: vinsertps {{.*#+}} xmm3 = xmm0[0,1,2],xmm2[0]
; AVX-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[2,3]
@@ -626,7 +626,7 @@ define <16 x float> @test_buildvector_16f32_2_load(ptr %p0, ptr %p1) {
; AVX-32-NEXT: vbroadcastss (%ecx), %xmm1
; AVX-32-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX-32-NEXT: vpermi2ps %zmm1, %zmm2, %zmm0
-; AVX-32-NEXT: vblendps {{.*#+}} xmm3 = xmm2[0],xmm1[1,2,3]
+; AVX-32-NEXT: vmovss {{.*#+}} xmm3 = xmm2[0],xmm1[1,2,3]
; AVX-32-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX-32-NEXT: vinsertps {{.*#+}} xmm3 = xmm1[0,1,2],xmm2[0]
; AVX-32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
@@ -640,7 +640,7 @@ define <16 x float> @test_buildvector_16f32_2_load(ptr %p0, ptr %p1) {
; AVX-64-NEXT: vbroadcastss (%rdi), %xmm1
; AVX-64-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX-64-NEXT: vpermi2ps %zmm1, %zmm2, %zmm0
-; AVX-64-NEXT: vblendps {{.*#+}} xmm3 = xmm2[0],xmm1[1,2,3]
+; AVX-64-NEXT: vmovss {{.*#+}} xmm3 = xmm2[0],xmm1[1,2,3]
; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX-64-NEXT: vinsertps {{.*#+}} xmm3 = xmm1[0,1,2],xmm2[0]
; AVX-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
diff --git a/llvm/test/CodeGen/X86/buildvec-extract.ll b/llvm/test/CodeGen/X86/buildvec-extract.ll
index 545c57fed4b2c..9d856ed7647ca 100644
--- a/llvm/test/CodeGen/X86/buildvec-extract.ll
+++ b/llvm/test/CodeGen/X86/buildvec-extract.ll
@@ -42,7 +42,7 @@ define <2 x i64> @extract0_i32_zext_insert0_i64_zero(<4 x i32> %x) {
; AVX-LABEL: extract0_i32_zext_insert0_i64_zero:
; AVX: # %bb.0:
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: retq
%e = extractelement <4 x i32> %x, i32 0
%z = zext i32 %e to i64
@@ -85,7 +85,7 @@ define <2 x i64> @extract1_i32_zext_insert0_i64_zero(<4 x i32> %x) {
; AVX: # %bb.0:
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: retq
%e = extractelement <4 x i32> %x, i32 1
%z = zext i32 %e to i64
@@ -130,7 +130,7 @@ define <2 x i64> @extract2_i32_zext_insert0_i64_zero(<4 x i32> %x) {
; AVX: # %bb.0:
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: retq
%e = extractelement <4 x i32> %x, i32 2
%z = zext i32 %e to i64
diff --git a/llvm/test/CodeGen/X86/canonicalize-vars-f16-type.ll b/llvm/test/CodeGen/X86/canonicalize-vars-f16-type.ll
index 556b0deaf4c83..8b3aa2964db02 100644
--- a/llvm/test/CodeGen/X86/canonicalize-vars-f16-type.ll
+++ b/llvm/test/CodeGen/X86/canonicalize-vars-f16-type.ll
@@ -51,7 +51,7 @@ define void @v_test_canonicalize__half(half addrspace(1)* %out) nounwind {
; AVX512-NEXT: vcvtph2ps %xmm1, %xmm1
; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; AVX512-NEXT: vpextrw $0, %xmm0, (%rdi)
; AVX512-NEXT: retq
@@ -149,7 +149,7 @@ define half @complex_canonicalize_fmul_half(half %a, half %b) nounwind {
; AVX512-NEXT: vcvtph2ps %xmm2, %xmm2
; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
+; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
; AVX512-NEXT: vsubss %xmm1, %xmm0, %xmm0
@@ -235,12 +235,12 @@ define void @v_test_canonicalize_v2half(<2 x half> addrspace(1)* %out) nounwind
; AVX512-NEXT: vcvtph2ps %xmm2, %xmm2
; AVX512-NEXT: vmulss %xmm1, %xmm2, %xmm2
; AVX512-NEXT: vxorps %xmm3, %xmm3, %xmm3
-; AVX512-NEXT: vblendps {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3]
+; AVX512-NEXT: vmovss {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3]
; AVX512-NEXT: vcvtps2ph $4, %xmm2, %xmm2
; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3]
+; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3]
; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; AVX512-NEXT: vmovd %xmm0, (%rdi)
diff --git a/llvm/test/CodeGen/X86/coalesce_commute_movsd.ll b/llvm/test/CodeGen/X86/coalesce_commute_movsd.ll
index b42fd957d7f4f..086df87d1d5ff 100644
--- a/llvm/test/CodeGen/X86/coalesce_commute_movsd.ll
+++ b/llvm/test/CodeGen/X86/coalesce_commute_movsd.ll
@@ -44,12 +44,12 @@ define <4 x float> @insert_f32(float %a0, <4 x float> %a1) {
;
; AVX-LABEL: insert_f32:
; AVX: # %bb.0:
-; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: retq
;
; AVX512-LABEL: insert_f32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX512-NEXT: retq
%1 = insertelement <4 x float> %a1, float %a0, i32 0
ret <4 x float> %1
diff --git a/llvm/test/CodeGen/X86/combine-and.ll b/llvm/test/CodeGen/X86/combine-and.ll
index e5594dc9c5e3c..173457ff46677 100644
--- a/llvm/test/CodeGen/X86/combine-and.ll
+++ b/llvm/test/CodeGen/X86/combine-and.ll
@@ -37,7 +37,7 @@ define <4 x i32> @test1(<4 x i32> %A) {
; AVX-LABEL: test1:
; AVX: # %bb.0:
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: retq
%1 = and <4 x i32> %A, <i32 -1, i32 0, i32 0, i32 0>
ret <4 x i32> %1
@@ -195,7 +195,7 @@ define <4 x i32> @test11(<4 x i32> %A) {
; AVX-LABEL: test11:
; AVX: # %bb.0:
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT: retq
%1 = and <4 x i32> %A, <i32 0, i32 -1, i32 -1, i32 -1>
ret <4 x i32> %1
diff --git a/llvm/test/CodeGen/X86/combine-or-shuffle.ll b/llvm/test/CodeGen/X86/combine-or-shuffle.ll
index 95b5fcf8eac52..137c3d9dec7bd 100644
--- a/llvm/test/CodeGen/X86/combine-or-shuffle.ll
+++ b/llvm/test/CodeGen/X86/combine-or-shuffle.ll
@@ -86,10 +86,20 @@ define <4 x i32> @test4(<4 x i32> %a, <4 x i32> %b) {
; SSE4-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; SSE4-NEXT: retq
;
-; AVX-LABEL: test4:
-; AVX: # %bb.0:
-; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
-; AVX-NEXT: retq
+; AVX1-LABEL: test4:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test4:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: test4:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX512-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 1, i32 2, i32 3>
%or = or <4 x i32> %shuf1, %shuf2
@@ -108,10 +118,20 @@ define <4 x i32> @test5(<4 x i32> %a, <4 x i32> %b) {
; SSE4-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE4-NEXT: retq
;
-; AVX-LABEL: test5:
-; AVX: # %bb.0:
-; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
-; AVX-NEXT: retq
+; AVX1-LABEL: test5:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test5:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: test5:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX512-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 1, i32 2, i32 3>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4>
%or = or <4 x i32> %shuf1, %shuf2
@@ -241,10 +261,20 @@ define <4 x i32> @test11(<4 x i32> %a, <4 x i32> %b) {
; SSE4-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; SSE4-NEXT: retq
;
-; AVX-LABEL: test11:
-; AVX: # %bb.0:
-; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
-; AVX-NEXT: retq
+; AVX1-LABEL: test11:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test11:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: test11:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX512-NEXT: retq
%and1 = and <4 x i32> %a, <i32 -1, i32 0, i32 0, i32 0>
%and2 = and <4 x i32> %b, <i32 0, i32 -1, i32 -1, i32 -1>
%or = or <4 x i32> %and1, %and2
@@ -263,10 +293,20 @@ define <4 x i32> @test12(<4 x i32> %a, <4 x i32> %b) {
; SSE4-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE4-NEXT: retq
;
-; AVX-LABEL: test12:
-; AVX: # %bb.0:
-; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
-; AVX-NEXT: retq
+; AVX1-LABEL: test12:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test12:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: test12:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX512-NEXT: retq
%and1 = and <4 x i32> %a, <i32 0, i32 -1, i32 -1, i32 -1>
%and2 = and <4 x i32> %b, <i32 -1, i32 0, i32 0, i32 0>
%or = or <4 x i32> %and1, %and2
@@ -395,18 +435,18 @@ define <4 x i32> @test18(<4 x i32> %a, <4 x i32> %b) {
; AVX1-LABEL: test18:
; AVX1: # %bb.0:
; AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
+; AVX1-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,0,1,1]
-; AVX1-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
+; AVX1-NEXT: vmovss {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test18:
; AVX2: # %bb.0:
; AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
+; AVX2-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,0,1,1]
-; AVX2-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
+; AVX2-NEXT: vmovss {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
; AVX2-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll b/llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll
index 33bc93d0fe4db..95d350d45d901 100644
--- a/llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll
+++ b/llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll
@@ -1343,7 +1343,7 @@ define <2 x double> @test_fminimumnum_vector_nan...
[truncated]
You can test this locally with the following command:
git-clang-format --diff HEAD~1 HEAD --extensions cpp -- llvm/lib/Target/X86/X86FixupInstTuning.cpp
View the diff from clang-format here:
diff --git a/llvm/lib/Target/X86/X86FixupInstTuning.cpp b/llvm/lib/Target/X86/X86FixupInstTuning.cpp
index 748ebcc8a..54be3974f 100644
--- a/llvm/lib/Target/X86/X86FixupInstTuning.cpp
+++ b/llvm/lib/Target/X86/X86FixupInstTuning.cpp
@@ -221,7 +221,7 @@ bool X86FixupInstTuningPass::processInstruction(
auto ProcessUNPCKPS = [&](unsigned NewOpc) -> bool {
return ProcessUNPCKToIntDomain(NewOpc);
};
-
+
auto ProcessBLENDToMOV = [&](unsigned MovOpc) -> bool {
if (!MI.getOperand(NumOperands - 1).isImm() ||
MI.getOperand(NumOperands - 1).getImm() != 1)
Fix #142588
Following @RKSimon’s suggestion, the transformation applies only when the blend mask is exactly 1, indicating that the instruction behaves like a move. Additionally, the conversion will only be performed when optimizing for size or when the target prefers MOVSS/D over BLENDPS/D for performance reasons.
The switch-case instructions were identified with the help of GPT.
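To restate the tuning rule above as code, here is a hedged sketch of the decision only, using hypothetical parameter names (the actual patch queries hasOptSize() on the function and NewOpcPreferable() in the pass, as shown in the diff):

```cpp
// Sketch of the fold condition; the real pass mutates the MachineInstr
// (setDesc + removeOperand) when this predicate holds.
#include <cstdint>

bool shouldFoldBlendToMove(int64_t BlendImm, bool HasOptSize,
                           bool MovIsPreferred) {
  if (BlendImm != 1) // only a "low element from src2" blend acts like a move
    return false;
  // Fold when optimizing for size, or when the scheduler model says the
  // MOVSS/MOVSD form is at least as good as the BLENDPS/BLENDPD form.
  return HasOptSize || MovIsPreferred;
}
```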