Skip to content

Commit 83a5dd4

Browse files
committed
Add another assembly test showing no shift instructions in the masked load intrinsic
1 parent df7fcb1 commit 83a5dd4

File tree

1 file changed

+39
-24
lines changed

1 file changed

+39
-24
lines changed
+39-24
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
1-
// verify that simd mask reductions do not introduce additional bit shift operations
2-
//@ revisions: x86 aarch64
3-
//@ [x86] compile-flags: --target=x86_64-unknown-linux-gnu -C llvm-args=-x86-asm-syntax=intel
4-
//@ [x86] needs-llvm-components: x86
5-
//@ [aarch64] compile-flags: --target=aarch64-unknown-linux-gnu
6-
//@ [aarch64] needs-llvm-components: aarch64
7-
//@ [aarch64] min-llvm-version: 15.0
1+
// verify that simd masked load does not introduce additional bit shift operations
2+
//@ revisions: x86-avx x86-avx512
3+
//@ [x86-avx] compile-flags: --target=x86_64-unknown-linux-gnu -C llvm-args=-x86-asm-syntax=intel
4+
//@ [x86-avx] compile-flags: -C target-feature=+avx
5+
//@ [x86-avx] needs-llvm-components: x86
6+
//@ [x86-avx512] compile-flags: --target=x86_64-unknown-linux-gnu -C llvm-args=-x86-asm-syntax=intel
7+
//@ [x86-avx512] compile-flags: -C target-feature=+avx512f,+avx512vl,+avx512bw,+avx512dq
8+
//@ [x86-avx512] needs-llvm-components: x86
89
//@ assembly-output: emit-asm
910
//@ compile-flags: --crate-type=lib -O
1011

@@ -20,29 +21,43 @@ pub trait Sized {}
2021
trait Copy {}
2122

2223
#[repr(simd)]
23-
pub struct mask8x16([i8; 16]);
24+
pub struct f32x8([f32; 8]);
25+
26+
#[repr(simd)]
27+
pub struct m32x8([i32; 8]);
28+
29+
#[repr(simd)]
30+
pub struct f64x4([f64; 4]);
31+
32+
#[repr(simd)]
33+
pub struct m64x4([i64; 4]);
2434

2535
extern "rust-intrinsic" {
26-
fn simd_reduce_all<T>(x: T) -> bool;
27-
fn simd_reduce_any<T>(x: T) -> bool;
36+
fn simd_masked_load<M, P, T>(mask: M, pointer: P, values: T) -> T;
2837
}
2938

30-
// CHECK-LABEL: mask_reduce_all:
39+
// CHECK-LABEL: load_f32x8
3140
#[no_mangle]
32-
pub unsafe fn mask_reduce_all(m: mask8x16) -> bool {
33-
// x86: movdqa
34-
// x86-NEXT: pmovmskb
35-
// aarch64: cmge
36-
// aarch64-NEXT: umaxv
37-
simd_reduce_all(m)
41+
pub unsafe fn load_f32x8(mask: m32x8, pointer: *const f32, output: *mut f32x8) {
42+
// x86-avx-NOT: vpslld
43+
// x86-avx: vmovaps ymm0
44+
// x86-avx-NEXT: vmaskmovps
45+
// x86-avx512-NOT: vpslld
46+
// x86-avx512: vpcmpgtd k1
47+
// x86-avx512-NEXT: vmovups ymm0 {k1} {z}
48+
// x86-avx512-NEXT: vmovaps
49+
*output = simd_masked_load(mask, pointer, f32x8([0_f32, 0_f32, 0_f32, 0_f32, 0_f32, 0_f32, 0_f32, 0_f32]))
3850
}
3951

40-
// CHECK-LABEL: mask_reduce_any:
52+
// CHECK-LABEL: load_f64x4
4153
#[no_mangle]
42-
pub unsafe fn mask_reduce_any(m: mask8x16) -> bool {
43-
// x86: movdqa
44-
// x86-NEXT: pmovmskb
45-
// aarch64: cmlt
46-
// aarch64-NEXT: umaxv
47-
simd_reduce_any(m)
54+
pub unsafe fn load_f64x4(mask: m64x4, pointer: *const f64, output: *mut f64x4) {
55+
// x86-avx-NOT: vpsllq
56+
// x86-avx: vmovapd
57+
// x86-avx-NEXT: vmaskmovpd ymm0
58+
// x86-avx512-NOT: vpsllq
59+
// x86-avx512: vpcmpgtq k1
60+
// x86-avx512-NEXT: vmovupd ymm0 {k1} {z}
61+
// x86-avx512-NEXT: vmovapd
62+
*output = simd_masked_load(mask, pointer, f64x4([0_f64, 0_f64, 0_f64, 0_f64]))
4863
}

0 commit comments

Comments (0)