@@ -1,10 +1,11 @@
- // verify that simd mask reductions do not introduce additional bit shift operations
- //@ revisions: x86 aarch64
- //@ [x86] compile-flags: --target=x86_64-unknown-linux-gnu -C llvm-args=-x86-asm-syntax=intel
- //@ [x86] needs-llvm-components: x86
- //@ [aarch64] compile-flags: --target=aarch64-unknown-linux-gnu
- //@ [aarch64] needs-llvm-components: aarch64
- //@ [aarch64] min-llvm-version: 15.0
+ // verify that simd masked load does not introduce additional bit shift operations
+ //@ revisions: x86-avx x86-avx512
+ //@ [x86-avx] compile-flags: --target=x86_64-unknown-linux-gnu -C llvm-args=-x86-asm-syntax=intel
+ //@ [x86-avx] compile-flags: -C target-feature=+avx
+ //@ [x86-avx] needs-llvm-components: x86
+ //@ [x86-avx512] compile-flags: --target=x86_64-unknown-linux-gnu -C llvm-args=-x86-asm-syntax=intel
+ //@ [x86-avx512] compile-flags: -C target-feature=+avx512f,+avx512vl,+avx512bw,+avx512dq
+ //@ [x86-avx512] needs-llvm-components: x86
//@ assembly-output: emit-asm
//@ compile-flags: --crate-type=lib -O

@@ -20,29 +21,43 @@ pub trait Sized {}
trait Copy {}

#[repr(simd)]
- pub struct mask8x16([i8; 16]);
+ pub struct f32x8([f32; 8]);
+
+ #[repr(simd)]
+ pub struct m32x8([i32; 8]);
+
+ #[repr(simd)]
+ pub struct f64x4([f64; 4]);
+
+ #[repr(simd)]
+ pub struct m64x4([i64; 4]);

extern "rust-intrinsic" {
-     fn simd_reduce_all<T>(x: T) -> bool;
-     fn simd_reduce_any<T>(x: T) -> bool;
+     fn simd_masked_load<M, P, T>(mask: M, pointer: P, values: T) -> T;
}

- // CHECK-LABEL: mask_reduce_all:
+ // CHECK-LABEL: load_f32x8
#[no_mangle]
- pub unsafe fn mask_reduce_all(m: mask8x16) -> bool {
-     // x86: movdqa
-     // x86-NEXT: pmovmskb
-     // aarch64: cmge
-     // aarch64-NEXT: umaxv
-     simd_reduce_all(m)
+ pub unsafe fn load_f32x8(mask: m32x8, pointer: *const f32, output: *mut f32x8) {
+     // x86-avx-NOT: vpslld
+     // x86-avx: vmovaps ymm0
+     // x86-avx-NEXT: vmaskmovps
+     // x86-avx512-NOT: vpslld
+     // x86-avx512: vpcmpgtd k1
+     // x86-avx512-NEXT: vmovups ymm0 {k1} {z}
+     // x86-avx512-NEXT: vmovaps
+     *output = simd_masked_load(mask, pointer, f32x8([0_f32, 0_f32, 0_f32, 0_f32, 0_f32, 0_f32, 0_f32, 0_f32]))
}

- // CHECK-LABEL: mask_reduce_any:
+ // CHECK-LABEL: load_f64x4
#[no_mangle]
- pub unsafe fn mask_reduce_any(m: mask8x16) -> bool {
-     // x86: movdqa
-     // x86-NEXT: pmovmskb
-     // aarch64: cmlt
-     // aarch64-NEXT: umaxv
-     simd_reduce_any(m)
+ pub unsafe fn load_f64x4(mask: m64x4, pointer: *const f64, output: *mut f64x4) {
+     // x86-avx-NOT: vpsllq
+     // x86-avx: vmovapd
+     // x86-avx-NEXT: vmaskmovpd ymm0
+     // x86-avx512-NOT: vpsllq
+     // x86-avx512: vpcmpgtq k1
+     // x86-avx512-NEXT: vmovupd ymm0 {k1} {z}
+     // x86-avx512-NEXT: vmovapd
+     *output = simd_masked_load(mask, pointer, f64x4([0_f64, 0_f64, 0_f64, 0_f64]))
}