diff --git a/ci/run.sh b/ci/run.sh
index fd7ec81d50..8eadb9285c 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -10,7 +10,7 @@ set -ex
 #export RUST_TEST_NOCAPTURE=1
 #export RUST_TEST_THREADS=1
 
-export RUSTFLAGS="${RUSTFLAGS} -D warnings -Z merge-functions=disabled "
+export RUSTFLAGS="${RUSTFLAGS} -D warnings -Z merge-functions=disabled -Z verify-llvm-ir"
 export HOST_RUSTFLAGS="${RUSTFLAGS}"
 
 export PROFILE="${PROFILE:="--profile=release"}"
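For context on what the new flag enforces: `-Z verify-llvm-ir` runs LLVM's module verifier, so a Rust-side `extern` binding whose type disagrees with LLVM's own declaration of an intrinsic becomes a hard build error instead of silently invalid IR. A minimal sketch of the enforced pattern, simplified from the `avx.rs` hunk below (the `i8` immediate is the type LLVM declares for this intrinsic):

```rust
use core::arch::x86_64::__m256;

#[allow(improper_ctypes)]
unsafe extern "C" {
    // Binding `imm8` as `i32` (the old declaration) clashes with LLVM's
    // own `i8` operand type and now fails under `-Z verify-llvm-ir`.
    #[link_name = "llvm.x86.avx.dp.ps.256"]
    fn vdpps(a: __m256, b: __m256, imm8: i8) -> __m256;
}

fn main() {}
```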
diff --git a/crates/core_arch/src/aarch64/neon/generated.rs b/crates/core_arch/src/aarch64/neon/generated.rs
index adf526452b..b925429d4d 100644
--- a/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/crates/core_arch/src/aarch64/neon/generated.rs
@@ -21420,7 +21420,7 @@ pub fn vrbit_s8(a: int8x8_t) -> int8x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.rbit.v8i8"
+            link_name = "llvm.bitreverse.v8i8"
         )]
         fn _vrbit_s8(a: int8x8_t) -> int8x8_t;
     }
@@ -21436,7 +21436,7 @@ pub fn vrbitq_s8(a: int8x16_t) -> int8x16_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.rbit.v16i8"
+            link_name = "llvm.bitreverse.v16i8"
         )]
         fn _vrbitq_s8(a: int8x16_t) -> int8x16_t;
     }
@@ -23871,7 +23871,7 @@ pub fn vrndn_f64(a: float64x1_t) -> float64x1_t {
     unsafe extern "unadjusted" {
        #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frintn.v1f64"
+            link_name = "llvm.roundeven.v1f64"
         )]
         fn _vrndn_f64(a: float64x1_t) -> float64x1_t;
     }
@@ -23887,7 +23887,7 @@ pub fn vrndnq_f64(a: float64x2_t) -> float64x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frintn.v2f64"
+            link_name = "llvm.roundeven.v2f64"
         )]
         fn _vrndnq_f64(a: float64x2_t) -> float64x2_t;
     }
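These aarch64 hunks swap the target-specific `llvm.aarch64.neon.rbit.*` names for the generic `llvm.bitreverse.*` intrinsics, which lower to the same `RBIT` instruction. Per lane the operation is plain bit reversal; a scalar model of the semantics (a sketch, not the generated code):

```rust
// Each lane of vrbit_s8 is that lane's bits reversed, i.e. u8::reverse_bits.
fn rbit_lanes(lanes: [i8; 8]) -> [i8; 8] {
    lanes.map(|x| (x as u8).reverse_bits() as i8)
}

fn main() {
    assert_eq!(rbit_lanes([0b0000_0001; 8])[0] as u8, 0b1000_0000);
}
```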
diff --git a/crates/core_arch/src/arm_shared/neon/generated.rs b/crates/core_arch/src/arm_shared/neon/generated.rs
index 1842ad646e..852e103128 100644
--- a/crates/core_arch/src/arm_shared/neon/generated.rs
+++ b/crates/core_arch/src/arm_shared/neon/generated.rs
@@ -204,6 +204,7 @@ pub fn __crc32w(crc: u32, data: u32) -> u32 {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadal_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s8"))]
 #[cfg_attr(
@@ -221,6 +222,7 @@ fn priv_vpadal_s8(a: int16x4_t, b: int8x8_t) -> int16x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadalq_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s8"))]
 #[cfg_attr(
@@ -238,6 +240,7 @@ fn priv_vpadalq_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadal_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s16"))]
 #[cfg_attr(
@@ -255,6 +258,7 @@ fn priv_vpadal_s16(a: int32x2_t, b: int16x4_t) -> int32x2_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadalq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s16"))]
 #[cfg_attr(
@@ -272,6 +276,7 @@ fn priv_vpadalq_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadal_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s32"))]
 #[cfg_attr(
@@ -289,6 +294,7 @@ fn priv_vpadal_s32(a: int64x1_t, b: int32x2_t) -> int64x1_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadalq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s32"))]
 #[cfg_attr(
@@ -306,6 +312,7 @@ fn priv_vpadalq_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadal_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u8"))]
 #[cfg_attr(
@@ -323,6 +330,7 @@ fn priv_vpadal_u8(a: uint16x4_t, b: uint8x8_t) -> uint16x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadalq_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u8"))]
 #[cfg_attr(
@@ -340,6 +348,7 @@ fn priv_vpadalq_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadal_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u16"))]
 #[cfg_attr(
@@ -357,6 +366,7 @@ fn priv_vpadal_u16(a: uint32x2_t, b: uint16x4_t) -> uint32x2_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadalq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u16"))]
 #[cfg_attr(
@@ -374,6 +384,7 @@ fn priv_vpadalq_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadal_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u32"))]
 #[cfg_attr(
@@ -391,6 +402,7 @@ fn priv_vpadal_u32(a: uint64x1_t, b: uint32x2_t) -> uint64x1_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadalq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u32"))]
 #[cfg_attr(
@@ -58712,7 +58724,7 @@ pub fn vrndn_f16(a: float16x4_t) -> float16x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frintn.v4f16"
+            link_name = "llvm.roundeven.v4f16"
         )]
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v4f16")]
         fn _vrndn_f16(a: float16x4_t) -> float16x4_t;
@@ -58734,7 +58746,7 @@ pub fn vrndnq_f16(a: float16x8_t) -> float16x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frintn.v8f16"
+            link_name = "llvm.roundeven.v8f16"
         )]
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v8f16")]
         fn _vrndnq_f16(a: float16x8_t) -> float16x8_t;
@@ -58763,7 +58775,7 @@ pub fn vrndn_f32(a: float32x2_t) -> float32x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frintn.v2f32"
+            link_name = "llvm.roundeven.v2f32"
         )]
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v2f32")]
         fn _vrndn_f32(a: float32x2_t) -> float32x2_t;
@@ -58792,7 +58804,7 @@ pub fn vrndnq_f32(a: float32x4_t) -> float32x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frintn.v4f32"
+            link_name = "llvm.roundeven.v4f32"
         )]
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v4f32")]
         fn _vrndnq_f32(a: float32x4_t) -> float32x4_t;
@@ -61531,6 +61543,8 @@ pub fn vsha256su1q_u32(tw0_3: uint32x4_t, w8_11: uint32x4_t, w12_15: uint32x4_t)
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v16i8)"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 fn vshiftins_v16i8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t {
     unsafe extern "unadjusted" {
@@ -61543,6 +61557,8 @@ fn vshiftins_v16i8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v1i64)"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 fn vshiftins_v1i64(a: int64x1_t, b: int64x1_t, c: int64x1_t) -> int64x1_t {
     unsafe extern "unadjusted" {
@@ -61555,6 +61571,8 @@ fn vshiftins_v1i64(a: int64x1_t, b: int64x1_t, c: int64x1_t) -> int64x1_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v2i32)"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 fn vshiftins_v2i32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
     unsafe extern "unadjusted" {
@@ -61567,6 +61585,8 @@ fn vshiftins_v2i32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v2i64)"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 fn vshiftins_v2i64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t {
     unsafe extern "unadjusted" {
@@ -61579,6 +61599,8 @@ fn vshiftins_v2i64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v4i16)"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 fn vshiftins_v4i16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
     unsafe extern "unadjusted" {
@@ -61591,6 +61613,8 @@ fn vshiftins_v4i16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v4i32)"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 fn vshiftins_v4i32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
     unsafe extern "unadjusted" {
@@ -61603,6 +61627,8 @@ fn vshiftins_v4i32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v8i16)"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 fn vshiftins_v8i16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
     unsafe extern "unadjusted" {
@@ -61615,6 +61641,8 @@ fn vshiftins_v8i16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v8i8)"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 fn vshiftins_v8i8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
     unsafe extern "unadjusted" {
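Same idea for `frintn`: the generic `llvm.roundeven.*` intrinsics carry the round-to-nearest, ties-to-even semantics of `FRINTN`/`VRINTN`. A scalar model of the tie-breaking, using the stable `round_ties_even`:

```rust
fn main() {
    // Ties round to the even neighbour, unlike f32::round.
    assert_eq!(0.5_f32.round_ties_even(), 0.0);
    assert_eq!(1.5_f32.round_ties_even(), 2.0);
    assert_eq!(2.5_f32.round_ties_even(), 2.0);
}
```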
"neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] fn vshiftins_v2i64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t { unsafe extern "unadjusted" { @@ -61579,6 +61599,8 @@ fn vshiftins_v2i64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t { #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v4i16)"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] fn vshiftins_v4i16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { unsafe extern "unadjusted" { @@ -61591,6 +61613,8 @@ fn vshiftins_v4i16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v4i32)"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] fn vshiftins_v4i32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { unsafe extern "unadjusted" { @@ -61603,6 +61627,8 @@ fn vshiftins_v4i32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v8i16)"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] fn vshiftins_v8i16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { unsafe extern "unadjusted" { @@ -61615,6 +61641,8 @@ fn vshiftins_v8i16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v8i8)"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] fn vshiftins_v8i8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { unsafe extern "unadjusted" { diff --git a/crates/core_arch/src/loongarch64/mod.rs b/crates/core_arch/src/loongarch64/mod.rs index ed4bcc06f7..b1704bbb48 100644 --- a/crates/core_arch/src/loongarch64/mod.rs +++ b/crates/core_arch/src/loongarch64/mod.rs @@ -329,16 +329,18 @@ pub unsafe fn asrtgt(a: i64, b: i64) { /// Loads the page table directory entry #[inline] +#[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub unsafe fn lddir(a: i64, b: i64) -> i64 { - __lddir(a, b) +pub unsafe fn lddir(a: i64) -> i64 { + __lddir(a, B) } /// Loads the page table entry #[inline] +#[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub unsafe fn ldpte(a: i64, b: i64) { - __ldpte(a, b) +pub unsafe fn ldpte(a: i64) { + __ldpte(a, B) } /// Calculate the approximate single-precision result of 1.0 divided diff --git a/crates/core_arch/src/powerpc/altivec.rs b/crates/core_arch/src/powerpc/altivec.rs index 3243b98a6d..2deeb53c20 100644 --- a/crates/core_arch/src/powerpc/altivec.rs +++ b/crates/core_arch/src/powerpc/altivec.rs @@ -338,26 +338,26 @@ unsafe extern "C" { #[link_name = 
"llvm.ppc.altivec.vlogefp"] fn vlogefp(a: vector_float) -> vector_float; - #[link_name = "llvm.ppc.altivec.sll"] + #[link_name = "llvm.ppc.altivec.vsl"] fn vsl(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int; - #[link_name = "llvm.ppc.altivec.slo"] + #[link_name = "llvm.ppc.altivec.vslo"] fn vslo(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int; - #[link_name = "llvm.ppc.altivec.srab"] + #[link_name = "llvm.ppc.altivec.vsrab"] fn vsrab(a: vector_signed_char, b: vector_unsigned_char) -> vector_signed_char; - #[link_name = "llvm.ppc.altivec.srah"] + #[link_name = "llvm.ppc.altivec.vsrah"] fn vsrah(a: vector_signed_short, b: vector_unsigned_short) -> vector_signed_short; - #[link_name = "llvm.ppc.altivec.sraw"] + #[link_name = "llvm.ppc.altivec.vsraw"] fn vsraw(a: vector_signed_int, b: vector_unsigned_int) -> vector_signed_int; - #[link_name = "llvm.ppc.altivec.srl"] + #[link_name = "llvm.ppc.altivec.vsr"] fn vsr(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int; - #[link_name = "llvm.ppc.altivec.sro"] + #[link_name = "llvm.ppc.altivec.vsro"] fn vsro(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int; - #[link_name = "llvm.ppc.altivec.slv"] + #[link_name = "llvm.ppc.altivec.vslv"] fn vslv(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char; - #[link_name = "llvm.ppc.altivec.srv"] + #[link_name = "llvm.ppc.altivec.vsrv"] fn vsrv(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char; #[link_name = "llvm.fshl.v16i8"] diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs index 1e8b9f1896..ae5c37ce01 100644 --- a/crates/core_arch/src/s390x/vector.rs +++ b/crates/core_arch/src/s390x/vector.rs @@ -174,9 +174,9 @@ unsafe extern "unadjusted" { #[link_name = "llvm.s390.vpklsfs"] fn vpklsfs(a: vector_unsigned_int, b: vector_unsigned_int) -> PackedTuple; #[link_name = "llvm.s390.vpklsgs"] fn vpklsgs(a: vector_unsigned_long_long, b: vector_unsigned_long_long) -> PackedTuple; - #[link_name = "llvm.s390.vuplbw"] fn vuplbw (a: vector_signed_char) -> vector_signed_short; + #[link_name = "llvm.s390.vuplb"] fn vuplb (a: vector_signed_char) -> vector_signed_short; #[link_name = "llvm.s390.vuplhw"] fn vuplhw (a: vector_signed_short) -> vector_signed_int; - #[link_name = "llvm.s390.vuplfw"] fn vuplfw (a: vector_signed_int) -> vector_signed_long_long; + #[link_name = "llvm.s390.vuplf"] fn vuplf (a: vector_signed_int) -> vector_signed_long_long; #[link_name = "llvm.s390.vupllb"] fn vupllb (a: vector_unsigned_char) -> vector_unsigned_short; #[link_name = "llvm.s390.vupllh"] fn vupllh (a: vector_unsigned_short) -> vector_unsigned_int; #[link_name = "llvm.s390.vupllf"] fn vupllf (a: vector_unsigned_int) -> vector_unsigned_long_long; @@ -2581,9 +2581,9 @@ mod sealed { // FIXME(llvm): a shuffle + simd_as does not currently optimize into a single instruction like // unpachk above. Tracked in https://github.com/llvm/llvm-project/issues/129576. - impl_vec_trait! {[VectorUnpackl vec_unpackl] vuplbw (vector_signed_char) -> vector_signed_short} + impl_vec_trait! {[VectorUnpackl vec_unpackl] vuplb (vector_signed_char) -> vector_signed_short} impl_vec_trait! {[VectorUnpackl vec_unpackl] vuplhw (vector_signed_short) -> vector_signed_int} - impl_vec_trait! {[VectorUnpackl vec_unpackl] vuplfw (vector_signed_int) -> vector_signed_long_long} + impl_vec_trait! {[VectorUnpackl vec_unpackl] vuplf (vector_signed_int) -> vector_signed_long_long} impl_vec_trait! 
diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index 1e8b9f1896..ae5c37ce01 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -174,9 +174,9 @@ unsafe extern "unadjusted" {
    #[link_name = "llvm.s390.vpklsfs"] fn vpklsfs(a: vector_unsigned_int, b: vector_unsigned_int) -> PackedTuple<vector_unsigned_short, i32>;
    #[link_name = "llvm.s390.vpklsgs"] fn vpklsgs(a: vector_unsigned_long_long, b: vector_unsigned_long_long) -> PackedTuple<vector_unsigned_int, i32>;
 
-    #[link_name = "llvm.s390.vuplbw"] fn vuplbw (a: vector_signed_char) -> vector_signed_short;
+    #[link_name = "llvm.s390.vuplb"] fn vuplb (a: vector_signed_char) -> vector_signed_short;
     #[link_name = "llvm.s390.vuplhw"] fn vuplhw (a: vector_signed_short) -> vector_signed_int;
-    #[link_name = "llvm.s390.vuplfw"] fn vuplfw (a: vector_signed_int) -> vector_signed_long_long;
+    #[link_name = "llvm.s390.vuplf"] fn vuplf (a: vector_signed_int) -> vector_signed_long_long;
     #[link_name = "llvm.s390.vupllb"] fn vupllb (a: vector_unsigned_char) -> vector_unsigned_short;
     #[link_name = "llvm.s390.vupllh"] fn vupllh (a: vector_unsigned_short) -> vector_unsigned_int;
     #[link_name = "llvm.s390.vupllf"] fn vupllf (a: vector_unsigned_int) -> vector_unsigned_long_long;
@@ -2581,9 +2581,9 @@ mod sealed {
 
     // FIXME(llvm): a shuffle + simd_as does not currently optimize into a single instruction like
     // unpachk above. Tracked in https://github.com/llvm/llvm-project/issues/129576.
-    impl_vec_trait! {[VectorUnpackl vec_unpackl] vuplbw (vector_signed_char) -> vector_signed_short}
+    impl_vec_trait! {[VectorUnpackl vec_unpackl] vuplb (vector_signed_char) -> vector_signed_short}
     impl_vec_trait! {[VectorUnpackl vec_unpackl] vuplhw (vector_signed_short) -> vector_signed_int}
-    impl_vec_trait! {[VectorUnpackl vec_unpackl] vuplfw (vector_signed_int) -> vector_signed_long_long}
+    impl_vec_trait! {[VectorUnpackl vec_unpackl] vuplf (vector_signed_int) -> vector_signed_long_long}
     impl_vec_trait! {[VectorUnpackl vec_unpackl] vupllb (vector_unsigned_char) -> vector_unsigned_short}
     impl_vec_trait! {[VectorUnpackl vec_unpackl] vupllh (vector_unsigned_short) -> vector_unsigned_int}
 
@@ -3011,9 +3011,9 @@ mod sealed {
 
         #[inline]
         #[target_feature(enable = "vector")]
         unsafe fn vec_sel(self, b: Self, c: t_u!($ty)) -> Self {
-            let b = simd_and(b, transmute(c));
-            let a = simd_and(self, simd_xor(transmute(c), transmute(vector_signed_char([!0; 16]))));
-            simd_or(a, b)
+            let b = simd_and(transmute(b), c);
+            let a = simd_and(transmute(self), simd_xor(c, transmute(vector_signed_char([!0; 16]))));
+            transmute(simd_or(a, b))
         }
     }
@@ -3198,14 +3198,14 @@ mod sealed {
     #[unstable(feature = "stdarch_s390x", issue = "135681")]
     impl VectorSearchString for $ty {
         #[inline]
-        #[target_feature(enable = "vector")]
+        #[target_feature(enable = "vector-enhancements-2")]
         unsafe fn vec_search_string_cc(self, b: Self, c: vector_unsigned_char) -> (vector_unsigned_char, i32) {
             let PackedTuple { x, y } = $intr_s(transmute(self), transmute(b), c);
             (x, y)
         }
 
         #[inline]
-        #[target_feature(enable = "vector")]
+        #[target_feature(enable = "vector-enhancements-2")]
         unsafe fn vec_search_string_until_zero_cc(self, b: Self, c: vector_unsigned_char) -> (vector_unsigned_char, i32) {
             let PackedTuple { x, y } = $intr_sz(transmute(self), transmute(b), c);
             (x, y)
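The reworked `vec_sel` keeps every `simd_and`/`simd_xor` in the unsigned mask type and transmutes once at the end, so both operands of each operation agree in type. The select itself is the usual bitwise blend; a scalar model (a sketch, not the vector code):

```rust
// For each bit, take `b` where the mask `c` is 1 and `a` where it is 0.
fn sel(a: u32, b: u32, c: u32) -> u32 {
    (b & c) | (a & !c)
}

fn main() {
    assert_eq!(sel(0xFFFF_0000, 0x0000_FFFF, 0x00FF_00FF), 0xFF00_00FF);
}
```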
diff --git a/crates/core_arch/src/x86/avx.rs b/crates/core_arch/src/x86/avx.rs
index 9a8c08b01b..df1cb63be3 100644
--- a/crates/core_arch/src/x86/avx.rs
+++ b/crates/core_arch/src/x86/avx.rs
@@ -573,7 +573,7 @@ pub fn _mm256_blendv_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub fn _mm256_dp_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
     static_assert_uimm_bits!(IMM8, 8);
-    unsafe { vdpps(a, b, IMM8) }
+    unsafe { vdpps(a, b, IMM8 as i8) }
 }
 
 /// Horizontal addition of adjacent pairs in the two packed vectors
@@ -3043,7 +3043,7 @@ unsafe extern "C" {
     #[link_name = "llvm.x86.avx.round.ps.256"]
     fn roundps256(a: __m256, b: i32) -> __m256;
     #[link_name = "llvm.x86.avx.dp.ps.256"]
-    fn vdpps(a: __m256, b: __m256, imm8: i32) -> __m256;
+    fn vdpps(a: __m256, b: __m256, imm8: i8) -> __m256;
     #[link_name = "llvm.x86.avx.hadd.pd.256"]
     fn vhaddpd(a: __m256d, b: __m256d) -> __m256d;
     #[link_name = "llvm.x86.avx.hadd.ps.256"]
diff --git a/crates/core_arch/src/x86/avx2.rs b/crates/core_arch/src/x86/avx2.rs
index 1c488c6d74..21f20f9c75 100644
--- a/crates/core_arch/src/x86/avx2.rs
+++ b/crates/core_arch/src/x86/avx2.rs
@@ -2146,7 +2146,7 @@ pub fn _mm256_movemask_epi8(a: __m256i) -> i32 {
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub fn _mm256_mpsadbw_epu8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
     static_assert_uimm_bits!(IMM8, 8);
-    unsafe { transmute(mpsadbw(a.as_u8x32(), b.as_u8x32(), IMM8)) }
+    unsafe { transmute(mpsadbw(a.as_u8x32(), b.as_u8x32(), IMM8 as i8)) }
 }
 
 /// Multiplies the low 32-bit integers from each packed 64-bit element in
@@ -3800,7 +3800,7 @@ unsafe extern "C" {
     #[link_name = "llvm.x86.avx2.maskstore.q.256"]
     fn maskstoreq256(mem_addr: *mut i8, mask: i64x4, a: i64x4);
     #[link_name = "llvm.x86.avx2.mpsadbw"]
-    fn mpsadbw(a: u8x32, b: u8x32, imm8: i32) -> u16x16;
+    fn mpsadbw(a: u8x32, b: u8x32, imm8: i8) -> u16x16;
     #[link_name = "llvm.x86.avx2.pmul.hr.sw"]
     fn pmulhrsw(a: i16x16, b: i16x16) -> i16x16;
     #[link_name = "llvm.x86.avx2.packsswb"]
diff --git a/crates/core_arch/src/x86/avx512bf16.rs b/crates/core_arch/src/x86/avx512bf16.rs
index ca45761d08..940d3bbd67 100644
--- a/crates/core_arch/src/x86/avx512bf16.rs
+++ b/crates/core_arch/src/x86/avx512bf16.rs
@@ -22,11 +22,11 @@ unsafe extern "C" {
     #[link_name = "llvm.x86.avx512bf16.cvtneps2bf16.512"]
     fn cvtneps2bf16_512(a: f32x16) -> i16x16;
     #[link_name = "llvm.x86.avx512bf16.dpbf16ps.128"]
-    fn dpbf16ps(a: f32x4, b: i32x4, c: i32x4) -> f32x4;
+    fn dpbf16ps(a: f32x4, b: i16x8, c: i16x8) -> f32x4;
     #[link_name = "llvm.x86.avx512bf16.dpbf16ps.256"]
-    fn dpbf16ps_256(a: f32x8, b: i32x8, c: i32x8) -> f32x8;
+    fn dpbf16ps_256(a: f32x8, b: i16x16, c: i16x16) -> f32x8;
     #[link_name = "llvm.x86.avx512bf16.dpbf16ps.512"]
-    fn dpbf16ps_512(a: f32x16, b: i32x16, c: i32x16) -> f32x16;
+    fn dpbf16ps_512(a: f32x16, b: i16x32, c: i16x32) -> f32x16;
 }
 
 /// Convert packed single-precision (32-bit) floating-point elements in two 128-bit vectors
@@ -250,7 +250,7 @@ pub fn _mm512_maskz_cvtneps_pbh(k: __mmask16, a: __m512) -> __m256bh {
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
 #[cfg_attr(test, assert_instr("vdpbf16ps"))]
 pub fn _mm_dpbf16_ps(src: __m128, a: __m128bh, b: __m128bh) -> __m128 {
-    unsafe { transmute(dpbf16ps(src.as_f32x4(), a.as_i32x4(), b.as_i32x4())) }
+    unsafe { transmute(dpbf16ps(src.as_f32x4(), a.as_i16x8(), b.as_i16x8())) }
 }
 
 /// Compute dot-product of BF16 (16-bit) floating-point pairs in a and b,
@@ -295,7 +295,7 @@ pub fn _mm_maskz_dpbf16_ps(k: __mmask8, src: __m128, a: __m128bh, b: __m128bh) -
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
 #[cfg_attr(test, assert_instr("vdpbf16ps"))]
 pub fn _mm256_dpbf16_ps(src: __m256, a: __m256bh, b: __m256bh) -> __m256 {
-    unsafe { transmute(dpbf16ps_256(src.as_f32x8(), a.as_i32x8(), b.as_i32x8())) }
+    unsafe { transmute(dpbf16ps_256(src.as_f32x8(), a.as_i16x16(), b.as_i16x16())) }
 }
 
 /// Compute dot-product of BF16 (16-bit) floating-point pairs in a and b,
@@ -341,7 +341,7 @@ pub fn _mm256_maskz_dpbf16_ps(k: __mmask8, src: __m256, a: __m256bh, b: __m256bh
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
 #[cfg_attr(test, assert_instr("vdpbf16ps"))]
 pub fn _mm512_dpbf16_ps(src: __m512, a: __m512bh, b: __m512bh) -> __m512 {
-    unsafe { transmute(dpbf16ps_512(src.as_f32x16(), a.as_i32x16(), b.as_i32x16())) }
+    unsafe { transmute(dpbf16ps_512(src.as_f32x16(), a.as_i16x32(), b.as_i16x32())) }
 }
 
 /// Compute dot-product of BF16 (16-bit) floating-point pairs in a and b,
diff --git a/crates/core_arch/src/x86/rdtsc.rs b/crates/core_arch/src/x86/rdtsc.rs
index e714aa863b..3b348153d6 100644
--- a/crates/core_arch/src/x86/rdtsc.rs
+++ b/crates/core_arch/src/x86/rdtsc.rs
@@ -46,15 +46,17 @@ pub unsafe fn _rdtsc() -> u64 {
 #[cfg_attr(test, assert_instr(rdtscp))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn __rdtscp(aux: *mut u32) -> u64 {
-    rdtscp(aux as *mut _)
+    let (tsc, auxval) = rdtscp();
+    *aux = auxval;
+    tsc
 }
 
 #[allow(improper_ctypes)]
-unsafe extern "C" {
+unsafe extern "unadjusted" {
     #[link_name = "llvm.x86.rdtsc"]
     fn rdtsc() -> u64;
     #[link_name = "llvm.x86.rdtscp"]
-    fn rdtscp(aux: *mut u8) -> u64;
+    fn rdtscp() -> (u64, u32);
 }
 
 #[cfg(test)]
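`llvm.x86.rdtscp` actually returns an aggregate `{ i64, i32 }`, so the binding now returns a tuple under the `"unadjusted"` ABI and the wrapper writes the aux word through the caller's pointer itself. The public `__rdtscp` API is unchanged; a usage sketch (x86_64 only):

```rust
use core::arch::x86_64::__rdtscp;

fn main() {
    let mut aux = 0u32;
    // `aux` receives IA32_TSC_AUX (commonly the core ID set by the OS).
    let tsc = unsafe { __rdtscp(&mut aux) };
    println!("tsc = {tsc}, aux = {aux}");
}
```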
diff --git a/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
index d4d5f77512..c39547d926 100644
--- a/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
+++ b/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@@ -2943,7 +2943,7 @@ intrinsics:
       - LLVMLink:
           name: "rbit.{neon_type}"
           links:
-            - link: "llvm.aarch64.neon.rbit.{neon_type}"
+            - link: "llvm.bitreverse.{neon_type}"
              arch: aarch64,arm64ec
 
   - name: "vrbit{neon_type[0].no}"
@@ -3096,7 +3096,7 @@ intrinsics:
       - LLVMLink:
           name: "frintn.{neon_type}"
           links:
-            - link: "llvm.aarch64.neon.frintn.{neon_type}"
+            - link: "llvm.roundeven.{neon_type}"
              arch: aarch64,arm64ec
 
   - name: "vrndns_{type}"
diff --git a/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml b/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
index d58c2ed10e..89e02ec3be 100644
--- a/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
+++ b/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
@@ -2469,7 +2469,7 @@ intrinsics:
       - LLVMLink:
           name: "llvm.frinn.{neon_type}"
           links:
-            - link: "llvm.aarch64.neon.frintn.{neon_type}"
+            - link: "llvm.roundeven.{neon_type}"
              arch: aarch64,arm64ec
            - link: "llvm.arm.neon.vrintn.{neon_type}"
              arch: arm
@@ -2492,7 +2492,7 @@ intrinsics:
       - LLVMLink:
           name: "llvm.frinn.{neon_type}"
           links:
-            - link: "llvm.aarch64.neon.frintn.{neon_type}"
+            - link: "llvm.roundeven.{neon_type}"
              arch: aarch64,arm64ec
            - link: "llvm.arm.neon.vrintn.{neon_type}"
              arch: arm
@@ -13202,6 +13202,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     safety: safe
     attr:
+      - *target-is-arm
       - *neon-v7
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['{type[2]}']]}]]
       - *neon-cfg-arm-unstable
@@ -13227,6 +13228,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     safety: safe
     attr:
+      - *target-is-arm
      - *neon-v7
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['{type[2]}']]}]]
       - *neon-cfg-arm-unstable
@@ -13812,8 +13814,8 @@ intrinsics:
     return_type: "{neon_type[1]}"
     safety: safe
     attr:
-      #- *target-is-arm
-      #- *neon-v7
+      - *target-is-arm
+      - *neon-v7
       - *neon-arm-unstable
     types:
       - ['_v8i8', "int8x8_t", '8']