diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index eb4a7b6dbf..928dfc7688 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -221,6 +221,10 @@ jobs: shell: bash if: startsWith(matrix.target.tuple, 'thumb') || matrix.target.tuple == 'nvptx64-nvidia-cuda' + - run: objdump --version + if: matrix.target.tuple == 'x86_64-apple-darwin' + shell: bash + # Windows & OSX go straight to `run.sh` ... - run: ./ci/run.sh shell: bash diff --git a/crates/core_arch/src/x86/sha.rs b/crates/core_arch/src/x86/sha.rs index 144677818a..dc82b278da 100644 --- a/crates/core_arch/src/x86/sha.rs +++ b/crates/core_arch/src/x86/sha.rs @@ -16,6 +16,12 @@ extern "C" { fn sha256msg2(a: i32x4, b: i32x4) -> i32x4; #[link_name = "llvm.x86.sha256rnds2"] fn sha256rnds2(a: i32x4, b: i32x4, k: i32x4) -> i32x4; + #[link_name = "llvm.x86.vsha512msg1"] + fn vsha512msg1(a: i64x4, b: i64x2) -> i64x4; + #[link_name = "llvm.x86.vsha512msg2"] + fn vsha512msg2(a: i64x4, b: i64x4) -> i64x4; + #[link_name = "llvm.x86.vsha512rnds2"] + fn vsha512rnds2(a: i64x4, b: i64x4, c: i64x2) -> i64x4; } #[cfg(test)] @@ -118,6 +124,43 @@ pub unsafe fn _mm_sha256rnds2_epu32(a: __m128i, b: __m128i, k: __m128i) -> __m12 transmute(sha256rnds2(a.as_i32x4(), b.as_i32x4(), k.as_i32x4())) } +/// Performs an intermediate calculation for the next four SHA512 message qwords. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sha512msg1_epi64) +#[inline] +#[target_feature(enable = "sha512,avx")] +#[cfg_attr(test, assert_instr(vsha512msg1))] +#[unstable(feature = "sha512", issue = "none")] +pub unsafe fn _mm256_sha512msg1_epi64(a: __m256i, b: __m128i) -> __m256i { + transmute(vsha512msg1(a.as_i64x4(), b.as_i64x2())) +} + +/// Performs the final calculation for the next four SHA512 message qwords. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sha512msg2_epi64) +#[inline] +#[target_feature(enable = "sha512,avx")] +#[cfg_attr(test, assert_instr(vsha512msg2))] +#[unstable(feature = "sha512", issue = "none")] +pub unsafe fn _mm256_sha512msg2_epi64(a: __m256i, b: __m256i) -> __m256i { + transmute(vsha512msg2(a.as_i64x4(), b.as_i64x4())) +} + +/// Performs two rounds of SHA512 operation using initial SHA512 state (C,D,G,H) from `a`, +/// an initial SHA512 state (A,B,E,F) from `b`, and a pre-computed sum of the next two +/// round message qwords and the corresponding round constants from `c` (only the two +/// lower qwords of the third operand). The updated SHA512 state (A,B,E,F) is returned, and +/// can be used as the updated state (C,D,G,H) in later rounds. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sha512rnds2_epi64) +#[inline] +#[target_feature(enable = "sha512,avx")] +#[cfg_attr(test, assert_instr(vsha512rnds2))] +#[unstable(feature = "sha512", issue = "none")] +pub unsafe fn _mm256_sha512rnds2_epi64(a: __m256i, b: __m256i, c: __m128i) -> __m256i { + transmute(vsha512rnds2(a.as_i64x4(), b.as_i64x4(), c.as_i64x2())) +} + #[cfg(test)] mod tests { use std::{ @@ -215,4 +258,35 @@ mod tests { let r = _mm_sha256rnds2_epu32(a, b, k); assert_eq_m128i(r, expected); } + + #[simd_test(enable = "sha512,avx")] + #[allow(overflowing_literals)] + unsafe fn test_mm256_sha512msg1_epi64() { + let a = _mm256_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98, 0x0, 0x0); + let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b); + let expected = _mm256_set_epi64x(0xeb84973fd5cda67d, 0x2857b88f406b09ee, 0x0, 0x0); + let r = _mm256_sha512msg1_epi64(a, b); + assert_eq_m256i(r, expected); + } + + #[simd_test(enable = "sha512,avx")] + #[allow(overflowing_literals)] + unsafe fn test_mm256_sha512msg2_epi64() { + let a = _mm256_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98, 0x0, 0x0); + let b = _mm256_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98, 0x0, 0x0); + let expected = _mm256_set_epi64x(0xf714b202d863d47d, 0x90c30d946b3d3b35, 0x0, 0x0); + let r = _mm256_sha512msg2_epi64(a, b); + assert_eq_m256i(r, expected); + } + + #[simd_test(enable = "sha512,avx")] + #[allow(overflowing_literals)] + unsafe fn test_mm256_sha512rnds2_epi64() { + let a = _mm256_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98, 0x0, 0x0); + let b = _mm256_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b, 0x0, 0x0); + let k = _mm_set_epi64x(0, 0x12835b01d807aa98); + let expected = _mm256_set_epi64x(0xd3063037effb15ea, 0x187ee3db0d6d1d19, 0x0, 0x0); + let r = _mm256_sha512rnds2_epi64(a, b, k); + assert_eq_m256i(r, expected); + } }