Skip to content

Commit 18bcbf7

Browse files
author
Smita Kamath
committed Oct 21, 2024
8341052: SHA-512 implementation using SHA-NI
Reviewed-by: jbhateja, ascarpino, sviswanathan, sparasa
1 parent 54a744b commit 18bcbf7

File tree

10 files changed

+260
-19
lines changed

10 files changed

+260
-19
lines changed
 

Diff for: ‎src/hotspot/cpu/x86/assembler_x86.cpp

+34
Original file line numberDiff line numberDiff line change
@@ -6751,6 +6751,27 @@ void Assembler::sha256msg2(XMMRegister dst, XMMRegister src) {
67516751
emit_int16((unsigned char)0xCD, (0xC0 | encode));
67526752
}
67536753

6754+
// Emits the register-to-register form of sha512msg1: a VEX prefix built with
// the F2 SIMD prefix and the 0F 38 opcode map at 256-bit vector length,
// followed by opcode byte 0xCC and a ModRM byte of (0xC0 | encode).
// Requires SHA512 and AVX support (asserted).
void Assembler::sha512msg1(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sha512() && VM_Version::supports_avx(), "");
  InstructionAttr attrs(AVX_256bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  // No second source register is encoded for this instruction (nds slot is 0).
  const int reg_bits = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attrs);
  const int modrm = 0xC0 | reg_bits;
  emit_int16((unsigned char)0xCC, modrm);
}
6760+
6761+
// Emits the register-to-register form of sha512msg2: a VEX prefix built with
// the F2 SIMD prefix and the 0F 38 opcode map at 256-bit vector length,
// followed by opcode byte 0xCD and a ModRM byte of (0xC0 | encode).
// Requires SHA512 and AVX support (asserted).
void Assembler::sha512msg2(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sha512() && VM_Version::supports_avx(), "");
  InstructionAttr attrs(AVX_256bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  // No second source register is encoded for this instruction (nds slot is 0).
  const int reg_bits = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attrs);
  const int modrm = 0xC0 | reg_bits;
  emit_int16((unsigned char)0xCD, modrm);
}
6767+
6768+
// Emits the three-register form of sha512rnds2: a VEX prefix built with the
// F2 SIMD prefix and the 0F 38 opcode map at 256-bit vector length (with nds
// as the additional source register), followed by opcode byte 0xCB and a
// ModRM byte of (0xC0 | encode).
// Requires SHA512 and AVX support (asserted).
void Assembler::sha512rnds2(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_sha512() && VM_Version::supports_avx(), "");
  InstructionAttr attrs(AVX_256bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  const int reg_bits = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attrs);
  const int modrm = 0xC0 | reg_bits;
  emit_int16((unsigned char)0xCB, modrm);
}
6774+
67546775
void Assembler::shll(Register dst, int imm8) {
67556776
assert(isShiftCount(imm8), "illegal shift count");
67566777
int encode = prefix_and_encode(dst->encoding());
@@ -11670,6 +11691,19 @@ void Assembler::evbroadcasti64x2(XMMRegister dst, Address src, int vector_len) {
1167011691
emit_operand(dst, src, 0);
1167111692
}
1167211693

11694+
// Emits vbroadcasti128 with a memory source: prefix built with the 66 SIMD
// prefix and the 0F 38 opcode map, opcode byte 0x5A, then the memory operand.
// Only the 256-bit vector length is accepted, AVX2 is required, and dst must
// be a real register (all three are asserted).
void Assembler::vbroadcasti128(XMMRegister dst, Address src, int vector_len) {
  assert(VM_Version::supports_avx2(), "");
  assert(vector_len == AVX_256bit, "");
  assert(dst != xnoreg, "sanity");
  InstructionMark im(this);
  InstructionAttr attrs(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attrs.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
  // For the prefix, the memory operand comes first and dst is passed as the
  // register operand (src and dst are swapped relative to the signature).
  const int dst_enc = dst->encoding();
  vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
  emit_int8(0x5A);
  emit_operand(dst, src, 0);
}
11706+
1167311707
// scalar single/double precision replicate
1167411708

1167511709
// duplicate single precision data from src into programmed locations in dest : requires AVX512VL

Diff for: ‎src/hotspot/cpu/x86/assembler_x86.hpp

+4
Original file line numberDiff line numberDiff line change
@@ -2345,6 +2345,9 @@ class Assembler : public AbstractAssembler {
23452345
void sha256rnds2(XMMRegister dst, XMMRegister src);
23462346
void sha256msg1(XMMRegister dst, XMMRegister src);
23472347
void sha256msg2(XMMRegister dst, XMMRegister src);
2348+
void sha512rnds2(XMMRegister dst, XMMRegister nds, XMMRegister src);
2349+
void sha512msg1(XMMRegister dst, XMMRegister src);
2350+
void sha512msg2(XMMRegister dst, XMMRegister src);
23482351

23492352
void shldl(Register dst, Register src);
23502353
void eshldl(Register dst, Register src1, Register src2, bool no_flags);
@@ -3035,6 +3038,7 @@ class Assembler : public AbstractAssembler {
30353038
void evbroadcasti32x4(XMMRegister dst, Address src, int vector_len);
30363039
void evbroadcasti64x2(XMMRegister dst, XMMRegister src, int vector_len);
30373040
void evbroadcasti64x2(XMMRegister dst, Address src, int vector_len);
3041+
void vbroadcasti128(XMMRegister dst, Address src, int vector_len);
30383042

30393043
// scalar single/double/128bit precision replicate
30403044
void vbroadcastss(XMMRegister dst, XMMRegister src, int vector_len);

Diff for: ‎src/hotspot/cpu/x86/macroAssembler_x86.cpp

+11
Original file line numberDiff line numberDiff line change
@@ -3482,6 +3482,17 @@ void MacroAssembler::vpbroadcastd(XMMRegister dst, AddressLiteral src, int vecto
34823482
}
34833483
}
34843484

3485+
// vbroadcasti128 from an AddressLiteral.
// When src is reachable it is used directly as the memory operand; otherwise
// its address is first loaded into rscratch and the broadcast reads through
// that register. rscratch may be noreg only when src is always reachable
// (asserted).
void MacroAssembler::vbroadcasti128(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) {
  assert(rscratch != noreg || always_reachable(src), "missing");

  if (!reachable(src)) {
    // Out of direct reach: materialize the address, then load via [rscratch + 0].
    lea(rscratch, src);
    Assembler::vbroadcasti128(dst, Address(rscratch, 0), vector_len);
    return;
  }

  Assembler::vbroadcasti128(dst, as_Address(src), vector_len);
}
3495+
34853496
void MacroAssembler::vpbroadcastq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) {
34863497
assert(rscratch != noreg || always_reachable(src), "missing");
34873498

Diff for: ‎src/hotspot/cpu/x86/macroAssembler_x86.hpp

+4
Original file line numberDiff line numberDiff line change
@@ -1118,6 +1118,7 @@ class MacroAssembler: public Assembler {
11181118
XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
11191119
Register buf, Register state, Register ofs, Register limit, Register rsp, bool multi_block,
11201120
XMMRegister shuf_mask);
1121+
void sha512_update_ni_x1(Register arg_hash, Register arg_msg, Register ofs, Register limit, bool multi_block);
11211122
#endif // _LP64
11221123

11231124
void fast_md5(Register buf, Address state, Address ofs, Address limit,
@@ -1216,6 +1217,9 @@ class MacroAssembler: public Assembler {
12161217
void addpd(XMMRegister dst, Address src) { Assembler::addpd(dst, src); }
12171218
void addpd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
12181219

1220+
using Assembler::vbroadcasti128;
1221+
void vbroadcasti128(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = noreg);
1222+
12191223
using Assembler::vbroadcastsd;
12201224
void vbroadcastsd(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = noreg);
12211225

Diff for: ‎src/hotspot/cpu/x86/macroAssembler_x86_sha.cpp

+179
Original file line numberDiff line numberDiff line change
@@ -1519,5 +1519,184 @@ void MacroAssembler::sha512_AVX2(XMMRegister msg, XMMRegister state0, XMMRegiste
15191519
}
15201520
}
15211521

1522+
// Implementation derived from the Intel IPsec implementation (intel-ipsec-mb on GitHub)
1523+
// Emits the SHA-512 block update built on sha512rnds2 / sha512msg1 / sha512msg2.
//
//   arg_hash    - base address of the hash state: two 32-byte vectors are loaded
//                 from offsets 0 and 32 on entry and stored back at the end
//   arg_msg     - address of the message: four 32-byte vectors (128 bytes) are
//                 read per block
//   ofs, limit  - only touched when multi_block is true: ofs is advanced by 128
//                 after each block and the block loop repeats while the
//                 belowEqual condition holds for (ofs, limit); the final ofs is
//                 copied into rax
//   multi_block - decided at code-generation time; selects whether the loop tail
//                 (pointer advance, compare, branch, rax = ofs) is emitted
//
// Registers written by the emitted code: rax (base of the K512_W table, later
// the returned ofs), r10 (scratch for the mask load), xmm0-xmm9 and xmm11-xmm15,
// plus arg_msg and ofs when multi_block is true.
void MacroAssembler::sha512_update_ni_x1(Register arg_hash, Register arg_msg, Register ofs, Register limit, bool multi_block) {
1524+
// NOTE(review): done_hash is bound at the end of this function but no branch
// here targets it.
Label done_hash, block_loop;
1525+
address K512_W = StubRoutines::x86::k512_W_addr();
1526+
1527+
// xmm15 = shuffle mask applied (via vpshufb) to every message vector loaded
// below; presumably a byte-order flip, judging by the mask's name - confirm.
vbroadcasti128(xmm15, ExternalAddress(StubRoutines::x86::pshuffle_byte_flip_mask_addr_sha512()), Assembler::AVX_256bit, r10);
1528+
1529+
//load current hash value and transform
1530+
vmovdqu(xmm0, Address(arg_hash));
1531+
vmovdqu(xmm1, Address(arg_hash, 32));
1532+
//ymm0 = D C B A, ymm1 = H G F E
1533+
vperm2i128(xmm2, xmm0, xmm1, 0x20);
1534+
vperm2i128(xmm3, xmm0, xmm1, 0x31);
1535+
//ymm2 = F E B A, ymm3 = H G D C
1536+
vpermq(xmm13, xmm2, 0x1b, Assembler::AVX_256bit);
1537+
vpermq(xmm14, xmm3, 0x1b, Assembler::AVX_256bit);
1538+
//ymm13 = A B E F, ymm14 = C D G H
1539+
1540+
// rax = base of the K512_W table; every 4-round group below adds the 32-byte
// entry at rax + group * 32 to its message words.
lea(rax, ExternalAddress(K512_W));
1541+
align(32);
1542+
bind(block_loop);
1543+
// Working copies of the state for this block; xmm13/xmm14 keep the values
// from block entry and are added back in after the last round group.
vmovdqu(xmm11, xmm13);//ABEF
1544+
vmovdqu(xmm12, xmm14);//CDGH
1545+
1546+
//R0 - R3
1547+
vmovdqu(xmm0, Address(arg_msg, 0 * 32));
1548+
vpshufb(xmm3, xmm0, xmm15, Assembler::AVX_256bit);//ymm0 / ymm3 = W[0..3]
1549+
vpaddq(xmm0, xmm3, Address(rax, 0 * 32), Assembler::AVX_256bit);
1550+
sha512rnds2(xmm12, xmm11, xmm0);
1551+
// Swap the two 128-bit halves of ymm0 before the second sha512rnds2;
// presumably each sha512rnds2 consumes two of the four W+K values - confirm
// against the instruction reference.
vperm2i128(xmm0, xmm0, xmm0, 0x01);
1552+
sha512rnds2(xmm11, xmm12, xmm0);
1553+
1554+
//R4 - R7
1555+
vmovdqu(xmm0, Address(arg_msg, 1 * 32));
1556+
vpshufb(xmm4, xmm0, xmm15, Assembler::AVX_256bit);//ymm0 / ymm4 = W[4..7]
1557+
vpaddq(xmm0, xmm4, Address(rax, 1 * 32), Assembler::AVX_256bit);
1558+
sha512rnds2(xmm12, xmm11, xmm0);
1559+
vperm2i128(xmm0, xmm0, xmm0, 0x01);
1560+
sha512rnds2(xmm11, xmm12, xmm0);
1561+
sha512msg1(xmm3, xmm4); //ymm3 = W[0..3] + S0(W[1..4])
1562+
1563+
//R8 - R11
1564+
vmovdqu(xmm0, Address(arg_msg, 2 * 32));
1565+
vpshufb(xmm5, xmm0, xmm15, Assembler::AVX_256bit);//ymm0 / ymm5 = W[8..11]
1566+
vpaddq(xmm0, xmm5, Address(rax, 2 * 32), Assembler::AVX_256bit);
1567+
sha512rnds2(xmm12, xmm11, xmm0);
1568+
vperm2i128(xmm0, xmm0, xmm0, 0x01);
1569+
sha512rnds2(xmm11, xmm12, xmm0);
1570+
sha512msg1(xmm4, xmm5);//ymm4 = W[4..7] + S0(W[5..8])
1571+
1572+
//R12 - R15
1573+
vmovdqu(xmm0, Address(arg_msg, 3 * 32));
1574+
vpshufb(xmm6, xmm0, xmm15, Assembler::AVX_256bit); //ymm0 / ymm6 = W[12..15]
1575+
vpaddq(xmm0, xmm6, Address(rax, 3 * 32), Assembler::AVX_256bit);
1576+
vpermq(xmm8, xmm6, 0x1b, Assembler::AVX_256bit); //ymm8 = W[12] W[13] W[14] W[15]
1577+
vpermq(xmm9, xmm5, 0x39, Assembler::AVX_256bit); //ymm9 = W[8] W[11] W[10] W[9]
1578+
vpblendd(xmm8, xmm8, xmm9, 0x3f, Assembler::AVX_256bit); //ymm8 = W[12] W[11] W[10] W[9]
1579+
vpaddq(xmm3, xmm3, xmm8, Assembler::AVX_256bit);
1580+
sha512msg2(xmm3, xmm6);//W[16..19] = xmm3 + W[9..12] + S1(W[14..17])
1581+
sha512rnds2(xmm12, xmm11, xmm0);
1582+
vperm2i128(xmm0, xmm0, xmm0, 0x01);
1583+
sha512rnds2(xmm11, xmm12, xmm0);
1584+
sha512msg1(xmm5, xmm6); //ymm5 = W[8..11] + S0(W[9..12])
1585+
1586+
//R16 - R19, R32 - R35, R48 - R51
1587+
// Code-generation-time loop: the body is emitted three times (j = 3, 2, 1).
// i selects the 32-byte K512_W entry and starts at group 4; each pass emits
// four groups of four rounds and rotates the schedule through xmm3..xmm6.
for (int i = 4, j = 3; j > 0; j--) {
1588+
vpaddq(xmm0, xmm3, Address(rax, i * 32), Assembler::AVX_256bit);
1589+
vpermq(xmm8, xmm3, 0x1b, Assembler::AVX_256bit);//ymm8 = W[16] W[17] W[18] W[19]
1590+
vpermq(xmm9, xmm6, 0x39, Assembler::AVX_256bit);//ymm9 = W[12] W[15] W[14] W[13]
1591+
vpblendd(xmm7, xmm8, xmm9, 0x3f, Assembler::AVX_256bit);//xmm7 = W[16] W[15] W[14] W[13]
1592+
vpaddq(xmm4, xmm4, xmm7, Assembler::AVX_256bit);//ymm4 = W[4..7] + S0(W[5..8]) + W[13..16]
1593+
sha512msg2(xmm4, xmm3);//ymm4 += S1(W[14..17])
1594+
sha512rnds2(xmm12, xmm11, xmm0);
1595+
vperm2i128(xmm0, xmm0, xmm0, 0x01);
1596+
sha512rnds2(xmm11, xmm12, xmm0);
1597+
sha512msg1(xmm6, xmm3); //ymm6 = W[12..15] + S0(W[13..16])
1598+
i += 1;
1599+
//R20 - R23, R36 - R39, R52 - R55
1600+
vpaddq(xmm0, xmm4, Address(rax, i * 32), Assembler::AVX_256bit);
1601+
vpermq(xmm8, xmm4, 0x1b, Assembler::AVX_256bit);//ymm8 = W[20] W[21] W[22] W[23]
1602+
vpermq(xmm9, xmm3, 0x39, Assembler::AVX_256bit);//ymm9 = W[16] W[19] W[18] W[17]
1603+
vpblendd(xmm7, xmm8, xmm9, 0x3f, Assembler::AVX_256bit);//ymm7 = W[20] W[19] W[18] W[17]
1604+
vpaddq(xmm5, xmm5, xmm7, Assembler::AVX_256bit);//ymm5 = W[8..11] + S0(W[9..12]) + W[17..20]
1605+
sha512msg2(xmm5, xmm4);//ymm5 += S1(W[18..21])
1606+
sha512rnds2(xmm12, xmm11, xmm0);
1607+
vperm2i128(xmm0, xmm0, xmm0, 0x01);
1608+
sha512rnds2(xmm11, xmm12, xmm0);
1609+
sha512msg1(xmm3, xmm4); //ymm3 = W[16..19] + S0(W[17..20])
1610+
i += 1;
1611+
//R24 - R27, R40 - R43, R56 - R59
1612+
vpaddq(xmm0, xmm5, Address(rax, i * 32), Assembler::AVX_256bit);
1613+
vpermq(xmm8, xmm5, 0x1b, Assembler::AVX_256bit);//ymm8 = W[24] W[25] W[26] W[27]
1614+
vpermq(xmm9, xmm4, 0x39, Assembler::AVX_256bit);//ymm9 = W[20] W[23] W[22] W[21]
1615+
vpblendd(xmm7, xmm8, xmm9, 0x3f, Assembler::AVX_256bit);//ymm7 = W[24] W[23] W[22] W[21]
1616+
vpaddq(xmm6, xmm6, xmm7, Assembler::AVX_256bit);//ymm6 = W[12..15] + S0(W[13..16]) + W[21..24]
1617+
sha512msg2(xmm6, xmm5);//ymm6 += S1(W[22..25])
1618+
sha512rnds2(xmm12, xmm11, xmm0);
1619+
vperm2i128(xmm0, xmm0, xmm0, 0x01);
1620+
sha512rnds2(xmm11, xmm12, xmm0);
1621+
sha512msg1(xmm4, xmm5);//ymm4 = W[20..23] + S0(W[21..24])
1622+
i += 1;
1623+
//R28 - R31, R44 - R47, R60 - R63
1624+
vpaddq(xmm0, xmm6, Address(rax, i * 32), Assembler::AVX_256bit);
1625+
vpermq(xmm8, xmm6, 0x1b, Assembler::AVX_256bit);//ymm8 = W[28] W[29] W[30] W[31]
1626+
vpermq(xmm9, xmm5, 0x39, Assembler::AVX_256bit);//ymm9 = W[24] W[27] W[26] W[25]
1627+
vpblendd(xmm7, xmm8, xmm9, 0x3f, Assembler::AVX_256bit);//ymm7 = W[28] W[27] W[26] W[25]
1628+
vpaddq(xmm3, xmm3, xmm7, Assembler::AVX_256bit);//ymm3 = W[16..19] + S0(W[17..20]) + W[25..28]
1629+
sha512msg2(xmm3, xmm6); //ymm3 += S1(W[26..29])
1630+
sha512rnds2(xmm12, xmm11, xmm0);
1631+
vperm2i128(xmm0, xmm0, xmm0, 0x01);
1632+
sha512rnds2(xmm11, xmm12, xmm0);
1633+
sha512msg1(xmm5, xmm6);//ymm5 = W[24..27] + S0(W[25..28])
1634+
i += 1;
1635+
}
1636+
//R64 - R67
1637+
vpaddq(xmm0, xmm3, Address(rax, 16 * 32), Assembler::AVX_256bit);
1638+
vpermq(xmm8, xmm3, 0x1b, Assembler::AVX_256bit);//ymm8 = W[64] W[65] W[66] W[67]
1639+
vpermq(xmm9, xmm6, 0x39, Assembler::AVX_256bit);//ymm9 = W[60] W[63] W[62] W[61]
1640+
vpblendd(xmm7, xmm8, xmm9, 0x3f, Assembler::AVX_256bit);//ymm7 = W[64] W[63] W[62] W[61]
1641+
vpaddq(xmm4, xmm4, xmm7, Assembler::AVX_256bit);//ymm4 = W[52..55] + S0(W[53..56]) + W[61..64]
1642+
sha512msg2(xmm4, xmm3);//ymm4 += S1(W[62..65])
1643+
sha512rnds2(xmm12, xmm11, xmm0);
1644+
vperm2i128(xmm0, xmm0, xmm0, 0x01);
1645+
sha512rnds2(xmm11, xmm12, xmm0);
1646+
sha512msg1(xmm6, xmm3);//ymm6 = W[60..63] + S0(W[61..64])
1647+
1648+
//R68 - R71
1649+
vpaddq(xmm0, xmm4, Address(rax, 17 * 32), Assembler::AVX_256bit);
1650+
vpermq(xmm8, xmm4, 0x1b, Assembler::AVX_256bit);//ymm8 = W[68] W[69] W[70] W[71]
1651+
vpermq(xmm9, xmm3, 0x39, Assembler::AVX_256bit);//ymm9 = W[64] W[67] W[66] W[65]
1652+
vpblendd(xmm7, xmm8, xmm9, 0x3f, Assembler::AVX_256bit);//ymm7 = W[68] W[67] W[66] W[65]
1653+
vpaddq(xmm5, xmm5, xmm7, Assembler::AVX_256bit);//ymm5 = W[56..59] + S0(W[57..60]) + W[65..68]
1654+
sha512msg2(xmm5, xmm4);//ymm5 += S1(W[66..69])
1655+
sha512rnds2(xmm12, xmm11, xmm0);
1656+
vperm2i128(xmm0, xmm0, xmm0, 0x01);
1657+
sha512rnds2(xmm11, xmm12, xmm0);
1658+
1659+
//R72 - R75
1660+
vpaddq(xmm0, xmm5, Address(rax, 18 * 32), Assembler::AVX_256bit);
1661+
vpermq(xmm8, xmm5, 0x1b, Assembler::AVX_256bit);//ymm8 = W[72] W[73] W[74] W[75]
1662+
vpermq(xmm9, xmm4, 0x39, Assembler::AVX_256bit);//ymm9 = W[68] W[71] W[70] W[69]
1663+
vpblendd(xmm7, xmm8, xmm9, 0x3f, Assembler::AVX_256bit);//ymm7 = W[72] W[71] W[70] W[69]
1664+
vpaddq(xmm6, xmm6, xmm7, Assembler::AVX_256bit);//ymm6 = W[60..63] + S0(W[61..64]) + W[69..72]
1665+
sha512msg2(xmm6, xmm5);//ymm6 += S1(W[70..73])
1666+
sha512rnds2(xmm12, xmm11, xmm0);
1667+
vperm2i128(xmm0, xmm0, xmm0, 0x01);
1668+
sha512rnds2(xmm11, xmm12, xmm0);
1669+
1670+
//R76 - R79
1671+
vpaddq(xmm0, xmm6, Address(rax, 19 * 32), Assembler::AVX_256bit);
1672+
sha512rnds2(xmm12, xmm11, xmm0);
1673+
vperm2i128(xmm0, xmm0, xmm0, 0x01);
1674+
sha512rnds2(xmm11, xmm12, xmm0);
1675+
1676+
//update hash value
1677+
vpaddq(xmm14, xmm14, xmm12, Assembler::AVX_256bit);
1678+
vpaddq(xmm13, xmm13, xmm11, Assembler::AVX_256bit);
1679+
1680+
if (multi_block) {
1681+
// Step to the next 128-byte message block and loop back while the
// belowEqual condition holds for (ofs, limit). rax still holds the K512_W
// base inside the loop; it is overwritten with ofs only after the loop exits.
addptr(arg_msg, 4 * 32);
1682+
addptr(ofs, 128);
1683+
cmpptr(ofs, limit);
1684+
jcc(Assembler::belowEqual, block_loop);
1685+
movptr(rax, ofs); //return ofs
1686+
}
1687+
1688+
//store the hash value back in memory
1689+
//xmm13 = ABEF
1690+
//xmm14 = CDGH
1691+
vperm2i128(xmm1, xmm13, xmm14, 0x31);
1692+
vperm2i128(xmm2, xmm13, xmm14, 0x20);
1693+
vpermq(xmm1, xmm1, 0xb1, Assembler::AVX_256bit);//ymm1 = D C B A
1694+
vpermq(xmm2, xmm2, 0xb1, Assembler::AVX_256bit);//ymm2 = H G F E
1695+
vmovdqu(Address(arg_hash, 0 * 32), xmm1);
1696+
vmovdqu(Address(arg_hash, 1 * 32), xmm2);
1697+
1698+
bind(done_hash);
1699+
}
1700+
15221701
#endif //#ifdef _LP64
15231702

Diff for: ‎src/hotspot/cpu/x86/stubGenerator_x86_64.cpp

+17-15
Original file line numberDiff line numberDiff line change
@@ -1558,7 +1558,7 @@ address StubGenerator::generate_sha256_implCompress(bool multi_block, const char
15581558

15591559
address StubGenerator::generate_sha512_implCompress(bool multi_block, const char *name) {
15601560
assert(VM_Version::supports_avx2(), "");
1561-
assert(VM_Version::supports_bmi2(), "");
1561+
assert(VM_Version::supports_bmi2() || VM_Version::supports_sha512(), "");
15621562
__ align(CodeEntryAlignment);
15631563
StubCodeMark mark(this, "StubRoutines", name);
15641564
address start = __ pc();
@@ -1568,22 +1568,24 @@ address StubGenerator::generate_sha512_implCompress(bool multi_block, const char
15681568
Register ofs = c_rarg2;
15691569
Register limit = c_rarg3;
15701570

1571-
const XMMRegister msg = xmm0;
1572-
const XMMRegister state0 = xmm1;
1573-
const XMMRegister state1 = xmm2;
1574-
const XMMRegister msgtmp0 = xmm3;
1575-
const XMMRegister msgtmp1 = xmm4;
1576-
const XMMRegister msgtmp2 = xmm5;
1577-
const XMMRegister msgtmp3 = xmm6;
1578-
const XMMRegister msgtmp4 = xmm7;
1579-
1580-
const XMMRegister shuf_mask = xmm8;
1581-
15821571
__ enter();
15831572

1584-
__ sha512_AVX2(msg, state0, state1, msgtmp0, msgtmp1, msgtmp2, msgtmp3, msgtmp4,
1585-
buf, state, ofs, limit, rsp, multi_block, shuf_mask);
1586-
1573+
if (VM_Version::supports_sha512()) {
1574+
__ sha512_update_ni_x1(state, buf, ofs, limit, multi_block);
1575+
} else {
1576+
const XMMRegister msg = xmm0;
1577+
const XMMRegister state0 = xmm1;
1578+
const XMMRegister state1 = xmm2;
1579+
const XMMRegister msgtmp0 = xmm3;
1580+
const XMMRegister msgtmp1 = xmm4;
1581+
const XMMRegister msgtmp2 = xmm5;
1582+
const XMMRegister msgtmp3 = xmm6;
1583+
const XMMRegister msgtmp4 = xmm7;
1584+
1585+
const XMMRegister shuf_mask = xmm8;
1586+
__ sha512_AVX2(msg, state0, state1, msgtmp0, msgtmp1, msgtmp2, msgtmp3, msgtmp4,
1587+
buf, state, ofs, limit, rsp, multi_block, shuf_mask);
1588+
}
15871589
__ vzeroupper();
15881590
__ leave();
15891591
__ ret(0);

Diff for: ‎src/hotspot/cpu/x86/vm_version_x86.cpp

+4-1
Original file line numberDiff line numberDiff line change
@@ -1060,6 +1060,7 @@ void VM_Version::get_processor_features() {
10601060
_features &= ~CPU_AVX;
10611061
_features &= ~CPU_VZEROUPPER;
10621062
_features &= ~CPU_F16C;
1063+
_features &= ~CPU_SHA512;
10631064
}
10641065

10651066
if (logical_processors_per_package() == 1) {
@@ -1304,7 +1305,7 @@ void VM_Version::get_processor_features() {
13041305

13051306
#ifdef _LP64
13061307
// These are only supported on 64-bit
1307-
if (UseSHA && supports_avx2() && supports_bmi2()) {
1308+
if (UseSHA && supports_avx2() && (supports_bmi2() || supports_sha512())) {
13081309
if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
13091310
FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
13101311
}
@@ -3007,6 +3008,8 @@ uint64_t VM_Version::CpuidInfo::feature_flags() const {
30073008
xem_xcr0_eax.bits.ymm != 0) {
30083009
result |= CPU_AVX;
30093010
result |= CPU_VZEROUPPER;
3011+
if (sefsl1_cpuid7_eax.bits.sha512 != 0)
3012+
result |= CPU_SHA512;
30103013
if (std_cpuid1_ecx.bits.f16c != 0)
30113014
result |= CPU_F16C;
30123015
if (sef_cpuid7_ebx.bits.avx2 != 0) {

Diff for: ‎src/hotspot/cpu/x86/vm_version_x86.hpp

+5-2
Original file line numberDiff line numberDiff line change
@@ -283,7 +283,8 @@ class VM_Version : public Abstract_VM_Version {
283283
union SefCpuid7SubLeaf1Eax {
284284
uint32_t value;
285285
struct {
286-
uint32_t : 23,
286+
uint32_t sha512 : 1,
287+
: 22,
287288
avx_ifma : 1,
288289
: 8;
289290
} bits;
@@ -415,7 +416,8 @@ class VM_Version : public Abstract_VM_Version {
415416
decl(CET_SS, "cet_ss", 57) /* Control Flow Enforcement - Shadow Stack */ \
416417
decl(AVX512_IFMA, "avx512_ifma", 58) /* Integer Vector FMA instructions*/ \
417418
decl(AVX_IFMA, "avx_ifma", 59) /* 256-bit VEX-coded variant of AVX512-IFMA*/ \
418-
decl(APX_F, "apx_f", 60) /* Intel Advanced Performance Extensions*/
419+
decl(APX_F, "apx_f", 60) /* Intel Advanced Performance Extensions*/\
420+
decl(SHA512, "sha512", 61) /* SHA512 instructions*/
419421

420422
#define DECLARE_CPU_FEATURE_FLAG(id, name, bit) CPU_##id = (1ULL << bit),
421423
CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_FLAG)
@@ -757,6 +759,7 @@ class VM_Version : public Abstract_VM_Version {
757759
static bool supports_ospke() { return (_features & CPU_OSPKE) != 0; }
758760
static bool supports_cet_ss() { return (_features & CPU_CET_SS) != 0; }
759761
static bool supports_cet_ibt() { return (_features & CPU_CET_IBT) != 0; }
762+
static bool supports_sha512() { return (_features & CPU_SHA512) != 0; }
760763

761764
//
762765
// Feature identification not affected by VM flags

Diff for: ‎src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/amd64/AMD64.java

+1
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,7 @@ public enum CPUFeature implements CPUFeatureName {
256256
AVX512_IFMA,
257257
AVX_IFMA,
258258
APX_F,
259+
SHA512,
259260
}
260261

261262
private final EnumSet<CPUFeature> features;

0 commit comments

Comments
 (0)
Please sign in to comment.