Skip to content

Commit e5a68c6

Browse files
authored
Merge pull request #302 from pq-code-package/keccak
Add native Keccak backend for x86_64 and AArch64
2 parents 64fd937 + 5b877f5 commit e5a68c6

23 files changed

+3539
-1
lines changed

mldsa/common.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,4 +48,7 @@
4848
* The following is to avoid compilers complaining about this. */
4949
#define MLD_EMPTY_CU(s) extern int MLD_NAMESPACE(empty_cu_##s);
5050

51+
#if defined(MLD_CONFIG_USE_NATIVE_BACKEND_FIPS202)
52+
#include MLD_CONFIG_FIPS202_BACKEND_FILE
53+
#endif
5154
#endif /* !MLD_COMMON_H */

mldsa/config.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,4 +45,21 @@
4545
#define MLD_CONFIG_ARITH_BACKEND_FILE "native/meta.h"
4646
#endif
4747

48+
/******************************************************************************
49+
* Name: MLD_CONFIG_FIPS202_BACKEND_FILE
50+
*
51+
* Description: The FIPS-202 backend to use.
52+
*
53+
* If MLD_CONFIG_USE_NATIVE_BACKEND_FIPS202 is set, this option
54+
* must either be undefined or the filename of a FIPS202 backend.
55+
* If undefined, the default backend "fips202/native/auto.h" is used.
56+
*
57+
* This can be set using CFLAGS.
58+
*
59+
*****************************************************************************/
60+
#if defined(MLD_CONFIG_USE_NATIVE_BACKEND_FIPS202) && \
61+
!defined(MLD_CONFIG_FIPS202_BACKEND_FILE)
62+
#define MLD_CONFIG_FIPS202_BACKEND_FILE "fips202/native/auto.h"
63+
#endif
64+
4865
#endif /* !MLD_CONFIG_H */

mldsa/fips202/keccakf1600.c

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include <stdint.h>
3232

3333
#include "keccakf1600.h"
34+
#if !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED)
3435

3536
#define MLD_KECCAK_NROUNDS 24
3637
/* Rotate the 64-bit unsigned value `a` left by `offset` bits.
 *
 * Both arguments are parenthesized in the expansion so that compound
 * expressions (e.g. `x | y` or `n + 1`) are rotated as a whole.
 *
 * `offset` must be in 1..63: with 0 the right shift would be by 64, which
 * is undefined behavior for a 64-bit operand. `a` and `offset` are each
 * evaluated twice, so they must be free of side effects. */
#define MLD_KECCAK_ROL(a, offset) \
  (((a) << (offset)) ^ ((a) >> (64 - (offset))))
@@ -111,12 +112,20 @@ void mld_keccakf1600x4_xor_bytes(uint64_t *state, const unsigned char *data0,
111112

112113
/* Permute the Keccak states held in `state`.
 *
 * The portable path below treats `state` as four states stored back to
 * back, state k starting at state + k * MLD_KECCAK_LANES.
 *
 * The implementation is selected at compile time:
 *  - MLD_USE_FIPS202_X4_NATIVE: one call to the x4 native routine on the
 *    whole buffer.
 *  - MLD_USE_FIPS202_X2_NATIVE: two calls to the x2 native routine, at
 *    offsets 0 and 2 * MLD_KECCAK_LANES (presumably each call covers two
 *    consecutive states -- confirm against the backend).
 *  - otherwise: four calls to mld_keccakf1600_permute, one per state. */
void mld_keccakf1600x4_permute(uint64_t *state)
113114
{
115+
#if defined(MLD_USE_FIPS202_X4_NATIVE)
116+
mld_keccak_f1600_x4_native(state);
117+
#elif defined(MLD_USE_FIPS202_X2_NATIVE)
118+
mld_keccak_f1600_x2_native(state + 0 * MLD_KECCAK_LANES);
119+
mld_keccak_f1600_x2_native(state + 2 * MLD_KECCAK_LANES);
120+
#else
114121
mld_keccakf1600_permute(state + MLD_KECCAK_LANES * 0);
115122
mld_keccakf1600_permute(state + MLD_KECCAK_LANES * 1);
116123
mld_keccakf1600_permute(state + MLD_KECCAK_LANES * 2);
117124
mld_keccakf1600_permute(state + MLD_KECCAK_LANES * 3);
125+
#endif /* !MLD_USE_FIPS202_X4_NATIVE && !MLD_USE_FIPS202_X2_NATIVE */
118126
}
119127

128+
#if !defined(MLD_USE_FIPS202_X1_NATIVE)
120129
static const uint64_t mld_KeccakF_RoundConstants[MLD_KECCAK_NROUNDS] = {
121130
(uint64_t)0x0000000000000001ULL, (uint64_t)0x0000000000008082ULL,
122131
(uint64_t)0x800000000000808aULL, (uint64_t)0x8000000080008000ULL,
@@ -396,6 +405,18 @@ void mld_keccakf1600_permute(uint64_t *state)
396405
state[23] = Aso;
397406
state[24] = Asu;
398407
}
408+
#else /* !MLD_USE_FIPS202_X1_NATIVE */
409+
/* Variant of mld_keccakf1600_permute compiled when
 * MLD_USE_FIPS202_X1_NATIVE is defined: it replaces the C implementation
 * above and forwards `state` unchanged to the native x1 backend routine. */
void mld_keccakf1600_permute(uint64_t *state)
410+
{
411+
mld_keccak_f1600_x1_native(state);
412+
}
413+
#endif /* MLD_USE_FIPS202_X1_NATIVE */
414+
415+
#else /* !MLD_CONFIG_MULTILEVEL_NO_SHARED */
416+
417+
MLD_EMPTY_CU(keccakf1600)
418+
419+
#endif /* MLD_CONFIG_MULTILEVEL_NO_SHARED */
399420

400421
/* To facilitate single-compilation-unit (SCU) builds, undefine all macros.
401422
* Don't modify by hand -- this is auto-generated by scripts/autogen. */
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
/*
2+
* Copyright (c) The mlkem-native project authors
3+
* Copyright (c) The mldsa-native project authors
4+
* SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
5+
*/
6+
7+
#ifndef MLD_FIPS202_NATIVE_AARCH64_AUTO_H
8+
#define MLD_FIPS202_NATIVE_AARCH64_AUTO_H
9+
/* Default FIPS202 assembly profile for AArch64 systems */
10+
11+
/*
12+
* Default logic to decide which implementation to use.
13+
*
14+
*/
15+
16+
/*
17+
* Keccak-f1600
18+
*
19+
* - On Apple CPUs with __ARM_FEATURE_SHA3, we pick a pure Neon implementation.
20+
* - Otherwise, unless MLD_SYS_AARCH64_SLOW_BARREL_SHIFTER is set,
21+
* we use lazy-rotation scalar assembly from @[HYBRID].
22+
* - Otherwise, if MLD_SYS_AARCH64_SLOW_BARREL_SHIFTER is set, we
23+
* fall back to the standard C implementation.
24+
*/
25+
#if defined(__ARM_FEATURE_SHA3) && defined(__APPLE__)
26+
#include "x1_v84a.h"
27+
#elif !defined(MLD_SYS_AARCH64_SLOW_BARREL_SHIFTER)
28+
#include "x1_scalar.h"
29+
#endif
30+
31+
/*
32+
* Keccak-f1600x2/x4
33+
*
34+
* The optimal implementation is highly CPU-specific; see @[HYBRID].
35+
*
36+
* For now, if v8.4-A is not implemented, we use x4_v8a_scalar.h.
37+
* If v8.4-A is implemented and we are on an Apple CPU, we use a plain
38+
* Neon-based implementation.
39+
* If v8.4-A is implemented and we are not on an Apple CPU, we use a
40+
* scalar/Neon/Neon hybrid.
41+
* The reason for this distinction is that Apple CPUs appear to implement
42+
* the SHA3 instructions on all SIMD units, while Arm CPUs prior to Cortex-X4
43+
* don't, and ordinary Neon instructions are still needed.
44+
*/
45+
#if defined(__ARM_FEATURE_SHA3)
46+
/*
47+
* For Apple-M cores, we use a plain implementation leveraging SHA3
48+
* instructions only.
49+
*/
50+
#if defined(__APPLE__)
51+
#include "x2_v84a.h"
52+
#else
53+
#include "x4_v8a_v84a_scalar.h"
54+
#endif
55+
56+
#else /* __ARM_FEATURE_SHA3 */
57+
58+
#include "x4_v8a_scalar.h"
59+
60+
#endif /* !__ARM_FEATURE_SHA3 */
61+
62+
#endif /* !MLD_FIPS202_NATIVE_AARCH64_AUTO_H */
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
/*
2+
* Copyright (c) The mlkem-native project authors
3+
* Copyright (c) The mldsa-native project authors
4+
* SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
5+
*/
6+
#ifndef MLD_FIPS202_NATIVE_AARCH64_SRC_FIPS202_NATIVE_AARCH64_H
7+
#define MLD_FIPS202_NATIVE_AARCH64_SRC_FIPS202_NATIVE_AARCH64_H
8+
9+
#include <stdint.h>
10+
#include "../../../../cbmc.h"
11+
#include "../../../../common.h"
12+
13+
14+
/* Namespaced round-constant table. Only declared here; the definition and
 * its length are not visible in this header. The contracts of the assembly
 * routines declared in this header require their `rc` argument to be equal
 * to this symbol. */
#define mld_keccakf1600_round_constants \
15+
MLD_NAMESPACE(keccakf1600_round_constants)
16+
extern const uint64_t mld_keccakf1600_round_constants[];
17+
18+
/* Assembly routine (per the _asm suffix; implementation not shown here).
 * NOTE(review): presumably Keccak-f1600 on a single state -- confirm.
 *
 * Contract:
 *  - state: memory_no_alias over 25 * 1 uint64_t words.
 *  - rc:    must equal mld_keccakf1600_round_constants.
 *  - assigns only the 25 * 1 uint64_t words at `state`. */
#define mld_keccak_f1600_x1_scalar_asm MLD_NAMESPACE(keccak_f1600_x1_scalar_asm)
19+
void mld_keccak_f1600_x1_scalar_asm(uint64_t *state, uint64_t const *rc)
20+
__contract__(
21+
requires(memory_no_alias(state, sizeof(uint64_t) * 25 * 1))
22+
requires(rc == mld_keccakf1600_round_constants)
23+
assigns(memory_slice(state, sizeof(uint64_t) * 25 * 1))
24+
);
25+
26+
/* Assembly routine (per the _asm suffix; implementation not shown here).
 * NOTE(review): presumably Keccak-f1600 on a single state using Armv8.4-A
 * instructions, judging by the name -- confirm.
 *
 * Contract:
 *  - state: memory_no_alias over 25 * 1 uint64_t words.
 *  - rc:    must equal mld_keccakf1600_round_constants.
 *  - assigns only the 25 * 1 uint64_t words at `state`. */
#define mld_keccak_f1600_x1_v84a_asm MLD_NAMESPACE(keccak_f1600_x1_v84a_asm)
27+
void mld_keccak_f1600_x1_v84a_asm(uint64_t *state, uint64_t const *rc)
28+
__contract__(
29+
requires(memory_no_alias(state, sizeof(uint64_t) * 25 * 1))
30+
requires(rc == mld_keccakf1600_round_constants)
31+
assigns(memory_slice(state, sizeof(uint64_t) * 25 * 1))
32+
);
33+
34+
/* Assembly routine (per the _asm suffix; implementation not shown here).
 * NOTE(review): presumably Keccak-f1600 on two states using Armv8.4-A
 * instructions, judging by the name -- confirm.
 *
 * Contract:
 *  - state: memory_no_alias over 25 * 2 uint64_t words.
 *  - rc:    must equal mld_keccakf1600_round_constants.
 *  - assigns only the 25 * 2 uint64_t words at `state`. */
#define mld_keccak_f1600_x2_v84a_asm MLD_NAMESPACE(keccak_f1600_x2_v84a_asm)
35+
void mld_keccak_f1600_x2_v84a_asm(uint64_t *state, uint64_t const *rc)
36+
__contract__(
37+
requires(memory_no_alias(state, sizeof(uint64_t) * 25 * 2))
38+
requires(rc == mld_keccakf1600_round_constants)
39+
assigns(memory_slice(state, sizeof(uint64_t) * 25 * 2))
40+
);
41+
42+
/* Assembly routine (per the _asm suffix; implementation not shown here).
 * NOTE(review): presumably Keccak-f1600 on four states as a scalar/Armv8-A
 * Neon hybrid, judging by the name -- confirm.
 *
 * Contract:
 *  - state: memory_no_alias over 25 * 4 uint64_t words.
 *  - rc:    must equal mld_keccakf1600_round_constants.
 *  - assigns only the 25 * 4 uint64_t words at `state`. */
#define mld_keccak_f1600_x4_scalar_v8a_hybrid_asm \
43+
MLD_NAMESPACE(keccak_f1600_x4_scalar_v8a_hybrid_asm)
44+
void mld_keccak_f1600_x4_scalar_v8a_hybrid_asm(uint64_t *state,
45+
uint64_t const *rc)
46+
__contract__(
47+
requires(memory_no_alias(state, sizeof(uint64_t) * 25 * 4))
48+
requires(rc == mld_keccakf1600_round_constants)
49+
assigns(memory_slice(state, sizeof(uint64_t) * 25 * 4))
50+
);
51+
52+
/* Assembly routine (per the _asm suffix; implementation not shown here).
 * NOTE(review): presumably Keccak-f1600 on four states as a
 * scalar/Armv8-A Neon/Armv8.4-A hybrid, judging by the name -- confirm.
 *
 * Contract:
 *  - state: memory_no_alias over 25 * 4 uint64_t words.
 *  - rc:    must equal mld_keccakf1600_round_constants.
 *  - assigns only the 25 * 4 uint64_t words at `state`. */
#define mld_keccak_f1600_x4_scalar_v8a_v84a_hybrid_asm \
53+
MLD_NAMESPACE(keccak_f1600_x4_scalar_v8a_v84a_hybrid_asm)
54+
void mld_keccak_f1600_x4_scalar_v8a_v84a_hybrid_asm(uint64_t *state,
55+
uint64_t const *rc)
56+
__contract__(
57+
requires(memory_no_alias(state, sizeof(uint64_t) * 25 * 4))
58+
requires(rc == mld_keccakf1600_round_constants)
59+
assigns(memory_slice(state, sizeof(uint64_t) * 25 * 4))
60+
);
61+
62+
#endif /* !MLD_FIPS202_NATIVE_AARCH64_SRC_FIPS202_NATIVE_AARCH64_H */

0 commit comments

Comments
 (0)