Skip to content

Commit 402db8a

Browse files
committed
Simpasm: run autogen with simpasm simplification
Signed-off-by: willieyz <[email protected]>
1 parent 99dbf62 commit 402db8a

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

62 files changed

+13353
-7
lines changed

BIBLIOGRAPHY.md

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,9 @@ source code and documentation.
7171
- Matthias J. Kannwischer
7272
* URL: https://eprint.iacr.org/2022/1243
7373
* Referenced from:
74+
- [dev/fips202/aarch64/auto.h](dev/fips202/aarch64/auto.h)
75+
- [dev/fips202/aarch64/src/keccak_f1600_x1_v84a_asm.S](dev/fips202/aarch64/src/keccak_f1600_x1_v84a_asm.S)
76+
- [dev/fips202/aarch64/src/keccak_f1600_x2_v84a_asm.S](dev/fips202/aarch64/src/keccak_f1600_x2_v84a_asm.S)
7477
- [mldsa/fips202/native/aarch64/auto.h](mldsa/fips202/native/aarch64/auto.h)
7578
- [mldsa/fips202/native/aarch64/src/keccak_f1600_x1_v84a_asm.S](mldsa/fips202/native/aarch64/src/keccak_f1600_x1_v84a_asm.S)
7679
- [mldsa/fips202/native/aarch64/src/keccak_f1600_x2_v84a_asm.S](mldsa/fips202/native/aarch64/src/keccak_f1600_x2_v84a_asm.S)
@@ -144,16 +147,30 @@ source code and documentation.
144147
- Damien Stehlé
145148
* URL: https://github.com/pq-crystals/dilithium/tree/master/avx2
146149
* Referenced from:
150+
- [dev/x86_64/src/align.h](dev/x86_64/src/align.h)
151+
- [dev/x86_64/src/consts.c](dev/x86_64/src/consts.c)
152+
- [dev/x86_64/src/consts.h](dev/x86_64/src/consts.h)
153+
- [dev/x86_64/src/intt.S](dev/x86_64/src/intt.S)
154+
- [dev/x86_64/src/ntt.S](dev/x86_64/src/ntt.S)
155+
- [dev/x86_64/src/nttunpack.S](dev/x86_64/src/nttunpack.S)
156+
- [dev/x86_64/src/pointwise.S](dev/x86_64/src/pointwise.S)
157+
- [dev/x86_64/src/pointwise_acc_l4.S](dev/x86_64/src/pointwise_acc_l4.S)
158+
- [dev/x86_64/src/pointwise_acc_l5.S](dev/x86_64/src/pointwise_acc_l5.S)
159+
- [dev/x86_64/src/pointwise_acc_l7.S](dev/x86_64/src/pointwise_acc_l7.S)
160+
- [dev/x86_64/src/poly_caddq_avx2.c](dev/x86_64/src/poly_caddq_avx2.c)
161+
- [dev/x86_64/src/poly_chknorm_avx2.c](dev/x86_64/src/poly_chknorm_avx2.c)
162+
- [dev/x86_64/src/poly_decompose_32_avx2.c](dev/x86_64/src/poly_decompose_32_avx2.c)
163+
- [dev/x86_64/src/poly_decompose_88_avx2.c](dev/x86_64/src/poly_decompose_88_avx2.c)
164+
- [dev/x86_64/src/poly_use_hint_32_avx2.c](dev/x86_64/src/poly_use_hint_32_avx2.c)
165+
- [dev/x86_64/src/poly_use_hint_88_avx2.c](dev/x86_64/src/poly_use_hint_88_avx2.c)
166+
- [dev/x86_64/src/polyz_unpack_17_avx2.c](dev/x86_64/src/polyz_unpack_17_avx2.c)
167+
- [dev/x86_64/src/polyz_unpack_19_avx2.c](dev/x86_64/src/polyz_unpack_19_avx2.c)
168+
- [dev/x86_64/src/rej_uniform_avx2.c](dev/x86_64/src/rej_uniform_avx2.c)
169+
- [dev/x86_64/src/rej_uniform_eta2_avx2.c](dev/x86_64/src/rej_uniform_eta2_avx2.c)
170+
- [dev/x86_64/src/rej_uniform_eta4_avx2.c](dev/x86_64/src/rej_uniform_eta4_avx2.c)
147171
- [mldsa/native/x86_64/src/align.h](mldsa/native/x86_64/src/align.h)
148172
- [mldsa/native/x86_64/src/consts.c](mldsa/native/x86_64/src/consts.c)
149173
- [mldsa/native/x86_64/src/consts.h](mldsa/native/x86_64/src/consts.h)
150-
- [mldsa/native/x86_64/src/intt.S](mldsa/native/x86_64/src/intt.S)
151-
- [mldsa/native/x86_64/src/ntt.S](mldsa/native/x86_64/src/ntt.S)
152-
- [mldsa/native/x86_64/src/nttunpack.S](mldsa/native/x86_64/src/nttunpack.S)
153-
- [mldsa/native/x86_64/src/pointwise.S](mldsa/native/x86_64/src/pointwise.S)
154-
- [mldsa/native/x86_64/src/pointwise_acc_l4.S](mldsa/native/x86_64/src/pointwise_acc_l4.S)
155-
- [mldsa/native/x86_64/src/pointwise_acc_l5.S](mldsa/native/x86_64/src/pointwise_acc_l5.S)
156-
- [mldsa/native/x86_64/src/pointwise_acc_l7.S](mldsa/native/x86_64/src/pointwise_acc_l7.S)
157174
- [mldsa/native/x86_64/src/poly_caddq_avx2.c](mldsa/native/x86_64/src/poly_caddq_avx2.c)
158175
- [mldsa/native/x86_64/src/poly_chknorm_avx2.c](mldsa/native/x86_64/src/poly_chknorm_avx2.c)
159176
- [mldsa/native/x86_64/src/poly_decompose_32_avx2.c](mldsa/native/x86_64/src/poly_decompose_32_avx2.c)
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
/*
2+
* Copyright (c) The mlkem-native project authors
3+
* Copyright (c) The mldsa-native project authors
4+
* SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
5+
*/
6+
7+
/* References
8+
* ==========
9+
*
10+
* - [HYBRID]
11+
* Hybrid scalar/vector implementations of Keccak and SPHINCS+ on AArch64
12+
* Becker, Kannwischer
13+
* https://eprint.iacr.org/2022/1243
14+
*/
15+
16+
#ifndef MLD_FIPS202_NATIVE_AARCH64_AUTO_H
17+
#define MLD_FIPS202_NATIVE_AARCH64_AUTO_H
18+
/* Default FIPS202 assembly profile for AArch64 systems */
19+
20+
/*
21+
* Default logic to decide which implementation to use.
22+
*
23+
*/
24+
25+
/*
26+
* Keccak-f1600
27+
*
28+
* - On Apple CPUs with __ARM_FEATURE_SHA3, we pick a pure Neon implementation.
29+
* - Otherwise, unless MLD_SYS_AARCH64_SLOW_BARREL_SHIFTER is set,
30+
* we use lazy-rotation scalar assembly from @[HYBRID].
31+
* - Otherwise, if MLD_SYS_AARCH64_SLOW_BARREL_SHIFTER is set, we
32+
* fall back to the standard C implementation.
33+
*/
34+
#if defined(__ARM_FEATURE_SHA3) && defined(__APPLE__)
35+
#include "x1_v84a.h"
36+
#elif !defined(MLD_SYS_AARCH64_SLOW_BARREL_SHIFTER)
37+
#include "x1_scalar.h"
38+
#endif
39+
40+
/*
41+
* Keccak-f1600x2/x4
42+
*
43+
* The optimal implementation is highly CPU-specific; see @[HYBRID].
44+
*
45+
* For now, if v8.4-A is not implemented, we use the variant from x4_v8a_scalar.h.
46+
* If v8.4-A is implemented and we are on an Apple CPU, we use a plain
47+
* Neon-based implementation.
48+
* If v8.4-A is implemented and we are not on an Apple CPU, we use a
49+
* scalar/Neon (v8-A)/Neon (v8.4-A) hybrid.
50+
* The reason for this distinction is that Apple CPUs appear to implement
51+
* the SHA3 instructions on all SIMD units, while Arm CPUs prior to Cortex-X4
52+
* don't, and ordinary Neon instructions are still needed.
53+
*/
54+
#if defined(__ARM_FEATURE_SHA3)
55+
/*
56+
* For Apple-M cores, we use a plain implementation leveraging SHA3
57+
* instructions only.
58+
*/
59+
#if defined(__APPLE__)
60+
#include "x2_v84a.h"
61+
#else
62+
#include "x4_v8a_v84a_scalar.h"
63+
#endif
64+
65+
#else /* __ARM_FEATURE_SHA3 */
66+
67+
#include "x4_v8a_scalar.h"
68+
69+
#endif /* !__ARM_FEATURE_SHA3 */
70+
71+
#endif /* !MLD_FIPS202_NATIVE_AARCH64_AUTO_H */
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
/*
2+
* Copyright (c) The mlkem-native project authors
3+
* Copyright (c) The mldsa-native project authors
4+
* SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
5+
*/
6+
#ifndef MLD_FIPS202_NATIVE_AARCH64_SRC_FIPS202_NATIVE_AARCH64_H
7+
#define MLD_FIPS202_NATIVE_AARCH64_SRC_FIPS202_NATIVE_AARCH64_H
8+
9+
#include <stdint.h>
10+
#include "../../../../cbmc.h"
11+
#include "../../../../common.h"
12+
13+
14+
/*
* Namespaced round-constant table. The contracts of the routines declared
* in this header require their rc argument to be exactly this object.
* It is declared with an incomplete array type: the definition and the
* number of entries are not visible from this header.
*/
#define mld_keccakf1600_round_constants \
15+
MLD_NAMESPACE(keccakf1600_round_constants)
16+
extern const uint64_t mld_keccakf1600_round_constants[];
17+
18+
/*
* Prototype and contract for the namespaced x1 scalar Keccak-f1600 routine.
* The definition is not in this header (presumably AArch64 assembly, going
* by the _asm suffix -- confirm against the build).
*
* state: must satisfy memory_no_alias for 25 * 1 uint64_t words; this slice
*        is the only memory the contract allows the routine to assign.
* rc:    must be exactly mld_keccakf1600_round_constants.
*/
#define mld_keccak_f1600_x1_scalar_asm MLD_NAMESPACE(keccak_f1600_x1_scalar_asm)
19+
void mld_keccak_f1600_x1_scalar_asm(uint64_t *state, uint64_t const *rc)
20+
__contract__(
21+
requires(memory_no_alias(state, sizeof(uint64_t) * 25 * 1))
22+
requires(rc == mld_keccakf1600_round_constants)
23+
assigns(memory_slice(state, sizeof(uint64_t) * 25 * 1))
24+
);
25+
26+
/*
* Prototype and contract for the namespaced x1 Keccak-f1600 routine in its
* v84a variant (presumably the Armv8.4-A implementation -- the definition is
* not in this header).
*
* state: must satisfy memory_no_alias for 25 * 1 uint64_t words; this slice
*        is the only memory the contract allows the routine to assign.
* rc:    must be exactly mld_keccakf1600_round_constants.
*/
#define mld_keccak_f1600_x1_v84a_asm MLD_NAMESPACE(keccak_f1600_x1_v84a_asm)
27+
void mld_keccak_f1600_x1_v84a_asm(uint64_t *state, uint64_t const *rc)
28+
__contract__(
29+
requires(memory_no_alias(state, sizeof(uint64_t) * 25 * 1))
30+
requires(rc == mld_keccakf1600_round_constants)
31+
assigns(memory_slice(state, sizeof(uint64_t) * 25 * 1))
32+
);
33+
34+
/*
* Prototype and contract for the namespaced x2 Keccak-f1600 routine in its
* v84a variant (definition not in this header).
*
* state: must satisfy memory_no_alias for 25 * 2 uint64_t words (presumably
*        two Keccak states; their layout is not specified here). This slice
*        is the only memory the contract allows the routine to assign.
* rc:    must be exactly mld_keccakf1600_round_constants.
*/
#define mld_keccak_f1600_x2_v84a_asm MLD_NAMESPACE(keccak_f1600_x2_v84a_asm)
35+
void mld_keccak_f1600_x2_v84a_asm(uint64_t *state, uint64_t const *rc)
36+
__contract__(
37+
requires(memory_no_alias(state, sizeof(uint64_t) * 25 * 2))
38+
requires(rc == mld_keccakf1600_round_constants)
39+
assigns(memory_slice(state, sizeof(uint64_t) * 25 * 2))
40+
);
41+
42+
/*
* Prototype and contract for the namespaced x4 Keccak-f1600 routine in its
* scalar/v8a hybrid variant (definition not in this header).
*
* state: must satisfy memory_no_alias for 25 * 4 uint64_t words (presumably
*        four Keccak states; their layout is not specified here). This slice
*        is the only memory the contract allows the routine to assign.
* rc:    must be exactly mld_keccakf1600_round_constants.
*/
#define mld_keccak_f1600_x4_scalar_v8a_hybrid_asm \
43+
MLD_NAMESPACE(keccak_f1600_x4_scalar_v8a_hybrid_asm)
44+
void mld_keccak_f1600_x4_scalar_v8a_hybrid_asm(uint64_t *state,
45+
uint64_t const *rc)
46+
__contract__(
47+
requires(memory_no_alias(state, sizeof(uint64_t) * 25 * 4))
48+
requires(rc == mld_keccakf1600_round_constants)
49+
assigns(memory_slice(state, sizeof(uint64_t) * 25 * 4))
50+
);
51+
52+
/*
* Prototype and contract for the namespaced x4 Keccak-f1600 routine in its
* scalar/v8a/v84a hybrid variant (definition not in this header).
*
* state: must satisfy memory_no_alias for 25 * 4 uint64_t words (presumably
*        four Keccak states; their layout is not specified here). This slice
*        is the only memory the contract allows the routine to assign.
* rc:    must be exactly mld_keccakf1600_round_constants.
*/
#define mld_keccak_f1600_x4_scalar_v8a_v84a_hybrid_asm \
53+
MLD_NAMESPACE(keccak_f1600_x4_scalar_v8a_v84a_hybrid_asm)
54+
void mld_keccak_f1600_x4_scalar_v8a_v84a_hybrid_asm(uint64_t *state,
55+
uint64_t const *rc)
56+
__contract__(
57+
requires(memory_no_alias(state, sizeof(uint64_t) * 25 * 4))
58+
requires(rc == mld_keccakf1600_round_constants)
59+
assigns(memory_slice(state, sizeof(uint64_t) * 25 * 4))
60+
);
61+
62+
#endif /* !MLD_FIPS202_NATIVE_AARCH64_SRC_FIPS202_NATIVE_AARCH64_H */

0 commit comments

Comments
 (0)