From fc9079fb5ac5cd4af98e3e0f094a0a3cf2a01499 Mon Sep 17 00:00:00 2001 From: Basil Hess Date: Sun, 12 May 2024 10:07:39 +0200 Subject: [PATCH] Simplify downstream integration (#4) * - adds META.yml files - adds new api: crypto_sign_signature and crypto_sign_verify - adds namespacing - converts CRLF to LF - fixes for big-endian - changes types from 'unsigned long long' to size_t - fixes for strict compilers - ensures newlines at end of files - remove PQM4 defines from api, using size_t --- .cmake/target.cmake | 2 +- .github/workflows/cmake.yml | 2 +- META/MAYO_1_META.yml | 37 ++ META/MAYO_2_META.yml | 37 ++ META/MAYO_3_META.yml | 37 ++ META/MAYO_5_META.yml | 37 ++ apps/CMakeLists.txt | 1 + apps/PQCgenKAT_sign.c | 21 +- apps/example.c | 19 +- apps/example_nistapi.c | 39 +- include/mayo.h | 106 +++- include/mem.h | 5 +- include/{rng.h => randombytes.h} | 11 +- src/AVX2/arithmetic_128.h | 3 +- src/AVX2/arithmetic_64.h | 3 +- src/AVX2/arithmetic_96.h | 3 +- src/AVX2/arithmetic_common.h | 3 +- src/AVX2/echelon_form.h | 183 +++--- src/AVX2/echelon_form_loop.h | 115 ++-- src/AVX2/shuffle_arithmetic_128.h | 14 +- src/AVX2/shuffle_arithmetic_64.h | 962 +++++++++++++++--------------- src/AVX2/shuffle_arithmetic_96.h | 17 +- src/CMakeLists.txt | 5 + src/arithmetic.c | 10 +- src/arithmetic.h | 68 +-- src/common/aes128ctr.c | 3 +- src/common/aes_c.c | 1 + src/common/aes_ctr.h | 1 + src/common/debug_bench_tools.h | 3 +- src/common/fips202.c | 1 + src/common/fips202.h | 1 + src/common/mem.c | 1 + src/common/randombytes_ctrdrbg.c | 3 +- src/common/randombytes_system.c | 5 +- src/generic/arithmetic_128.h | 3 +- src/generic/arithmetic_64.h | 3 +- src/generic/arithmetic_96.h | 3 +- src/generic/arithmetic_common.h | 2 +- src/generic/echelon_form.h | 290 ++++----- src/mayo.c | 109 ++-- src/mayo_1/api.c | 52 +- src/mayo_1/api.h | 34 +- src/mayo_2/api.c | 51 +- src/mayo_2/api.h | 32 +- src/mayo_3/api.c | 50 +- src/mayo_3/api.h | 33 +- src/mayo_5/api.c | 50 +- src/mayo_5/api.h | 33 +- src/params.c | 3 + src/simple_arithmetic.h | 1 + test/CMakeLists.txt | 3 +- test/bench.c | 19 +- test/bench_mayo_table3.c | 9 +- test/test_kat.c | 29 +- test/test_mayo.c | 17 +- test/test_sample_solution.c | 7 +- 56 files changed, 1465 insertions(+), 1127 deletions(-) create mode 100644 META/MAYO_1_META.yml create mode 100644 META/MAYO_2_META.yml create mode 100644 META/MAYO_3_META.yml create mode 100644 META/MAYO_5_META.yml rename include/{rng.h => randombytes.h} (84%) diff --git a/.cmake/target.cmake b/.cmake/target.cmake index 368b846..b030ca2 100644 --- a/.cmake/target.cmake +++ b/.cmake/target.cmake @@ -54,7 +54,7 @@ if (${MAYO_BUILD_TYPE} MATCHES "ref") option(ENABLE_AESNI "Use AESni" OFF) option(ENABLE_PARAMS_DYNAMIC "Use dynamic parameters" ON) add_definitions(-DMAYO_BUILD_TYPE_REF) -elseif(${MAYO_BUILD_TYPE} MATCHES "ref") +elseif(${MAYO_BUILD_TYPE} MATCHES "opt") add_definitions(-DMAYO_BUILD_TYPE_OPT) option(ENABLE_PARAMS_DYNAMIC "Use dynamic parameters" OFF) elseif(${MAYO_BUILD_TYPE} MATCHES "avx2") diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 30183db..441d2a8 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -12,7 +12,7 @@ env: jobs: build_test: - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest strategy: matrix: mayo_build_type: [ref, opt, avx2] diff --git a/META/MAYO_1_META.yml b/META/MAYO_1_META.yml new file mode 100644 index 0000000..70beb97 --- /dev/null +++ b/META/MAYO_1_META.yml @@ -0,0 +1,37 @@ +name: MAYO_1 +type: signature +claimed-nist-level: 1 +length-public-key: 1168 +length-secret-key: 24 +length-signature: 321 +nistkat-sha256: ba2473dedd92cf3b8a1fc14fc22f2ffdde972c8b64cfcd8cddb4f803e48df017 +principal-submitters: + - Ward Beullens + - Fabio Campos + - Sofía Celi + - Basil Hess + - Matthias J. Kannwischer +implementations: + - name: opt + version: https://github.com/PQCMayo/MAYO-C/tree/nibbling-mayo + folder_name: . + compile_opts: -DMAYO_VARIANT=MAYO_1 -DMAYO_BUILD_TYPE_OPT -DHAVE_RANDOMBYTES_NORETVAL + signature_keypair: pqmayo_MAYO_1_opt_crypto_sign_keypair + signature_signature: pqmayo_MAYO_1_opt_crypto_sign_signature + signature_verify: pqmayo_MAYO_1_opt_crypto_sign_verify + sources: LICENSE NOTICE ./src/arithmetic.c ./src/mayo_1/api.c ./src/params.c ./src/mayo.c ./include/mayo.h ./include/mem.h ./src/mayo_1/api.h ./src/simple_arithmetic.h ./src/generic/arithmetic_common.h ./src/generic/echelon_form.h ./src/generic/arithmetic_96.h ./src/generic/arithmetic_64.h ./src/generic/arithmetic_128.h ./src/arithmetic.h ./src/common/aes_ctr.h + - name: avx2 + version: https://github.com/PQCMayo/MAYO-C/tree/nibbling-mayo + folder_name: . + compile_opts: -DMAYO_VARIANT=MAYO_1 -DMAYO_BUILD_TYPE_AVX2 -DMAYO_AVX -DHAVE_RANDOMBYTES_NORETVAL + signature_keypair: pqmayo_MAYO_1_avx2_crypto_sign_keypair + signature_signature: pqmayo_MAYO_1_avx2_crypto_sign_signature + signature_verify: pqmayo_MAYO_1_avx2_crypto_sign_verify + sources: LICENSE NOTICE ./src/arithmetic.c ./src/mayo_1/api.c ./src/params.c ./src/mayo.c ./include/mayo.h ./include/mem.h ./src/mayo_1/api.h ./src/simple_arithmetic.h ./src/arithmetic.h ./src/common/aes_ctr.h ./src/AVX2/arithmetic_128.h ./src/AVX2/arithmetic_96.h ./src/AVX2/echelon_form.h ./src/AVX2/shuffle_arithmetic_128.h ./src/AVX2/shuffle_arithmetic_96.h ./src/AVX2/arithmetic_64.h ./src/AVX2/arithmetic_common.h ./src/AVX2/echelon_form_loop.h ./src/AVX2/shuffle_arithmetic_64.h + supported_platforms: + - architecture: x86_64 + operating_systems: + - Darwin + - Linux + required_flags: + - avx2 diff --git a/META/MAYO_2_META.yml b/META/MAYO_2_META.yml new file mode 100644 index 0000000..5606a1b --- /dev/null +++ b/META/MAYO_2_META.yml @@ -0,0 +1,37 @@ +name: MAYO_2 +type: signature +claimed-nist-level: 1 +length-public-key: 5488 +length-secret-key: 24 +length-signature: 180 +nistkat-sha256: 72cb237642b2c0c4e7f8c824d9c8601ac7189784649d28dbb2cccfb94732c9a3 +principal-submitters: + - Ward Beullens + - Fabio Campos + - Sofía Celi + - Basil Hess + - Matthias J. Kannwischer +implementations: + - name: opt + version: https://github.com/PQCMayo/MAYO-C/tree/nibbling-mayo + folder_name: . + compile_opts: -DMAYO_VARIANT=MAYO_2 -DMAYO_BUILD_TYPE_OPT -DHAVE_RANDOMBYTES_NORETVAL + signature_keypair: pqmayo_MAYO_2_opt_crypto_sign_keypair + signature_signature: pqmayo_MAYO_2_opt_crypto_sign_signature + signature_verify: pqmayo_MAYO_2_opt_crypto_sign_verify + sources: LICENSE NOTICE ./src/arithmetic.c ./src/mayo_2/api.c ./src/params.c ./src/mayo.c ./include/mayo.h ./include/mem.h ./src/mayo_2/api.h ./src/simple_arithmetic.h ./src/generic/arithmetic_common.h ./src/generic/echelon_form.h ./src/generic/arithmetic_96.h ./src/generic/arithmetic_64.h ./src/generic/arithmetic_128.h ./src/arithmetic.h ./src/common/aes_ctr.h + - name: avx2 + version: https://github.com/PQCMayo/MAYO-C/tree/nibbling-mayo + folder_name: . + compile_opts: -DMAYO_VARIANT=MAYO_2 -DMAYO_BUILD_TYPE_AVX2 -DMAYO_AVX -DHAVE_RANDOMBYTES_NORETVAL + signature_keypair: pqmayo_MAYO_2_avx2_crypto_sign_keypair + signature_signature: pqmayo_MAYO_2_avx2_crypto_sign_signature + signature_verify: pqmayo_MAYO_2_avx2_crypto_sign_verify + sources: LICENSE NOTICE ./src/arithmetic.c ./src/mayo_2/api.c ./src/params.c ./src/mayo.c ./include/mayo.h ./include/mem.h ./src/mayo_2/api.h ./src/simple_arithmetic.h ./src/arithmetic.h ./src/common/aes_ctr.h ./src/AVX2/arithmetic_128.h ./src/AVX2/arithmetic_96.h ./src/AVX2/echelon_form.h ./src/AVX2/shuffle_arithmetic_128.h ./src/AVX2/shuffle_arithmetic_96.h ./src/AVX2/arithmetic_64.h ./src/AVX2/arithmetic_common.h ./src/AVX2/echelon_form_loop.h ./src/AVX2/shuffle_arithmetic_64.h + supported_platforms: + - architecture: x86_64 + operating_systems: + - Darwin + - Linux + required_flags: + - avx2 diff --git a/META/MAYO_3_META.yml b/META/MAYO_3_META.yml new file mode 100644 index 0000000..3a2516c --- /dev/null +++ b/META/MAYO_3_META.yml @@ -0,0 +1,37 @@ +name: MAYO_3 +type: signature +claimed-nist-level: 3 +length-public-key: 2656 +length-secret-key: 32 +length-signature: 577 +nistkat-sha256: dbc49f4fdfa0de69d416051215cb53c042c4a329d325452d079f3734b7467a6b +principal-submitters: + - Ward Beullens + - Fabio Campos + - Sofía Celi + - Basil Hess + - Matthias J. Kannwischer +implementations: + - name: opt + version: https://github.com/PQCMayo/MAYO-C/tree/nibbling-mayo + folder_name: . + compile_opts: -DMAYO_VARIANT=MAYO_3 -DMAYO_BUILD_TYPE_OPT -DHAVE_RANDOMBYTES_NORETVAL -DHAVE_STACKEFFICIENT + signature_keypair: pqmayo_MAYO_3_opt_crypto_sign_keypair + signature_signature: pqmayo_MAYO_3_opt_crypto_sign_signature + signature_verify: pqmayo_MAYO_3_opt_crypto_sign_verify + sources: LICENSE NOTICE ./src/arithmetic.c ./src/mayo_3/api.c ./src/params.c ./src/mayo.c ./include/mayo.h ./include/mem.h ./src/mayo_3/api.h ./src/simple_arithmetic.h ./src/generic/arithmetic_common.h ./src/generic/echelon_form.h ./src/generic/arithmetic_96.h ./src/generic/arithmetic_64.h ./src/generic/arithmetic_128.h ./src/arithmetic.h ./src/common/aes_ctr.h + - name: avx2 + version: https://github.com/PQCMayo/MAYO-C/tree/nibbling-mayo + folder_name: . + compile_opts: -DMAYO_VARIANT=MAYO_3 -DMAYO_BUILD_TYPE_AVX2 -DMAYO_AVX -DHAVE_RANDOMBYTES_NORETVAL -DHAVE_STACKEFFICIENT + signature_keypair: pqmayo_MAYO_3_avx2_crypto_sign_keypair + signature_signature: pqmayo_MAYO_3_avx2_crypto_sign_signature + signature_verify: pqmayo_MAYO_3_avx2_crypto_sign_verify + sources: LICENSE NOTICE ./src/arithmetic.c ./src/mayo_3/api.c ./src/params.c ./src/mayo.c ./include/mayo.h ./include/mem.h ./src/mayo_3/api.h ./src/simple_arithmetic.h ./src/arithmetic.h ./src/common/aes_ctr.h ./src/AVX2/arithmetic_128.h ./src/AVX2/arithmetic_96.h ./src/AVX2/echelon_form.h ./src/AVX2/shuffle_arithmetic_128.h ./src/AVX2/shuffle_arithmetic_96.h ./src/AVX2/arithmetic_64.h ./src/AVX2/arithmetic_common.h ./src/AVX2/echelon_form_loop.h ./src/AVX2/shuffle_arithmetic_64.h + supported_platforms: + - architecture: x86_64 + operating_systems: + - Darwin + - Linux + required_flags: + - avx2 diff --git a/META/MAYO_5_META.yml b/META/MAYO_5_META.yml new file mode 100644 index 0000000..e0db318 --- /dev/null +++ b/META/MAYO_5_META.yml @@ -0,0 +1,37 @@ +name: MAYO_5 +type: signature +claimed-nist-level: 5 +length-public-key: 5008 +length-secret-key: 40 +length-signature: 838 +nistkat-sha256: f2c1c69045c7d15e714a04119965e8a7007ef54f9293158587560227c97b237d +principal-submitters: + - Ward Beullens + - Fabio Campos + - Sofía Celi + - Basil Hess + - Matthias J. Kannwischer +implementations: + - name: opt + version: https://github.com/PQCMayo/MAYO-C/tree/nibbling-mayo + folder_name: . + compile_opts: -DMAYO_VARIANT=MAYO_5 -DMAYO_BUILD_TYPE_OPT -DHAVE_RANDOMBYTES_NORETVAL -DHAVE_STACKEFFICIENT + signature_keypair: pqmayo_MAYO_5_opt_crypto_sign_keypair + signature_signature: pqmayo_MAYO_5_opt_crypto_sign_signature + signature_verify: pqmayo_MAYO_5_opt_crypto_sign_verify + sources: LICENSE NOTICE ./src/arithmetic.c ./src/mayo_5/api.c ./src/params.c ./src/mayo.c ./include/mayo.h ./include/mem.h ./src/mayo_5/api.h ./src/simple_arithmetic.h ./src/generic/arithmetic_common.h ./src/generic/echelon_form.h ./src/generic/arithmetic_96.h ./src/generic/arithmetic_64.h ./src/generic/arithmetic_128.h ./src/arithmetic.h ./src/common/aes_ctr.h + - name: avx2 + version: https://github.com/PQCMayo/MAYO-C/tree/nibbling-mayo + folder_name: . + compile_opts: -DMAYO_VARIANT=MAYO_5 -DMAYO_BUILD_TYPE_AVX2 -DMAYO_AVX -DHAVE_RANDOMBYTES_NORETVAL -DHAVE_STACKEFFICIENT + signature_keypair: pqmayo_MAYO_5_avx2_crypto_sign_keypair + signature_signature: pqmayo_MAYO_5_avx2_crypto_sign_signature + signature_verify: pqmayo_MAYO_5_avx2_crypto_sign_verify + sources: LICENSE NOTICE ./src/arithmetic.c ./src/mayo_5/api.c ./src/params.c ./src/mayo.c ./include/mayo.h ./include/mem.h ./src/mayo_5/api.h ./src/simple_arithmetic.h ./src/arithmetic.h ./src/common/aes_ctr.h ./src/AVX2/arithmetic_128.h ./src/AVX2/arithmetic_96.h ./src/AVX2/echelon_form.h ./src/AVX2/shuffle_arithmetic_128.h ./src/AVX2/shuffle_arithmetic_96.h ./src/AVX2/arithmetic_64.h ./src/AVX2/arithmetic_common.h ./src/AVX2/echelon_form_loop.h ./src/AVX2/shuffle_arithmetic_64.h + supported_platforms: + - architecture: x86_64 + operating_systems: + - Darwin + - Linux + required_flags: + - avx2 diff --git a/apps/CMakeLists.txt b/apps/CMakeLists.txt index 0e41962..d0e1e95 100644 --- a/apps/CMakeLists.txt +++ b/apps/CMakeLists.txt @@ -28,3 +28,4 @@ foreach(MVARIANT ${MVARIANT_S}) target_link_libraries(example_nistapi_${MVARIANT_LOWER} PRIVATE ${MVARIANT_LOWER}_nistapi) target_include_directories(example_nistapi_${MVARIANT_LOWER} PRIVATE ../include ../src/${MVARIANT_LOWER}) endforeach() + diff --git a/apps/PQCgenKAT_sign.c b/apps/PQCgenKAT_sign.c index 7cd1fa7..3d582c1 100644 --- a/apps/PQCgenKAT_sign.c +++ b/apps/PQCgenKAT_sign.c @@ -31,7 +31,7 @@ protection within the United States. */ #include "api.h" -#include "rng.h" +#include "randombytes.h" #include #include #include @@ -46,7 +46,7 @@ protection within the United States. int FindMarker(FILE *infile, const char *marker); int ReadHex(FILE *infile, unsigned char *A, int Length, char *str); -void fprintBstr(FILE *fp, char *S, unsigned char *A, unsigned long long L); +void fprintBstr(FILE *fp, char *S, unsigned char *A, size_t L); int main(void) { char fn_req[32], fn_rsp[32]; @@ -55,7 +55,7 @@ int main(void) { unsigned char msg[3300]; unsigned char entropy_input[48]; unsigned char *m, *sm, *m1; - unsigned long long mlen, smlen, mlen1; + size_t mlen, smlen, mlen1; int count; int done; unsigned char pk[CRYPTO_PUBLICKEYBYTES], sk[CRYPTO_SECRETKEYBYTES]; @@ -84,7 +84,7 @@ int main(void) { randombytes(seed, 48); fprintBstr(fp_req, "seed = ", seed, 48); mlen = 33 * (i + 1); - fprintf(fp_req, "mlen = %llu\n", mlen); + fprintf(fp_req, "mlen = %zu\n", mlen); randombytes(msg, mlen); fprintBstr(fp_req, "msg = ", msg, mlen); fprintf(fp_req, "pk =\n"); @@ -121,13 +121,13 @@ int main(void) { randombytes_init(seed, NULL, 256); if (FindMarker(fp_req, "mlen = ")) { - if (fscanf(fp_req, "%llu", &mlen) != 1) + if (fscanf(fp_req, "%zu", &mlen) != 1) return KAT_DATA_ERROR; } else { printf("ERROR: unable to read 'mlen' from <%s>\n", fn_req); return KAT_DATA_ERROR; } - fprintf(fp_rsp, "mlen = %llu\n", mlen); + fprintf(fp_rsp, "mlen = %zu\n", mlen); m = (unsigned char *)calloc(mlen, sizeof(unsigned char)); m1 = (unsigned char *)calloc(mlen + CRYPTO_BYTES, sizeof(unsigned char)); @@ -151,7 +151,7 @@ int main(void) { printf("crypto_sign returned <%d>\n", ret_val); return KAT_CRYPTO_FAILURE; } - fprintf(fp_rsp, "smlen = %llu\n", smlen); + fprintf(fp_rsp, "smlen = %zu\n", smlen); fprintBstr(fp_rsp, "sm = ", sm, smlen); fprintf(fp_rsp, "\n"); @@ -162,7 +162,7 @@ int main(void) { if (mlen != mlen1) { printf( - "crypto_sign_open returned bad 'mlen': Got <%llu>, expected <%llu>\n", + "crypto_sign_open returned bad 'mlen': Got <%zu>, expected <%zu>\n", mlen1, mlen); return KAT_CRYPTO_FAILURE; } @@ -265,8 +265,8 @@ int ReadHex(FILE *infile, unsigned char *A, int Length, char *str) { return 1; } -void fprintBstr(FILE *fp, char *S, unsigned char *A, unsigned long long L) { - unsigned long long i; +void fprintBstr(FILE *fp, char *S, unsigned char *A, size_t L) { + size_t i; fprintf(fp, "%s", S); @@ -278,3 +278,4 @@ void fprintBstr(FILE *fp, char *S, unsigned char *A, unsigned long long L) { fprintf(fp, "\n"); } + diff --git a/apps/example.c b/apps/example.c index 0694dce..30369ae 100644 --- a/apps/example.c +++ b/apps/example.c @@ -24,21 +24,21 @@ */ static int example_mayo(const mayo_params_t* p) { - unsigned long long msglen = 32; - unsigned long long smlen = p->sig_bytes + msglen; + size_t msglen = 32; + size_t smlen = PARAM_sig_bytes(p) + msglen; - unsigned char *pk = calloc(p->cpk_bytes, 1); - unsigned char *sk = calloc(p->csk_bytes, 1); + unsigned char *pk = calloc(PARAM_cpk_bytes(p), 1); + unsigned char *sk = calloc(PARAM_csk_bytes(p), 1); - unsigned char *epk = calloc(p->epk_bytes, 1); + unsigned char *epk = calloc(PARAM_epk_bytes(p), 1); sk_t *esk = calloc(sizeof(sk_t), 1); - unsigned char *sig = calloc(p->sig_bytes + msglen, 1); + unsigned char *sig = calloc(PARAM_sig_bytes(p) + msglen, 1); unsigned char msg[32] = { 0xe }; unsigned char msg2[32] = { 0 }; - printf("Example with %s\n", p->name); + printf("Example with %s\n", PARAM_name(p)); printf("mayo_keypair -> "); int res = mayo_keypair(p, pk, sk); @@ -129,7 +129,7 @@ static int example_mayo(const mayo_params_t* p) { err: free(pk); free(epk); - mayo_secure_free(sk, p->csk_bytes); + mayo_secure_free(sk, PARAM_csk_bytes(p)); mayo_secure_free(esk, sizeof(sk_t)); free(sig); return res; @@ -145,6 +145,7 @@ int main(void) { } } #else - return example_mayo(&MAYO_VARIANT); + return example_mayo(0); #endif } + diff --git a/apps/example_nistapi.c b/apps/example_nistapi.c index ffa9943..4e5e54d 100644 --- a/apps/example_nistapi.c +++ b/apps/example_nistapi.c @@ -20,8 +20,9 @@ */ static int example_mayo(void) { - unsigned long long msglen = 32; - unsigned long long smlen = CRYPTO_BYTES + msglen; + size_t msglen = 32; + size_t smlen = CRYPTO_BYTES + msglen; + size_t siglen = CRYPTO_BYTES; unsigned char *pk = calloc(CRYPTO_PUBLICKEYBYTES, 1); unsigned char *sk = calloc(CRYPTO_SECRETKEYBYTES, 1); @@ -76,6 +77,39 @@ static int example_mayo(void) { res = 0; printf("OK\n"); } + + printf("crypto_sign_signature -> "); + res = crypto_sign_signature(sig, &siglen, msg, msglen, sk); + if (res) { + printf("FAIL\n"); + res = -1; + goto err; + } else { + printf("OK\n"); + } + + printf("crypto_sign_verify (with correct signature) -> "); + res = crypto_sign_verify(sig, siglen, msg, msglen, pk); + if (res) { + printf("FAIL\n"); + res = -1; + goto err; + } else { + res = 0; + printf("OK\n"); + } + + printf("crypto_sign_verify (with altered signature) -> "); + sig[0] = ~sig[0]; + res = crypto_sign_verify(sig, siglen, msg, msglen, pk); + if (!res) { + printf("FAIL\n"); + res = -1; + goto err; + } else { + res = 0; + printf("OK\n"); + } err: free(pk); @@ -87,3 +121,4 @@ static int example_mayo(void) { int main(void) { return example_mayo(); } + diff --git a/include/mayo.h b/include/mayo.h index f49bcab..1de4bf2 100644 --- a/include/mayo.h +++ b/include/mayo.h @@ -4,6 +4,7 @@ #define MAYO_H #include +#include #define F_TAIL_LEN 5 #define F_TAIL_64 \ @@ -13,6 +14,7 @@ #define F_TAIL_128 \ { 4, 8, 0, 4, 2 } // f(z) = z^128 + x*z^4 + x^2*z^3 + x^3*z + x^2 +#define MAYO_1_name "MAYO_1" #define MAYO_1_n 66 #define MAYO_1_m 64 #define MAYO_1_o 8 @@ -39,6 +41,7 @@ #define MAYO_1_pk_seed_bytes 16 #define MAYO_1_sk_seed_bytes 24 +#define MAYO_2_name "MAYO_2" #define MAYO_2_n 78 #define MAYO_2_m 64 #define MAYO_2_o 18 @@ -65,6 +68,7 @@ #define MAYO_2_pk_seed_bytes 16 #define MAYO_2_sk_seed_bytes 24 +#define MAYO_3_name "MAYO_3" #define MAYO_3_n 99 #define MAYO_3_m 96 #define MAYO_3_o 10 @@ -91,6 +95,7 @@ #define MAYO_3_pk_seed_bytes 16 #define MAYO_3_sk_seed_bytes 32 +#define MAYO_5_name "MAYO_5" #define MAYO_5_n 133 #define MAYO_5_m 128 #define MAYO_5_o 12 @@ -121,6 +126,25 @@ #define PARAM_JOIN2(a, b) PARAM_JOIN2_(a, b) #define PARAM_NAME(end) PARAM_JOIN2(MAYO_VARIANT, end) +#if defined(MAYO_VARIANT) +#define PARAM_JOIN3_(a, b, c) pqmayo_##a##_##b##_##c +#define PARAM_JOIN3(a, b, c) PARAM_JOIN3_(a, b, c) +#define PARAM_NAME3(end, s) PARAM_JOIN3(MAYO_VARIANT, end, s) + +#if defined(MAYO_BUILD_TYPE_REF) +#define MAYO_NAMESPACE(s) PARAM_NAME3(ref, s) +#elif defined(MAYO_BUILD_TYPE_OPT) +#define MAYO_NAMESPACE(s) PARAM_NAME3(opt, s) +#elif defined(MAYO_BUILD_TYPE_AVX2) +#define MAYO_NAMESPACE(s) PARAM_NAME3(avx2, s) +#else +#error "Build type not known" +#endif + +#else +#define MAYO_NAMESPACE(s) s +#endif + #ifdef ENABLE_PARAMS_DYNAMIC #define NAME_MAX mayo5 #define N_MAX 133 @@ -174,6 +198,63 @@ #error "Parameter not specified" #endif +#ifdef ENABLE_PARAMS_DYNAMIC +#define PARAM_name(p) (p->name) +#define PARAM_m(p) (p->m) +#define PARAM_n(p) (p->n) +#define PARAM_o(p) (p->o) +#define PARAM_v(p) (p->n - p->o) +#define PARAM_A_cols(p) (p->k * p->o + 1) +#define PARAM_k(p) (p->k) +#define PARAM_q(p) (p->q) +#define PARAM_m_bytes(p) (p->m_bytes) +#define PARAM_O_bytes(p) (p->O_bytes) +#define PARAM_v_bytes(p) (p->v_bytes) +#define PARAM_r_bytes(p) (p->r_bytes) +#define PARAM_P1_bytes(p) (p->P1_bytes) +#define PARAM_P2_bytes(p) (p->P2_bytes) +#define PARAM_P3_bytes(p) (p->P3_bytes) +#define PARAM_csk_bytes(p) (p->csk_bytes) +#define PARAM_esk_bytes(p) (p->esk_bytes) +#define PARAM_cpk_bytes(p) (p->cpk_bytes) +#define PARAM_epk_bytes(p) (p->epk_bytes) +#define PARAM_sig_bytes(p) (p->sig_bytes) +#define PARAM_f_tail(p) (p->f_tail) +#define PARAM_salt_bytes(p) (p->salt_bytes) +#define PARAM_sk_seed_bytes(p) (p->sk_seed_bytes) +#define PARAM_digest_bytes(p) (p->digest_bytes) +#define PARAM_pk_seed_bytes(p) (p->pk_seed_bytes) +#elif defined(MAYO_VARIANT) +#define PARAM_name(p) PARAM_NAME(name) +#define PARAM_m(p) PARAM_NAME(m) +#define PARAM_n(p) PARAM_NAME(n) +#define PARAM_o(p) PARAM_NAME(o) +#define PARAM_v(p) PARAM_NAME(v) +#define PARAM_A_cols(p) PARAM_NAME(A_cols) +#define PARAM_k(p) PARAM_NAME(k) +#define PARAM_q(p) PARAM_NAME(q) +#define PARAM_m_bytes(p) PARAM_NAME(m_bytes) +#define PARAM_O_bytes(p) PARAM_NAME(O_bytes) +#define PARAM_v_bytes(p) PARAM_NAME(v_bytes) +#define PARAM_r_bytes(p) PARAM_NAME(r_bytes) +#define PARAM_P1_bytes(p) PARAM_NAME(P1_bytes) +#define PARAM_P2_bytes(p) PARAM_NAME(P2_bytes) +#define PARAM_P3_bytes(p) PARAM_NAME(P3_bytes) +#define PARAM_csk_bytes(p) PARAM_NAME(csk_bytes) +#define PARAM_esk_bytes(p) PARAM_NAME(esk_bytes) +#define PARAM_cpk_bytes(p) PARAM_NAME(cpk_bytes) +#define PARAM_epk_bytes(p) PARAM_NAME(epk_bytes) +#define PARAM_sig_bytes(p) PARAM_NAME(sig_bytes) +static const unsigned char f_tail[] = PARAM_NAME(f_tail); +#define PARAM_salt_bytes(p) PARAM_NAME(salt_bytes) +#define PARAM_sk_seed_bytes(p) PARAM_NAME(sk_seed_bytes) +#define PARAM_digest_bytes(p) PARAM_NAME(digest_bytes) +#define PARAM_pk_seed_bytes(p) PARAM_NAME(pk_seed_bytes) +#define PARAM_f_tail(p) f_tail +#else +#error "Parameter not specified" +#endif + /** * Struct defining MAYO parameters */ @@ -212,10 +293,12 @@ typedef struct sk_t { /** * MAYO parameter sets */ +#ifdef ENABLE_PARAMS_DYNAMIC extern const mayo_params_t MAYO_1; extern const mayo_params_t MAYO_2; extern const mayo_params_t MAYO_3; extern const mayo_params_t MAYO_5; +#endif /** * Status codes @@ -234,8 +317,14 @@ extern const mayo_params_t MAYO_5; * @param[out] sk Mayo secret key * @return int status code */ +#define mayo_keypair MAYO_NAMESPACE(mayo_keypair) int mayo_keypair(const mayo_params_t *p, unsigned char *pk, unsigned char *sk); +#define mayo_sign_signature MAYO_NAMESPACE(mayo_sign_signature) +int mayo_sign_signature(const mayo_params_t *p, unsigned char *sig, + size_t *siglen, const unsigned char *m, + size_t mlen, const unsigned char *csk); + /** * MAYO signature generation. * @@ -251,9 +340,10 @@ int mayo_keypair(const mayo_params_t *p, unsigned char *pk, unsigned char *sk); * @param[in] sk Compacted secret key * @return int status code */ +#define mayo_sign MAYO_NAMESPACE(mayo_sign) int mayo_sign(const mayo_params_t *p, unsigned char *sm, - unsigned long long *smlen, const unsigned char *m, - unsigned long long mlen, const unsigned char *sk); + size_t *smlen, const unsigned char *m, + size_t mlen, const unsigned char *sk); /** * Mayo open signature. @@ -270,9 +360,10 @@ int mayo_sign(const mayo_params_t *p, unsigned char *sm, * @param[in] pk Compacted public key * @return int status code */ +#define mayo_open MAYO_NAMESPACE(mayo_open) int mayo_open(const mayo_params_t *p, unsigned char *m, - unsigned long long *mlen, const unsigned char *sm, - unsigned long long smlen, const unsigned char *pk); + size_t *mlen, const unsigned char *sm, + size_t smlen, const unsigned char *pk); /** * Mayo compact keypair generation. @@ -288,6 +379,7 @@ int mayo_open(const mayo_params_t *p, unsigned char *m, * @param[out] csk Mayo compacted secret key * @return int status code */ +#define mayo_keypair_compact MAYO_NAMESPACE(mayo_keypair_compact) int mayo_keypair_compact(const mayo_params_t *p, unsigned char *cpk, unsigned char *csk); @@ -302,6 +394,7 @@ int mayo_keypair_compact(const mayo_params_t *p, unsigned char *cpk, * @param[out] epk Expanded public key. * @return int return code */ +#define mayo_expand_pk MAYO_NAMESPACE(mayo_expand_pk) int mayo_expand_pk(const mayo_params_t *p, const unsigned char *cpk, unsigned char *epk); @@ -316,6 +409,7 @@ int mayo_expand_pk(const mayo_params_t *p, const unsigned char *cpk, * @param[out] esk Expanded secret key. * @return int return code */ +#define mayo_expand_sk MAYO_NAMESPACE(mayo_expand_sk) int mayo_expand_sk(const mayo_params_t *p, const unsigned char *csk, sk_t *esk); @@ -332,8 +426,10 @@ int mayo_expand_sk(const mayo_params_t *p, const unsigned char *csk, * @param[in] pk Compacted public key * @return int 0 if verification succeeded, 1 otherwise. */ +#define mayo_verify MAYO_NAMESPACE(mayo_verify) int mayo_verify(const mayo_params_t *p, const unsigned char *m, - unsigned long long mlen, const unsigned char *sig, + size_t mlen, const unsigned char *sig, const unsigned char *pk); #endif + diff --git a/include/mem.h b/include/mem.h index 4695847..0ab3abc 100644 --- a/include/mem.h +++ b/include/mem.h @@ -7,8 +7,10 @@ #if defined(__GNUC__) || defined(__clang__) #define BSWAP32(i) __builtin_bswap32((i)) +#define BSWAP64(i) __builtin_bswap64((i)) #else #define BSWAP32(i) ((((i) >> 24) & 0xff) | (((i) >> 8) & 0xff00) | (((i) & 0xff00) << 8) | ((i) << 24)) +#define BSWAP64(i) ((BSWAP32((i) >> 32) & 0xffffffff) | (BSWAP32(i) << 32)) #endif // a > b -> b - a is negative @@ -56,4 +58,5 @@ void mayo_secure_free(void *mem, size_t size); */ void mayo_secure_clear(void *mem, size_t size); -#endif \ No newline at end of file +#endif + diff --git a/include/rng.h b/include/randombytes.h similarity index 84% rename from include/rng.h rename to include/randombytes.h index 7bf4efa..0b44306 100644 --- a/include/rng.h +++ b/include/randombytes.h @@ -1,7 +1,9 @@ // SPDX-License-Identifier: Apache-2.0 -#ifndef rng_h -#define rng_h +#ifndef randombytes_h +#define randombytes_h + +#include /** * Randombytes initialization. @@ -23,6 +25,7 @@ void randombytes_init(unsigned char *entropy_input, * @param[in] xlen Number of random bytes to be generated * @return int 0 on success, -1 otherwise */ -int randombytes(unsigned char *x, unsigned long long xlen); +int randombytes(unsigned char *x, size_t xlen); + +#endif /* randombytes_h */ -#endif /* rng_h */ diff --git a/src/AVX2/arithmetic_128.h b/src/AVX2/arithmetic_128.h index f7c647a..27b367e 100644 --- a/src/AVX2/arithmetic_128.h +++ b/src/AVX2/arithmetic_128.h @@ -74,4 +74,5 @@ static vec_copy_128(bins + 8, out); } -#endif \ No newline at end of file +#endif + diff --git a/src/AVX2/arithmetic_64.h b/src/AVX2/arithmetic_64.h index 65743a8..9f7535c 100644 --- a/src/AVX2/arithmetic_64.h +++ b/src/AVX2/arithmetic_64.h @@ -65,4 +65,5 @@ inline void multiply_bins_64(uint64_t *bins, uint64_t *out) { vec_copy_64(bins + 4, out); } -#endif \ No newline at end of file +#endif + diff --git a/src/AVX2/arithmetic_96.h b/src/AVX2/arithmetic_96.h index 842dcfb..8635967 100644 --- a/src/AVX2/arithmetic_96.h +++ b/src/AVX2/arithmetic_96.h @@ -70,4 +70,5 @@ inline void multiply_bins_96(uint64_t *bins, uint64_t *out) { vec_copy_96(bins + 6, out); } -#endif \ No newline at end of file +#endif + diff --git a/src/AVX2/arithmetic_common.h b/src/AVX2/arithmetic_common.h index 90feb73..eeb13dc 100644 --- a/src/AVX2/arithmetic_common.h +++ b/src/AVX2/arithmetic_common.h @@ -171,4 +171,5 @@ static inline uint64_t gf16v_mul_u64( uint64_t a, uint8_t b ) { return r64; } -#endif \ No newline at end of file +#endif + diff --git a/src/AVX2/echelon_form.h b/src/AVX2/echelon_form.h index 523ebdd..fa69de0 100644 --- a/src/AVX2/echelon_form.h +++ b/src/AVX2/echelon_form.h @@ -1,95 +1,88 @@ -// SPDX-License-Identifier: Apache-2.0 - -#include -#include - - -#define MAYO_MAX(x, y) (((x) > (y)) ? (x) : (y)) -#define MAYO_MIN(x, y) (((x) < (y)) ? (x) : (y)) - - -// -// generate multiplication table for '4-bit' variable 'b'. From https://eprint.iacr.org/2023/059/. -// -static inline __m256i tbl32_gf16_multab( uint8_t b ) { - static const unsigned char __gf16_mulbase[128] __attribute__((aligned(32))) = { - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, - 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x03, 0x01, 0x07, 0x05, 0x0b, 0x09, 0x0f, 0x0d, 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x03, 0x01, 0x07, 0x05, 0x0b, 0x09, 0x0f, 0x0d, - 0x00, 0x04, 0x08, 0x0c, 0x03, 0x07, 0x0b, 0x0f, 0x06, 0x02, 0x0e, 0x0a, 0x05, 0x01, 0x0d, 0x09, 0x00, 0x04, 0x08, 0x0c, 0x03, 0x07, 0x0b, 0x0f, 0x06, 0x02, 0x0e, 0x0a, 0x05, 0x01, 0x0d, 0x09, - 0x00, 0x08, 0x03, 0x0b, 0x06, 0x0e, 0x05, 0x0d, 0x0c, 0x04, 0x0f, 0x07, 0x0a, 0x02, 0x09, 0x01, 0x00, 0x08, 0x03, 0x0b, 0x06, 0x0e, 0x05, 0x0d, 0x0c, 0x04, 0x0f, 0x07, 0x0a, 0x02, 0x09, 0x01 - }; - - - __m256i bx = _mm256_set1_epi16( b & 0xf ); - __m256i b1 = _mm256_srli_epi16( bx, 1 ); - - const __m256i tab0 = _mm256_load_si256((__m256i const *) (__gf16_mulbase + 32 * 0)); - const __m256i tab1 = _mm256_load_si256((__m256i const *) (__gf16_mulbase + 32 * 1)); - const __m256i tab2 = _mm256_load_si256((__m256i const *) (__gf16_mulbase + 32 * 2)); - const __m256i tab3 = _mm256_load_si256((__m256i const *) (__gf16_mulbase + 32 * 3)); - - __m256i mask_1 = _mm256_set1_epi16(1); - __m256i mask_4 = _mm256_set1_epi16(4); - __m256i mask_0 = _mm256_setzero_si256(); - - return ( tab0 & _mm256_cmpgt_epi16( bx & mask_1, mask_0) ) - ^ ( tab1 & _mm256_cmpgt_epi16( b1 & mask_1, mask_0) ) - ^ ( tab2 & _mm256_cmpgt_epi16( bx & mask_4, mask_0) ) - ^ ( tab3 & _mm256_cmpgt_epi16( b1 & mask_4, mask_0) ); -} - -/* put matrix in row echelon form with ones on first nonzero entries in constant time*/ -static inline void EF(unsigned char *A, int _nrows, int _ncols) { - - (void) _nrows; - (void) _ncols; - - #define nrows M_MAX - #define ncols (K_MAX * O_MAX + 1) - - #define AVX_REGS_PER_ROW ((K_MAX * O_MAX + 1 + 31) / 32) - #define MAX_COLS (AVX_REGS_PER_ROW * 32) - - __m256i _pivot_row[AVX_REGS_PER_ROW]; - __m256i A_avx[AVX_REGS_PER_ROW* M_MAX]; - - unsigned char* pivot_row_bytes = (unsigned char*) _pivot_row; - unsigned char* A_bytes = (unsigned char*) A_avx; - - // load A in the tail of AVX2 registers - for (int i = 0; i < nrows; i++) { - for (int j = 0; j < ncols; j++) - { - A_bytes[i*MAX_COLS + (MAX_COLS - ncols) + j] = A[ i*ncols + j ]; - } - } - - // pivot row is secret, pivot col is not - unsigned char inverse; - int pivot_row = 0; - int pivot_col = MAYO_MAX(MAX_COLS - ncols,0); - for (; pivot_col < MAX_COLS-128; pivot_col++) { - #include "echelon_form_loop.h" - } - for (; pivot_col < MAX_COLS-96; pivot_col++) { - #include "echelon_form_loop.h" - } - for (; pivot_col < MAX_COLS-64; pivot_col++) { - #include "echelon_form_loop.h" - } - for (; pivot_col < MAX_COLS-32; pivot_col++) { - #include "echelon_form_loop.h" - } - for (; pivot_col < MAX_COLS; pivot_col++) { - #include "echelon_form_loop.h" - } - - // write the matrix A back - for (int i = 0; i < nrows; i++) { - for (int j = 0; j < ncols; j++) { - A[i * ncols + j] = A_bytes[i*AVX_REGS_PER_ROW*32 + (MAX_COLS - ncols) + j]; - } - } - mayo_secure_clear(_pivot_row, AVX_REGS_PER_ROW * 32); - mayo_secure_clear(A_avx, AVX_REGS_PER_ROW * 32 * nrows); -} \ No newline at end of file +// SPDX-License-Identifier: Apache-2.0 + +#include +#include + + +#define MAYO_MAX(x, y) (((x) > (y)) ? (x) : (y)) +#define MAYO_MIN(x, y) (((x) < (y)) ? (x) : (y)) + + +// +// generate multiplication table for '4-bit' variable 'b'. From https://eprint.iacr.org/2023/059/. +// +static inline __m256i tbl32_gf16_multab( uint8_t b ) { + __m256i bx = _mm256_set1_epi16( b & 0xf ); + __m256i b1 = _mm256_srli_epi16( bx, 1 ); + + const __m256i tab0 = _mm256_load_si256((__m256i const *) (__gf16_mulbase + 32 * 0)); + const __m256i tab1 = _mm256_load_si256((__m256i const *) (__gf16_mulbase + 32 * 1)); + const __m256i tab2 = _mm256_load_si256((__m256i const *) (__gf16_mulbase + 32 * 2)); + const __m256i tab3 = _mm256_load_si256((__m256i const *) (__gf16_mulbase + 32 * 3)); + + __m256i mask_1 = _mm256_set1_epi16(1); + __m256i mask_4 = _mm256_set1_epi16(4); + __m256i mask_0 = _mm256_setzero_si256(); + + return ( tab0 & _mm256_cmpgt_epi16( bx & mask_1, mask_0) ) + ^ ( tab1 & _mm256_cmpgt_epi16( b1 & mask_1, mask_0) ) + ^ ( tab2 & _mm256_cmpgt_epi16( bx & mask_4, mask_0) ) + ^ ( tab3 & _mm256_cmpgt_epi16( b1 & mask_4, mask_0) ); +} + +/* put matrix in row echelon form with ones on first nonzero entries in constant time*/ +static inline void EF(unsigned char *A, int _nrows, int _ncols) { + + (void) _nrows; + (void) _ncols; + + #define nrows M_MAX + #define ncols (K_MAX * O_MAX + 1) + + #define AVX_REGS_PER_ROW ((K_MAX * O_MAX + 1 + 31) / 32) + #define MAX_COLS (AVX_REGS_PER_ROW * 32) + + __m256i _pivot_row[AVX_REGS_PER_ROW]; + __m256i A_avx[AVX_REGS_PER_ROW* M_MAX]; + + unsigned char* pivot_row_bytes = (unsigned char*) _pivot_row; + unsigned char* A_bytes = (unsigned char*) A_avx; + + // load A in the tail of AVX2 registers + for (int i = 0; i < nrows; i++) { + for (int j = 0; j < ncols; j++) + { + A_bytes[i*MAX_COLS + (MAX_COLS - ncols) + j] = A[ i*ncols + j ]; + } + } + + // pivot row is secret, pivot col is not + unsigned char inverse; + int pivot_row = 0; + int pivot_col = MAYO_MAX(MAX_COLS - ncols,0); + for (; pivot_col < MAX_COLS-128; pivot_col++) { + #include "echelon_form_loop.h" + } + for (; pivot_col < MAX_COLS-96; pivot_col++) { + #include "echelon_form_loop.h" + } + for (; pivot_col < MAX_COLS-64; pivot_col++) { + #include "echelon_form_loop.h" + } + for (; pivot_col < MAX_COLS-32; pivot_col++) { + #include "echelon_form_loop.h" + } + for (; pivot_col < MAX_COLS; pivot_col++) { + #include "echelon_form_loop.h" + } + + // write the matrix A back + for (int i = 0; i < nrows; i++) { + for (int j = 0; j < ncols; j++) { + A[i * ncols + j] = A_bytes[i*AVX_REGS_PER_ROW*32 + (MAX_COLS - ncols) + j]; + } + } + mayo_secure_clear(_pivot_row, AVX_REGS_PER_ROW * 32); + mayo_secure_clear(A_avx, AVX_REGS_PER_ROW * 32 * nrows); +} + diff --git a/src/AVX2/echelon_form_loop.h b/src/AVX2/echelon_form_loop.h index 1b64523..b8b2974 100644 --- a/src/AVX2/echelon_form_loop.h +++ b/src/AVX2/echelon_form_loop.h @@ -1,57 +1,58 @@ -// SPDX-License-Identifier: Apache-2.0 - -int pivot_col_rounded = pivot_col/32; - -int pivot_row_lower_bound = MAYO_MAX(0, pivot_col + nrows - MAX_COLS); -int pivot_row_upper_bound = MAYO_MIN(nrows - 1, pivot_col - MAX_COLS + ncols); -/* the pivot row is guaranteed to be between these lower and upper bounds if A has full rank*/ - -/* zero out pivot row */ -for (int i = pivot_col_rounded; i < AVX_REGS_PER_ROW; i++) { - _pivot_row[i] = _mm256_set1_epi8(0); -} - -/* try to get a pivot row in constant time */ -unsigned char pivot = 0; -uint32_t pivot_is_zero = -1; -for (int row = pivot_row_lower_bound; - row <= MAYO_MIN(nrows - 1, pivot_row_upper_bound + 32); row++) { - uint32_t is_pivot_row = ~ct_compare_32(row, pivot_row); - uint32_t below_pivot_row = ct_is_greater_than(row, pivot_row); - __m256i mask = _mm256_set1_epi32( is_pivot_row | (below_pivot_row & pivot_is_zero) ); - for (int j = pivot_col_rounded; j < AVX_REGS_PER_ROW; j++) { - _pivot_row[j] ^= mask & A_avx[row * AVX_REGS_PER_ROW + j]; - } - pivot = pivot_row_bytes[pivot_col]; - pivot_is_zero = ~ct_compare_32((int) pivot, 0); -} - -/* multiply pivot row by inverse of pivot */ -inverse = inverse_f(pivot); -__m256i inverse_multab = tbl32_gf16_multab(inverse); - -for (int j = pivot_col_rounded; j < AVX_REGS_PER_ROW; j++) { - _pivot_row[j] = _mm256_shuffle_epi8(inverse_multab, _pivot_row[j]); -} - -/* conditionally write pivot row to the correct row, if there is a nonzero pivot */ -/* eliminate entries below pivot */ -for (int row = pivot_row_lower_bound; row < nrows; row++) { - unsigned char below_pivot = (unsigned char) (ct_is_greater_than(row, pivot_row)); - unsigned char elt_to_elim = A_bytes[row*AVX_REGS_PER_ROW*32 + pivot_col]; - - __m256i multab = tbl32_gf16_multab(below_pivot & elt_to_elim); - if (row <= pivot_row_upper_bound) { - __m256i mask = _mm256_set1_epi32(~ct_compare_32(row, pivot_row) & ~pivot_is_zero); - for (int col = pivot_col_rounded; col < AVX_REGS_PER_ROW; col++) { - A_avx[row*AVX_REGS_PER_ROW + col] = _mm256_blendv_epi8(A_avx[row*AVX_REGS_PER_ROW + col], _pivot_row[col], mask) ^ - _mm256_shuffle_epi8(multab, _pivot_row[col]); - } - } else { - for (int j = pivot_col_rounded; j < AVX_REGS_PER_ROW; j++) { - A_avx[row*AVX_REGS_PER_ROW + j] ^= _mm256_shuffle_epi8(multab, _pivot_row[j]); - } - } -} - -pivot_row += (-(int32_t)(~pivot_is_zero)); \ No newline at end of file +// SPDX-License-Identifier: Apache-2.0 + +int pivot_col_rounded = pivot_col/32; + +int pivot_row_lower_bound = MAYO_MAX(0, pivot_col + nrows - MAX_COLS); +int pivot_row_upper_bound = MAYO_MIN(nrows - 1, pivot_col - MAX_COLS + ncols); +/* the pivot row is guaranteed to be between these lower and upper bounds if A has full rank*/ + +/* zero out pivot row */ +for (int i = pivot_col_rounded; i < AVX_REGS_PER_ROW; i++) { + _pivot_row[i] = _mm256_set1_epi8(0); +} + +/* try to get a pivot row in constant time */ +unsigned char pivot = 0; +uint32_t pivot_is_zero = -1; +for (int row = pivot_row_lower_bound; + row <= MAYO_MIN(nrows - 1, pivot_row_upper_bound + 32); row++) { + uint32_t is_pivot_row = ~ct_compare_32(row, pivot_row); + uint32_t below_pivot_row = ct_is_greater_than(row, pivot_row); + __m256i mask = _mm256_set1_epi32( is_pivot_row | (below_pivot_row & pivot_is_zero) ); + for (int j = pivot_col_rounded; j < AVX_REGS_PER_ROW; j++) { + _pivot_row[j] ^= mask & A_avx[row * AVX_REGS_PER_ROW + j]; + } + pivot = pivot_row_bytes[pivot_col]; + pivot_is_zero = ~ct_compare_32((int) pivot, 0); +} + +/* multiply pivot row by inverse of pivot */ +inverse = inverse_f(pivot); +__m256i inverse_multab = tbl32_gf16_multab(inverse); + +for (int j = pivot_col_rounded; j < AVX_REGS_PER_ROW; j++) { + _pivot_row[j] = _mm256_shuffle_epi8(inverse_multab, _pivot_row[j]); +} + +/* conditionally write pivot row to the correct row, if there is a nonzero pivot */ +/* eliminate entries below pivot */ +for (int row = pivot_row_lower_bound; row < nrows; row++) { + unsigned char below_pivot = (unsigned char) (ct_is_greater_than(row, pivot_row)); + unsigned char elt_to_elim = A_bytes[row*AVX_REGS_PER_ROW*32 + pivot_col]; + + __m256i multab = tbl32_gf16_multab(below_pivot & elt_to_elim); + if (row <= pivot_row_upper_bound) { + __m256i mask = _mm256_set1_epi32(~ct_compare_32(row, pivot_row) & ~pivot_is_zero); + for (int col = pivot_col_rounded; col < AVX_REGS_PER_ROW; col++) { + A_avx[row*AVX_REGS_PER_ROW + col] = _mm256_blendv_epi8(A_avx[row*AVX_REGS_PER_ROW + col], _pivot_row[col], mask) ^ + _mm256_shuffle_epi8(multab, _pivot_row[col]); + } + } else { + for (int j = pivot_col_rounded; j < AVX_REGS_PER_ROW; j++) { + A_avx[row*AVX_REGS_PER_ROW + j] ^= _mm256_shuffle_epi8(multab, _pivot_row[j]); + } + } +} + +pivot_row += (-(int32_t)(~pivot_is_zero)); + diff --git a/src/AVX2/shuffle_arithmetic_128.h b/src/AVX2/shuffle_arithmetic_128.h index 6c1dfa7..27b416a 100644 --- a/src/AVX2/shuffle_arithmetic_128.h +++ b/src/AVX2/shuffle_arithmetic_128.h @@ -195,7 +195,7 @@ inline void mayo_5_Vt_times_L_avx2(const uint64_t *_L, const __m256i *V_multabs, __m256i in_even1 = _mm256_srli_epi16(in_odd1, 4) & low_nibble_mask; in_odd1 &= low_nibble_mask; - for (size_t k = 0; k < K_OVER_2; k++) + for (k = 0; k < K_OVER_2; k++) { temp[4*k] ^= _mm256_shuffle_epi8(V_multabs[K_OVER_2*r + k], in_odd0); temp[4*k + 1] ^= _mm256_shuffle_epi8(V_multabs[K_OVER_2*r + k], in_even0); @@ -249,7 +249,7 @@ inline void mayo_5_P1_times_Vt_avx2(const uint64_t *_P1, __m256i *V_multabs, uin in_odd1 &= low_nibble_mask; cols_used ++; - for (size_t k = 0; k < K_OVER_2; k++) + for (k = 0; k < K_OVER_2; k++) { temp[4*k] ^= _mm256_shuffle_epi8(V_multabs[K_OVER_2*c + k], in_odd0); temp[4*k + 1] ^= _mm256_shuffle_epi8(V_multabs[K_OVER_2*c + k], in_even0); @@ -300,7 +300,7 @@ inline void mayo_5_Vt_times_Pv_avx2(const uint64_t *_Pv, const __m256i *V_multab __m256i in_even1 = _mm256_srli_epi16(in_odd1, 4) & low_nibble_mask; in_odd1 &= low_nibble_mask; - for (size_t k = 0; k < K_OVER_2; k++) + for (k = 0; k < K_OVER_2; k++) { temp[4*k] ^= _mm256_shuffle_epi8(V_multabs[K_OVER_2*r + k], in_odd0); temp[4*k + 1] ^= _mm256_shuffle_epi8(V_multabs[K_OVER_2*r + k], in_even0); @@ -358,7 +358,7 @@ inline void mayo_5_P1_times_S1_plus_P2_times_S2_avx2(const uint64_t *_P1, const in_odd1 &= low_nibble_mask; P1_cols_used ++; - for (size_t k = 0; k < K_OVER_2; k++) + for (k = 0; k < K_OVER_2; k++) { temp[4*k] ^= _mm256_shuffle_epi8(S1_multabs[K_OVER_2*c + k], in_odd0); temp[4*k + 1] ^= _mm256_shuffle_epi8(S1_multabs[K_OVER_2*c + k], in_even0); @@ -377,7 +377,7 @@ inline void mayo_5_P1_times_S1_plus_P2_times_S2_avx2(const uint64_t *_P1, const __m256i in_even1 = _mm256_srli_epi16(in_odd1, 4) & low_nibble_mask; in_odd1 &= low_nibble_mask; - for (size_t k = 0; k < K_OVER_2; k++) + for (k = 0; k < K_OVER_2; k++) { temp[4*k] ^= _mm256_shuffle_epi8(S2_multabs[K_OVER_2*c + k], in_odd0); temp[4*k + 1] ^= _mm256_shuffle_epi8(S2_multabs[K_OVER_2*c + k], in_even0); @@ -432,7 +432,7 @@ inline void mayo_5_P3_times_S2_avx2(const uint64_t *_P3, __m256i *S2_multabs, ui in_odd1 &= low_nibble_mask; cols_used ++; - for (size_t k = 0; k < K_OVER_2; k++) + for (k = 0; k < K_OVER_2; k++) { temp[4*k] ^= _mm256_shuffle_epi8(S2_multabs[K_OVER_2*c + k], in_odd0); temp[4*k + 1] ^= _mm256_shuffle_epi8(S2_multabs[K_OVER_2*c + k], in_even0); @@ -488,7 +488,7 @@ inline void mayo_5_S2t_times_PS2_avx2(const uint64_t *_PS2, __m256i *S2_multabs, __m256i in_even1 = _mm256_srli_epi16(in_odd1, 4) & low_nibble_mask; in_odd1 &= low_nibble_mask; - for (size_t k = 0; k < K_OVER_2; k++) + for (k = 0; k < K_OVER_2; k++) { temp[4*k] ^= _mm256_shuffle_epi8(S2_multabs[K_OVER_2*r + k], in_odd0); temp[4*k + 1] ^= _mm256_shuffle_epi8(S2_multabs[K_OVER_2*r + k], in_even0); diff --git a/src/AVX2/shuffle_arithmetic_64.h b/src/AVX2/shuffle_arithmetic_64.h index 75162b4..defff86 100644 --- a/src/AVX2/shuffle_arithmetic_64.h +++ b/src/AVX2/shuffle_arithmetic_64.h @@ -1,481 +1,481 @@ -// SPDX-License-Identifier: Apache-2.0 - -#ifndef SHUFFLE_ARITHMETIC_64_H -#define SHUFFLE_ARITHMETIC_64_H - -#include -#include -#include -#include - -// P1*0 -> P1: v x v, O: v x o -static -inline void mayo_12_P1_times_O_avx2(const uint64_t *_P1, __m256i *O_multabs, uint64_t *_acc){ - - const __m256i *P1 = (__m256i *) _P1; - __m256i *acc = (__m256i *) _acc; - const __m256i low_nibble_mask = _mm256_set_epi64x(0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f); - - size_t cols_used = 0; - for (size_t r = 0; r < V_MAX; r++) - { - // do multiplications for one row and accumulate results in temporary format - __m256i temp[O_MAX] = {0}; - for (size_t c = r; c < V_MAX; c++) - { - __m256i in_odd = _mm256_loadu_si256(P1 + cols_used); - __m256i in_even = _mm256_srli_epi16(in_odd, 4) & low_nibble_mask; - in_odd &= low_nibble_mask; - cols_used ++; - - for (size_t k = 0; k < O_MAX; k+=2) - { - temp[k] ^= _mm256_shuffle_epi8(O_multabs[O_MAX/2*c + k/2], in_odd); - temp[k + 1] ^= _mm256_shuffle_epi8(O_multabs[O_MAX/2*c + k/2], in_even); - } - } - - // convert to normal format and add to accumulator - for (size_t k = 0; k < O_MAX; k+=2) - { - __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; - acc[(r*O_MAX) + k] ^= temp[k] ^ _mm256_slli_epi16(t,4); - acc[(r*O_MAX) + k + 1] ^= temp[k+1] ^ t; - } - } -} - - -static -inline void mayo_12_Ot_times_P1O_P2_avx2(const uint64_t *_P1O_P2, __m256i *O_multabs, uint64_t *_acc){ - - const __m256i *P1O_P2 = (__m256i *) _P1O_P2; - __m256i *acc = (__m256i *) _acc; - const __m256i low_nibble_mask = _mm256_set_epi64x(0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f); - - for (size_t c = 0; c < O_MAX; c++) - { - // do multiplications for one row and accumulate results in temporary format - __m256i temp[O_MAX] = {0}; - for (size_t r = 0; r < V_MAX; r++) - { - __m256i in_odd = _mm256_loadu_si256(P1O_P2 + r*O_MAX + c); - __m256i in_even = _mm256_srli_epi16(in_odd, 4) & low_nibble_mask; - in_odd &= low_nibble_mask; - - for (size_t k = 0; k < O_MAX; k+=2) - { - temp[k] ^= _mm256_shuffle_epi8(O_multabs[O_MAX/2*r + k/2], in_odd); - temp[k + 1] ^= _mm256_shuffle_epi8(O_multabs[O_MAX/2*r + k/2], in_even); - } - } - - // convert to normal format and add to accumulator - for (size_t k = 0; k < O_MAX; k+=2) - { - __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; - acc[(k*O_MAX) + c] ^= temp[k] ^ _mm256_slli_epi16(t,4); - acc[((k+1)*O_MAX) + c] ^= temp[k+1] ^ t; - } - } -} - -static -inline void mayo_12_P1P1t_times_O(const uint64_t *_P1, const unsigned char *O, uint64_t *_acc){ - - const __m256i *P1 = (__m256i *) _P1; - __m256i *acc = (__m256i *) _acc; - const __m256i low_nibble_mask = _mm256_set_epi64x(0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f); - - __m256i O_multabs[O_MAX/2*V_MAX]; - mayo_O_multabs_avx2(O, O_multabs); - - size_t cols_used = 0; - for (size_t r = 0; r < V_MAX; r++) - { - // do multiplications for one row and accumulate results in temporary format - __m256i temp[O_MAX] = {0}; - cols_used += 1; - size_t pos = r; - for (size_t c = 0; c < r; c++) - { - __m256i in_odd = _mm256_loadu_si256(P1 + pos); - pos += (V_MAX -c - 1); - __m256i in_even = _mm256_srli_epi16(in_odd, 4) & low_nibble_mask; - in_odd &= low_nibble_mask; - - for (size_t k = 0; k < O_MAX; k+=2) - { - temp[k] ^= _mm256_shuffle_epi8(O_multabs[O_MAX/2*c + k/2], in_odd); - temp[k + 1] ^= _mm256_shuffle_epi8(O_multabs[O_MAX/2*c + k/2], in_even); - } - } - - for (size_t c = r+1; c < V_MAX; c++) - { - __m256i in_odd = _mm256_loadu_si256(P1 + cols_used); - __m256i in_even = _mm256_srli_epi16(in_odd, 4) & low_nibble_mask; - in_odd &= low_nibble_mask; - cols_used ++; - - for (size_t k = 0; k < O_MAX; k+=2) - { - temp[k] ^= _mm256_shuffle_epi8(O_multabs[O_MAX/2*c + k/2], in_odd); - temp[k + 1] ^= _mm256_shuffle_epi8(O_multabs[O_MAX/2*c + k/2], in_even); - } - } - - for (size_t k = 0; k < O_MAX; k+=2) - { - __m256i acc0 = _mm256_loadu_si256(acc + (r*O_MAX + k )); - __m256i acc1 = _mm256_loadu_si256(acc + (r*O_MAX + k + 1)); - - __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; - - _mm256_storeu_si256(acc + (r*O_MAX + k ), acc0 ^ temp[k ] ^ _mm256_slli_epi16(t,4)); - _mm256_storeu_si256(acc + (r*O_MAX + k + 1), acc1 ^ temp[k+1] ^ t); - } - } -} - - -static -inline void mayo_12_Vt_times_L_avx2(const uint64_t *_L, const __m256i *V_multabs, uint64_t *_acc){ - - const __m256i *L = (__m256i *) _L; - __m256i *acc = (__m256i *) _acc; - const __m256i low_nibble_mask = _mm256_set_epi64x(0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f); - size_t k; - - for (size_t c = 0; c < O_MAX; c++) - { - // do multiplications for one row and accumulate results in temporary format - __m256i temp[K_OVER_2*2] = {0}; - for (size_t r = 0; r < V_MAX; r++) - { - __m256i in_odd = _mm256_loadu_si256(L + r*O_MAX + c); - __m256i in_even = _mm256_srli_epi16(in_odd, 4) & low_nibble_mask; - in_odd &= low_nibble_mask; - - for (size_t k = 0; k < K_OVER_2; k++) - { - temp[2*k] ^= _mm256_shuffle_epi8(V_multabs[K_OVER_2*r + k], in_odd); - temp[2*k + 1] ^= _mm256_shuffle_epi8(V_multabs[K_OVER_2*r + k], in_even); - } - } - - // convert to normal format and add to accumulator - for (k = 0; k+1 < K_MAX; k+=2) - { - __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; - acc[(k*O_MAX) + c] ^= temp[k] ^ _mm256_slli_epi16(t,4); - acc[((k+1)*O_MAX) + c] ^= temp[k+1] ^ t; - } -#if K_MAX % 2 == 1 - __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; - acc[k*O_MAX + c] ^= temp[k] ^ _mm256_slli_epi16(t,4); -#endif - } -} - - -static -inline void mayo_12_Vt_times_Pv_avx2(const uint64_t *_Pv, const __m256i *V_multabs, uint64_t *_acc){ - - const __m256i *Pv = (__m256i *) _Pv; - __m256i *acc = (__m256i *) _acc; - const __m256i low_nibble_mask = _mm256_set_epi64x(0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f); - size_t k; - - for (size_t c = 0; c < K_MAX; c++) - { - // do multiplications for one row and accumulate results in temporary format - __m256i temp[K_OVER_2*2] = {0}; - for (size_t r = 0; r < V_MAX; r++) - { - __m256i in_odd = _mm256_loadu_si256(Pv + r*K_MAX + c); - __m256i in_even = _mm256_srli_epi16(in_odd, 4) & low_nibble_mask; - in_odd &= low_nibble_mask; - - for (size_t k = 0; k < K_OVER_2; k++) - { - temp[2*k] ^= _mm256_shuffle_epi8(V_multabs[K_OVER_2*r + k], in_odd); - temp[2*k + 1] ^= _mm256_shuffle_epi8(V_multabs[K_OVER_2*r + k], in_even); - } - } - - // convert to normal format and add to accumulator - for (k = 0; k+1 < K_MAX; k+=2) - { - __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; - acc[(k*K_MAX) + c] ^= temp[k] ^ _mm256_slli_epi16(t,4); - acc[((k+1)*K_MAX) + c] ^= temp[k+1] ^ t; - } -#if K_MAX % 2 == 1 - __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; - acc[k*K_MAX + c] ^= temp[k] ^ _mm256_slli_epi16(t,4); -#endif - } -} - -static -inline void mayo_12_P1_times_Vt_avx2(const uint64_t *_P1, __m256i *V_multabs, uint64_t *_acc){ - size_t k,c; - const __m256i *P1 = (__m256i *) _P1; - __m256i *acc = (__m256i *) _acc; - const __m256i low_nibble_mask = _mm256_set_epi64x(0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f); - - size_t cols_used = 0; - for (size_t r = 0; r < V_MAX; r++) - { - // do multiplications for one row and accumulate results in temporary format - __m256i temp[K_OVER_2*2] = {0}; - - for (c=r; c < V_MAX; c++) - { - __m256i in_odd = _mm256_loadu_si256(P1 + cols_used); - __m256i in_even = _mm256_srli_epi16(in_odd, 4) & low_nibble_mask; - in_odd &= low_nibble_mask; - cols_used ++; - - for (size_t k = 0; k < K_OVER_2; k++) - { - temp[2*k] ^= _mm256_shuffle_epi8(V_multabs[K_OVER_2*c + k], in_odd); - temp[2*k + 1] ^= _mm256_shuffle_epi8(V_multabs[K_OVER_2*c + k], in_even); - } - } - - // convert to normal format and add to accumulator - for (k = 0; k + 1 < K_MAX; k+=2) - { - __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; - acc[(r*K_MAX) + k] ^= temp[k] ^ _mm256_slli_epi16(t,4); - acc[(r*K_MAX) + k + 1] ^= temp[k+1] ^ t; - } -#if K_MAX % 2 == 1 - __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; - acc[(r*K_MAX) + k] ^= temp[k] ^ _mm256_slli_epi16(t,4); -#endif - } -} - -// P1*S1 -> P1: v x v, S1: v x k // P1 upper triangular -// same as mayo_12_P1_times_Vt_avx2 -static -inline void mayo_12_P1_times_S1_avx2(const uint64_t *_P1, __m256i *S1_multabs, uint64_t *_acc){ - mayo_12_P1_times_Vt_avx2(_P1, S1_multabs, _acc); -} - -static -inline void mayo_12_S1t_times_PS1_avx2(const uint64_t *_PS1, __m256i *S1_multabs, uint64_t *_acc){ - mayo_12_Vt_times_Pv_avx2(_PS1, S1_multabs, _acc); -} - -static -inline void mayo_12_S2t_times_PS2_avx2(const uint64_t *_PS2, __m256i *S2_multabs, uint64_t *_acc){ - const __m256i *PS2 = (__m256i *) _PS2; - __m256i *acc = (__m256i *) _acc; - const __m256i low_nibble_mask = _mm256_set_epi64x(0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f); - size_t k; - - for (size_t c = 0; c < K_MAX; c++) - { - // do multiplications for one row and accumulate results in temporary format - __m256i temp[K_OVER_2*2] = {0}; - for (size_t r = 0; r < O_MAX; r++) - { - __m256i in_odd = _mm256_loadu_si256(PS2 + r*K_MAX + c); - __m256i in_even = _mm256_srli_epi16(in_odd, 4) & low_nibble_mask; - in_odd &= low_nibble_mask; - - for (size_t k = 0; k < K_OVER_2; k++) - { - temp[2*k] ^= _mm256_shuffle_epi8(S2_multabs[K_OVER_2*r + k], in_odd); - temp[2*k + 1] ^= _mm256_shuffle_epi8(S2_multabs[K_OVER_2*r + k], in_even); - } - } - - // convert to normal format and add to accumulator - for (k = 0; k+1 < K_MAX; k+=2) - { - __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; - acc[(k*K_MAX) + c] ^= temp[k] ^ _mm256_slli_epi16(t,4); - acc[((k+1)*K_MAX) + c] ^= temp[k+1] ^ t; - } -#if K_MAX % 2 == 1 - __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; - acc[k*K_MAX + c] ^= temp[k] ^ _mm256_slli_epi16(t,4); -#endif - } -} - - -// P2*S2 -> P2: v x o, S2: o x k -static -inline void mayo_12_P2_times_S2_avx2(const uint64_t *_P2, __m256i *S2_multabs, uint64_t *_acc){ - size_t k,c; - const __m256i *P2 = (__m256i *) _P2; - __m256i *acc = (__m256i *) _acc; - const __m256i low_nibble_mask = _mm256_set_epi64x(0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f); - - size_t cols_used = 0; - for (size_t r = 0; r < V_MAX; r++) - { - // do multiplications for one row and accumulate results in temporary format - __m256i temp[K_OVER_2*2] = {0}; - - for (c=0; c < O_MAX; c++) - { - __m256i in_odd = _mm256_loadu_si256(P2 + cols_used); - __m256i in_even = _mm256_srli_epi16(in_odd, 4) & low_nibble_mask; - in_odd &= low_nibble_mask; - cols_used ++; - - for (size_t k = 0; k < K_OVER_2; k++) - { - temp[2*k] ^= _mm256_shuffle_epi8(S2_multabs[K_OVER_2*c + k], in_odd); - temp[2*k + 1] ^= _mm256_shuffle_epi8(S2_multabs[K_OVER_2*c + k], in_even); - } - } - - // convert to normal format and add to accumulator - for (k = 0; k + 1 < K_MAX; k+=2) - { - __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; - acc[(r*K_MAX) + k] ^= temp[k] ^ _mm256_slli_epi16(t,4); - acc[(r*K_MAX) + k + 1] ^= temp[k+1] ^ t; - } -#if K_MAX % 2 == 1 - __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; - acc[(r*K_MAX) + k] ^= temp[k] ^ _mm256_slli_epi16(t,4); -#endif - } -} - - -// P2*S2 -> P2: v x o, S2: o x k -static -inline void mayo_12_P1_times_S1_plus_P2_times_S2_avx2(const uint64_t *_P1, const uint64_t *_P2, __m256i *S1_multabs, __m256i *S2_multabs, uint64_t *_acc){ - size_t k,c; - const __m256i *P1 = (__m256i *) _P1; - const __m256i *P2 = (__m256i *) _P2; - __m256i *acc = (__m256i *) _acc; - const __m256i low_nibble_mask = _mm256_set_epi64x(0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f); - - size_t P1_cols_used = 0; - for (size_t r = 0; r < V_MAX; r++) - { - // do multiplications for one row and accumulate results in temporary format - __m256i temp[K_OVER_2*2] = {0}; - - - // P1 * S1 - for (c=r; c < V_MAX; c++) - { - __m256i in_odd = _mm256_loadu_si256(P1 + P1_cols_used); - __m256i in_even = _mm256_srli_epi16(in_odd, 4) & low_nibble_mask; - in_odd &= low_nibble_mask; - P1_cols_used ++; - - for (size_t k = 0; k < K_OVER_2; k++) - { - temp[2*k] ^= _mm256_shuffle_epi8(S1_multabs[K_OVER_2*c + k], in_odd); - temp[2*k + 1] ^= _mm256_shuffle_epi8(S1_multabs[K_OVER_2*c + k], in_even); - } - } - - // P2 * S2 - for (c=0; c < O_MAX; c++) - { - __m256i in_odd = _mm256_loadu_si256(P2 + r*O_MAX + c); - __m256i in_even = _mm256_srli_epi16(in_odd, 4) & low_nibble_mask; - in_odd &= low_nibble_mask; - - for (size_t k = 0; k < K_OVER_2; k++) - { - temp[2*k] ^= _mm256_shuffle_epi8(S2_multabs[K_OVER_2*c + k], in_odd); - temp[2*k + 1] ^= _mm256_shuffle_epi8(S2_multabs[K_OVER_2*c + k], in_even); - } - } - - // convert to normal format and add to accumulator - for (k = 0; k + 1 < K_MAX; k+=2) - { - __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; - acc[(r*K_MAX) + k] ^= temp[k] ^ _mm256_slli_epi16(t,4); - acc[(r*K_MAX) + k + 1] ^= temp[k+1] ^ t; - } -#if K_MAX % 2 == 1 - __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; - acc[(r*K_MAX) + k] ^= temp[k] ^ _mm256_slli_epi16(t,4); -#endif - } -} - -// P3*S2 -> P3: o x o, S2: o x k // P3 upper triangular -static -inline void mayo_12_P3_times_S2_avx2(const uint64_t *_P3, __m256i *S2_multabs, uint64_t *_acc){ - size_t k,c; - const __m256i *P3 = (__m256i *) _P3; - __m256i *acc = (__m256i *) _acc; - const __m256i low_nibble_mask = _mm256_set_epi64x(0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f); - - size_t cols_used = 0; - for (size_t r = 0; r < O_MAX; r++) - { - // do multiplications for one row and accumulate results in temporary format - __m256i temp[K_OVER_2*2] = {0}; - - for (c=r; c < O_MAX; c++) - { - __m256i in_odd = _mm256_loadu_si256(P3 + cols_used); - __m256i in_even = _mm256_srli_epi16(in_odd, 4) & low_nibble_mask; - in_odd &= low_nibble_mask; - cols_used ++; - - for (size_t k = 0; k < K_OVER_2; k++) - { - temp[2*k] ^= _mm256_shuffle_epi8(S2_multabs[K_OVER_2*c + k], in_odd); - temp[2*k + 1] ^= _mm256_shuffle_epi8(S2_multabs[K_OVER_2*c + k], in_even); - } - } - - // convert to normal format and add to accumulator - for (k = 0; k + 1 < K_MAX; k+=2) - { - __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; - acc[(r*K_MAX) + k] ^= temp[k] ^ _mm256_slli_epi16(t,4); - acc[(r*K_MAX) + k + 1] ^= temp[k+1] ^ t; - } -#if K_MAX % 2 == 1 - __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; - acc[(r*K_MAX) + k] ^= temp[k] ^ _mm256_slli_epi16(t,4); -#endif - } -} - - -static inline -void mayo12_m_upper(int m_legs, const uint64_t *in, uint64_t *out, int size) { - (void) size; - int m_vecs_stored = 0; - - for (int r = 0; r < O_MAX; ++r) { - const __m256i* _in = (const __m256i*) (in + m_legs * 2 * (r * size + r)); - __m256i* _out = (__m256i*) (out + m_legs * 2 * m_vecs_stored); - _out[0] = _in[0]; - m_vecs_stored++; - for (int c = r + 1; c < O_MAX; ++c) { - const __m256i* _in2 = (const __m256i*) (in + m_legs * 2 * (r * size + c)); - const __m256i* _in3 = (const __m256i*) (in + m_legs * 2 * (c * size + r)); - _out = (__m256i*) (out + m_legs * 2 * m_vecs_stored); - _out[0] = _in2[0] ^ _in3[0]; - m_vecs_stored++; - } - } -} - - -#undef K_OVER_2 -#endif - +// SPDX-License-Identifier: Apache-2.0 + +#ifndef SHUFFLE_ARITHMETIC_64_H +#define SHUFFLE_ARITHMETIC_64_H + +#include +#include +#include +#include + +// P1*0 -> P1: v x v, O: v x o +static +inline void mayo_12_P1_times_O_avx2(const uint64_t *_P1, __m256i *O_multabs, uint64_t *_acc){ + + const __m256i *P1 = (__m256i *) _P1; + __m256i *acc = (__m256i *) _acc; + const __m256i low_nibble_mask = _mm256_set_epi64x(0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f); + + size_t cols_used = 0; + for (size_t r = 0; r < V_MAX; r++) + { + // do multiplications for one row and accumulate results in temporary format + __m256i temp[O_MAX] = {0}; + for (size_t c = r; c < V_MAX; c++) + { + __m256i in_odd = _mm256_loadu_si256(P1 + cols_used); + __m256i in_even = _mm256_srli_epi16(in_odd, 4) & low_nibble_mask; + in_odd &= low_nibble_mask; + cols_used ++; + + for (size_t k = 0; k < O_MAX; k+=2) + { + temp[k] ^= _mm256_shuffle_epi8(O_multabs[O_MAX/2*c + k/2], in_odd); + temp[k + 1] ^= _mm256_shuffle_epi8(O_multabs[O_MAX/2*c + k/2], in_even); + } + } + + // convert to normal format and add to accumulator + for (size_t k = 0; k < O_MAX; k+=2) + { + __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; + acc[(r*O_MAX) + k] ^= temp[k] ^ _mm256_slli_epi16(t,4); + acc[(r*O_MAX) + k + 1] ^= temp[k+1] ^ t; + } + } +} + + +static +inline void mayo_12_Ot_times_P1O_P2_avx2(const uint64_t *_P1O_P2, __m256i *O_multabs, uint64_t *_acc){ + + const __m256i *P1O_P2 = (__m256i *) _P1O_P2; + __m256i *acc = (__m256i *) _acc; + const __m256i low_nibble_mask = _mm256_set_epi64x(0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f); + + for (size_t c = 0; c < O_MAX; c++) + { + // do multiplications for one row and accumulate results in temporary format + __m256i temp[O_MAX] = {0}; + for (size_t r = 0; r < V_MAX; r++) + { + __m256i in_odd = _mm256_loadu_si256(P1O_P2 + r*O_MAX + c); + __m256i in_even = _mm256_srli_epi16(in_odd, 4) & low_nibble_mask; + in_odd &= low_nibble_mask; + + for (size_t k = 0; k < O_MAX; k+=2) + { + temp[k] ^= _mm256_shuffle_epi8(O_multabs[O_MAX/2*r + k/2], in_odd); + temp[k + 1] ^= _mm256_shuffle_epi8(O_multabs[O_MAX/2*r + k/2], in_even); + } + } + + // convert to normal format and add to accumulator + for (size_t k = 0; k < O_MAX; k+=2) + { + __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; + acc[(k*O_MAX) + c] ^= temp[k] ^ _mm256_slli_epi16(t,4); + acc[((k+1)*O_MAX) + c] ^= temp[k+1] ^ t; + } + } +} + +static +inline void mayo_12_P1P1t_times_O(const uint64_t *_P1, const unsigned char *O, uint64_t *_acc){ + + const __m256i *P1 = (__m256i *) _P1; + __m256i *acc = (__m256i *) _acc; + const __m256i low_nibble_mask = _mm256_set_epi64x(0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f); + + __m256i O_multabs[O_MAX/2*V_MAX]; + mayo_O_multabs_avx2(O, O_multabs); + + size_t cols_used = 0; + for (size_t r = 0; r < V_MAX; r++) + { + // do multiplications for one row and accumulate results in temporary format + __m256i temp[O_MAX] = {0}; + cols_used += 1; + size_t pos = r; + for (size_t c = 0; c < r; c++) + { + __m256i in_odd = _mm256_loadu_si256(P1 + pos); + pos += (V_MAX -c - 1); + __m256i in_even = _mm256_srli_epi16(in_odd, 4) & low_nibble_mask; + in_odd &= low_nibble_mask; + + for (size_t k = 0; k < O_MAX; k+=2) + { + temp[k] ^= _mm256_shuffle_epi8(O_multabs[O_MAX/2*c + k/2], in_odd); + temp[k + 1] ^= _mm256_shuffle_epi8(O_multabs[O_MAX/2*c + k/2], in_even); + } + } + + for (size_t c = r+1; c < V_MAX; c++) + { + __m256i in_odd = _mm256_loadu_si256(P1 + cols_used); + __m256i in_even = _mm256_srli_epi16(in_odd, 4) & low_nibble_mask; + in_odd &= low_nibble_mask; + cols_used ++; + + for (size_t k = 0; k < O_MAX; k+=2) + { + temp[k] ^= _mm256_shuffle_epi8(O_multabs[O_MAX/2*c + k/2], in_odd); + temp[k + 1] ^= _mm256_shuffle_epi8(O_multabs[O_MAX/2*c + k/2], in_even); + } + } + + for (size_t k = 0; k < O_MAX; k+=2) + { + __m256i acc0 = _mm256_loadu_si256(acc + (r*O_MAX + k )); + __m256i acc1 = _mm256_loadu_si256(acc + (r*O_MAX + k + 1)); + + __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; + + _mm256_storeu_si256(acc + (r*O_MAX + k ), acc0 ^ temp[k ] ^ _mm256_slli_epi16(t,4)); + _mm256_storeu_si256(acc + (r*O_MAX + k + 1), acc1 ^ temp[k+1] ^ t); + } + } +} + + +static +inline void mayo_12_Vt_times_L_avx2(const uint64_t *_L, const __m256i *V_multabs, uint64_t *_acc){ + + const __m256i *L = (__m256i *) _L; + __m256i *acc = (__m256i *) _acc; + const __m256i low_nibble_mask = _mm256_set_epi64x(0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f); + size_t k; + + for (size_t c = 0; c < O_MAX; c++) + { + // do multiplications for one row and accumulate results in temporary format + __m256i temp[K_OVER_2*2] = {0}; + for (size_t r = 0; r < V_MAX; r++) + { + __m256i in_odd = _mm256_loadu_si256(L + r*O_MAX + c); + __m256i in_even = _mm256_srli_epi16(in_odd, 4) & low_nibble_mask; + in_odd &= low_nibble_mask; + + for (k = 0; k < K_OVER_2; k++) + { + temp[2*k] ^= _mm256_shuffle_epi8(V_multabs[K_OVER_2*r + k], in_odd); + temp[2*k + 1] ^= _mm256_shuffle_epi8(V_multabs[K_OVER_2*r + k], in_even); + } + } + + // convert to normal format and add to accumulator + for (k = 0; k+1 < K_MAX; k+=2) + { + __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; + acc[(k*O_MAX) + c] ^= temp[k] ^ _mm256_slli_epi16(t,4); + acc[((k+1)*O_MAX) + c] ^= temp[k+1] ^ t; + } +#if K_MAX % 2 == 1 + __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; + acc[k*O_MAX + c] ^= temp[k] ^ _mm256_slli_epi16(t,4); +#endif + } +} + + +static +inline void mayo_12_Vt_times_Pv_avx2(const uint64_t *_Pv, const __m256i *V_multabs, uint64_t *_acc){ + + const __m256i *Pv = (__m256i *) _Pv; + __m256i *acc = (__m256i *) _acc; + const __m256i low_nibble_mask = _mm256_set_epi64x(0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f); + size_t k; + + for (size_t c = 0; c < K_MAX; c++) + { + // do multiplications for one row and accumulate results in temporary format + __m256i temp[K_OVER_2*2] = {0}; + for (size_t r = 0; r < V_MAX; r++) + { + __m256i in_odd = _mm256_loadu_si256(Pv + r*K_MAX + c); + __m256i in_even = _mm256_srli_epi16(in_odd, 4) & low_nibble_mask; + in_odd &= low_nibble_mask; + + for (k = 0; k < K_OVER_2; k++) + { + temp[2*k] ^= _mm256_shuffle_epi8(V_multabs[K_OVER_2*r + k], in_odd); + temp[2*k + 1] ^= _mm256_shuffle_epi8(V_multabs[K_OVER_2*r + k], in_even); + } + } + + // convert to normal format and add to accumulator + for (k = 0; k+1 < K_MAX; k+=2) + { + __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; + acc[(k*K_MAX) + c] ^= temp[k] ^ _mm256_slli_epi16(t,4); + acc[((k+1)*K_MAX) + c] ^= temp[k+1] ^ t; + } +#if K_MAX % 2 == 1 + __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; + acc[k*K_MAX + c] ^= temp[k] ^ _mm256_slli_epi16(t,4); +#endif + } +} + +static +inline void mayo_12_P1_times_Vt_avx2(const uint64_t *_P1, __m256i *V_multabs, uint64_t *_acc){ + size_t k,c; + const __m256i *P1 = (__m256i *) _P1; + __m256i *acc = (__m256i *) _acc; + const __m256i low_nibble_mask = _mm256_set_epi64x(0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f); + + size_t cols_used = 0; + for (size_t r = 0; r < V_MAX; r++) + { + // do multiplications for one row and accumulate results in temporary format + __m256i temp[K_OVER_2*2] = {0}; + + for (c=r; c < V_MAX; c++) + { + __m256i in_odd = _mm256_loadu_si256(P1 + cols_used); + __m256i in_even = _mm256_srli_epi16(in_odd, 4) & low_nibble_mask; + in_odd &= low_nibble_mask; + cols_used ++; + + for (k = 0; k < K_OVER_2; k++) + { + temp[2*k] ^= _mm256_shuffle_epi8(V_multabs[K_OVER_2*c + k], in_odd); + temp[2*k + 1] ^= _mm256_shuffle_epi8(V_multabs[K_OVER_2*c + k], in_even); + } + } + + // convert to normal format and add to accumulator + for (k = 0; k + 1 < K_MAX; k+=2) + { + __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; + acc[(r*K_MAX) + k] ^= temp[k] ^ _mm256_slli_epi16(t,4); + acc[(r*K_MAX) + k + 1] ^= temp[k+1] ^ t; + } +#if K_MAX % 2 == 1 + __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; + acc[(r*K_MAX) + k] ^= temp[k] ^ _mm256_slli_epi16(t,4); +#endif + } +} + +// P1*S1 -> P1: v x v, S1: v x k // P1 upper triangular +// same as mayo_12_P1_times_Vt_avx2 +static +inline void mayo_12_P1_times_S1_avx2(const uint64_t *_P1, __m256i *S1_multabs, uint64_t *_acc){ + mayo_12_P1_times_Vt_avx2(_P1, S1_multabs, _acc); +} + +static +inline void mayo_12_S1t_times_PS1_avx2(const uint64_t *_PS1, __m256i *S1_multabs, uint64_t *_acc){ + mayo_12_Vt_times_Pv_avx2(_PS1, S1_multabs, _acc); +} + +static +inline void mayo_12_S2t_times_PS2_avx2(const uint64_t *_PS2, __m256i *S2_multabs, uint64_t *_acc){ + const __m256i *PS2 = (__m256i *) _PS2; + __m256i *acc = (__m256i *) _acc; + const __m256i low_nibble_mask = _mm256_set_epi64x(0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f); + size_t k; + + for (size_t c = 0; c < K_MAX; c++) + { + // do multiplications for one row and accumulate results in temporary format + __m256i temp[K_OVER_2*2] = {0}; + for (size_t r = 0; r < O_MAX; r++) + { + __m256i in_odd = _mm256_loadu_si256(PS2 + r*K_MAX + c); + __m256i in_even = _mm256_srli_epi16(in_odd, 4) & low_nibble_mask; + in_odd &= low_nibble_mask; + + for (k = 0; k < K_OVER_2; k++) + { + temp[2*k] ^= _mm256_shuffle_epi8(S2_multabs[K_OVER_2*r + k], in_odd); + temp[2*k + 1] ^= _mm256_shuffle_epi8(S2_multabs[K_OVER_2*r + k], in_even); + } + } + + // convert to normal format and add to accumulator + for (k = 0; k+1 < K_MAX; k+=2) + { + __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; + acc[(k*K_MAX) + c] ^= temp[k] ^ _mm256_slli_epi16(t,4); + acc[((k+1)*K_MAX) + c] ^= temp[k+1] ^ t; + } +#if K_MAX % 2 == 1 + __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; + acc[k*K_MAX + c] ^= temp[k] ^ _mm256_slli_epi16(t,4); +#endif + } +} + + +// P2*S2 -> P2: v x o, S2: o x k +static +inline void mayo_12_P2_times_S2_avx2(const uint64_t *_P2, __m256i *S2_multabs, uint64_t *_acc){ + size_t k,c; + const __m256i *P2 = (__m256i *) _P2; + __m256i *acc = (__m256i *) _acc; + const __m256i low_nibble_mask = _mm256_set_epi64x(0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f); + + size_t cols_used = 0; + for (size_t r = 0; r < V_MAX; r++) + { + // do multiplications for one row and accumulate results in temporary format + __m256i temp[K_OVER_2*2] = {0}; + + for (c=0; c < O_MAX; c++) + { + __m256i in_odd = _mm256_loadu_si256(P2 + cols_used); + __m256i in_even = _mm256_srli_epi16(in_odd, 4) & low_nibble_mask; + in_odd &= low_nibble_mask; + cols_used ++; + + for (k = 0; k < K_OVER_2; k++) + { + temp[2*k] ^= _mm256_shuffle_epi8(S2_multabs[K_OVER_2*c + k], in_odd); + temp[2*k + 1] ^= _mm256_shuffle_epi8(S2_multabs[K_OVER_2*c + k], in_even); + } + } + + // convert to normal format and add to accumulator + for (k = 0; k + 1 < K_MAX; k+=2) + { + __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; + acc[(r*K_MAX) + k] ^= temp[k] ^ _mm256_slli_epi16(t,4); + acc[(r*K_MAX) + k + 1] ^= temp[k+1] ^ t; + } +#if K_MAX % 2 == 1 + __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; + acc[(r*K_MAX) + k] ^= temp[k] ^ _mm256_slli_epi16(t,4); +#endif + } +} + + +// P2*S2 -> P2: v x o, S2: o x k +static +inline void mayo_12_P1_times_S1_plus_P2_times_S2_avx2(const uint64_t *_P1, const uint64_t *_P2, __m256i *S1_multabs, __m256i *S2_multabs, uint64_t *_acc){ + size_t k,c; + const __m256i *P1 = (__m256i *) _P1; + const __m256i *P2 = (__m256i *) _P2; + __m256i *acc = (__m256i *) _acc; + const __m256i low_nibble_mask = _mm256_set_epi64x(0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f); + + size_t P1_cols_used = 0; + for (size_t r = 0; r < V_MAX; r++) + { + // do multiplications for one row and accumulate results in temporary format + __m256i temp[K_OVER_2*2] = {0}; + + + // P1 * S1 + for (c=r; c < V_MAX; c++) + { + __m256i in_odd = _mm256_loadu_si256(P1 + P1_cols_used); + __m256i in_even = _mm256_srli_epi16(in_odd, 4) & low_nibble_mask; + in_odd &= low_nibble_mask; + P1_cols_used ++; + + for (k = 0; k < K_OVER_2; k++) + { + temp[2*k] ^= _mm256_shuffle_epi8(S1_multabs[K_OVER_2*c + k], in_odd); + temp[2*k + 1] ^= _mm256_shuffle_epi8(S1_multabs[K_OVER_2*c + k], in_even); + } + } + + // P2 * S2 + for (c=0; c < O_MAX; c++) + { + __m256i in_odd = _mm256_loadu_si256(P2 + r*O_MAX + c); + __m256i in_even = _mm256_srli_epi16(in_odd, 4) & low_nibble_mask; + in_odd &= low_nibble_mask; + + for (k = 0; k < K_OVER_2; k++) + { + temp[2*k] ^= _mm256_shuffle_epi8(S2_multabs[K_OVER_2*c + k], in_odd); + temp[2*k + 1] ^= _mm256_shuffle_epi8(S2_multabs[K_OVER_2*c + k], in_even); + } + } + + // convert to normal format and add to accumulator + for (k = 0; k + 1 < K_MAX; k+=2) + { + __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; + acc[(r*K_MAX) + k] ^= temp[k] ^ _mm256_slli_epi16(t,4); + acc[(r*K_MAX) + k + 1] ^= temp[k+1] ^ t; + } +#if K_MAX % 2 == 1 + __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; + acc[(r*K_MAX) + k] ^= temp[k] ^ _mm256_slli_epi16(t,4); +#endif + } +} + +// P3*S2 -> P3: o x o, S2: o x k // P3 upper triangular +static +inline void mayo_12_P3_times_S2_avx2(const uint64_t *_P3, __m256i *S2_multabs, uint64_t *_acc){ + size_t k,c; + const __m256i *P3 = (__m256i *) _P3; + __m256i *acc = (__m256i *) _acc; + const __m256i low_nibble_mask = _mm256_set_epi64x(0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f); + + size_t cols_used = 0; + for (size_t r = 0; r < O_MAX; r++) + { + // do multiplications for one row and accumulate results in temporary format + __m256i temp[K_OVER_2*2] = {0}; + + for (c=r; c < O_MAX; c++) + { + __m256i in_odd = _mm256_loadu_si256(P3 + cols_used); + __m256i in_even = _mm256_srli_epi16(in_odd, 4) & low_nibble_mask; + in_odd &= low_nibble_mask; + cols_used ++; + + for (k = 0; k < K_OVER_2; k++) + { + temp[2*k] ^= _mm256_shuffle_epi8(S2_multabs[K_OVER_2*c + k], in_odd); + temp[2*k + 1] ^= _mm256_shuffle_epi8(S2_multabs[K_OVER_2*c + k], in_even); + } + } + + // convert to normal format and add to accumulator + for (k = 0; k + 1 < K_MAX; k+=2) + { + __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; + acc[(r*K_MAX) + k] ^= temp[k] ^ _mm256_slli_epi16(t,4); + acc[(r*K_MAX) + k + 1] ^= temp[k+1] ^ t; + } +#if K_MAX % 2 == 1 + __m256i t = (temp[k + 1] ^ _mm256_srli_epi16(temp[k],4)) & low_nibble_mask; + acc[(r*K_MAX) + k] ^= temp[k] ^ _mm256_slli_epi16(t,4); +#endif + } +} + + +static inline +void mayo12_m_upper(int m_legs, const uint64_t *in, uint64_t *out, int size) { + (void) size; + int m_vecs_stored = 0; + + for (int r = 0; r < O_MAX; ++r) { + const __m256i* _in = (const __m256i*) (in + m_legs * 2 * (r * size + r)); + __m256i* _out = (__m256i*) (out + m_legs * 2 * m_vecs_stored); + _out[0] = _in[0]; + m_vecs_stored++; + for (int c = r + 1; c < O_MAX; ++c) { + const __m256i* _in2 = (const __m256i*) (in + m_legs * 2 * (r * size + c)); + const __m256i* _in3 = (const __m256i*) (in + m_legs * 2 * (c * size + r)); + _out = (__m256i*) (out + m_legs * 2 * m_vecs_stored); + _out[0] = _in2[0] ^ _in3[0]; + m_vecs_stored++; + } + } +} + + +#undef K_OVER_2 +#endif + diff --git a/src/AVX2/shuffle_arithmetic_96.h b/src/AVX2/shuffle_arithmetic_96.h index 7d161db..9b3a69d 100644 --- a/src/AVX2/shuffle_arithmetic_96.h +++ b/src/AVX2/shuffle_arithmetic_96.h @@ -194,7 +194,7 @@ inline void mayo_3_Vt_times_L_avx2(const uint64_t *L, const __m256i *V_multabs, __m256i in_even1 = _mm256_srli_epi16(in_odd1, 4) & low_nibble_mask; in_odd1 &= low_nibble_mask; - for (size_t k = 0; k < K_OVER_2; k++) + for (k = 0; k < K_OVER_2; k++) { temp[4*k] ^= _mm256_shuffle_epi8(V_multabs[K_OVER_2*r + k], in_odd0); temp[4*k + 1] ^= _mm256_shuffle_epi8(V_multabs[K_OVER_2*r + k], in_even0); @@ -252,7 +252,7 @@ inline void mayo_3_P1_times_Vt_avx2(const uint64_t *P1, __m256i *V_multabs, uint in_odd1 &= low_nibble_mask; cols_used++; - for (size_t k = 0; k < K_OVER_2; k++) + for (k = 0; k < K_OVER_2; k++) { temp[4*k] ^= _mm256_shuffle_epi8(V_multabs[K_OVER_2*c + k], in_odd0); temp[4*k + 1] ^= _mm256_shuffle_epi8(V_multabs[K_OVER_2*c + k], in_even0); @@ -308,7 +308,7 @@ inline void mayo_3_Vt_times_Pv_avx2(const uint64_t *Pv, const __m256i *V_multabs __m256i in_even1 = _mm256_srli_epi16(in_odd1, 4) & low_nibble_mask; in_odd1 &= low_nibble_mask; - for (size_t k = 0; k < K_OVER_2; k++) + for (k = 0; k < K_OVER_2; k++) { temp[4*k] ^= _mm256_shuffle_epi8(V_multabs[K_OVER_2*r + k], in_odd0); temp[4*k + 1] ^= _mm256_shuffle_epi8(V_multabs[K_OVER_2*r + k], in_even0); @@ -368,7 +368,7 @@ inline void mayo_3_P1_times_S1_plus_P2_times_S2_avx2(const uint64_t *P1, const u in_odd1 &= low_nibble_mask; P1_cols_used++; - for (size_t k = 0; k < K_OVER_2; k++) + for (k = 0; k < K_OVER_2; k++) { temp[4*k] ^= _mm256_shuffle_epi8(S1_multabs[K_OVER_2*c + k], in_odd0); temp[4*k + 1] ^= _mm256_shuffle_epi8(S1_multabs[K_OVER_2*c + k], in_even0); @@ -387,7 +387,7 @@ inline void mayo_3_P1_times_S1_plus_P2_times_S2_avx2(const uint64_t *P1, const u __m256i in_even1 = _mm256_srli_epi16(in_odd1, 4) & low_nibble_mask; in_odd1 &= low_nibble_mask; - for (size_t k = 0; k < K_OVER_2; k++) + for (k = 0; k < K_OVER_2; k++) { temp[4*k] ^= _mm256_shuffle_epi8(S2_multabs[K_OVER_2*c + k], in_odd0); temp[4*k + 1] ^= _mm256_shuffle_epi8(S2_multabs[K_OVER_2*c + k], in_even0); @@ -446,7 +446,7 @@ inline void mayo_3_P3_times_S2_avx2(const uint64_t *P3, __m256i *S2_multabs, uin in_odd1 &= low_nibble_mask; cols_used++; - for (size_t k = 0; k < K_OVER_2; k++) + for (k = 0; k < K_OVER_2; k++) { temp[4*k] ^= _mm256_shuffle_epi8(S2_multabs[K_OVER_2*c + k], in_odd0); temp[4*k + 1] ^= _mm256_shuffle_epi8(S2_multabs[K_OVER_2*c + k], in_even0); @@ -507,7 +507,7 @@ inline void mayo_3_S2t_times_PS2_avx2(const uint64_t *PS2, __m256i *S2_multabs, __m256i in_even1 = _mm256_srli_epi16(in_odd1, 4) & low_nibble_mask; in_odd1 &= low_nibble_mask; - for (size_t k = 0; k < K_OVER_2; k++) + for (k = 0; k < K_OVER_2; k++) { temp[4*k] ^= _mm256_shuffle_epi8(S2_multabs[K_OVER_2*r + k], in_odd0); temp[4*k + 1] ^= _mm256_shuffle_epi8(S2_multabs[K_OVER_2*r + k], in_even0); @@ -546,4 +546,5 @@ inline void mayo_3_S2t_times_PS2_avx2(const uint64_t *PS2, __m256i *S2_multabs, } #undef K_OVER_2 -#endif \ No newline at end of file +#endif + diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d2aa397..96db732 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -49,9 +49,11 @@ if (ENABLE_PARAMS_DYNAMIC) set(SOURCE_FILES_VARIANT ${MVARIANT_LOWER}/api.c) add_library(${MVARIANT_LOWER}_nistapi ${SOURCE_FILES_VARIANT}) target_link_libraries(${MVARIANT_LOWER}_nistapi PRIVATE mayo) + target_compile_definitions(${MVARIANT_LOWER}_nistapi PUBLIC ENABLE_PARAMS_DYNAMIC) target_include_directories(${MVARIANT_LOWER}_nistapi PUBLIC ${MVARIANT_LOWER} ${INC_PLATFORM}) add_library(${MVARIANT_LOWER}_test_nistapi ${SOURCE_FILES_VARIANT}) target_link_libraries(${MVARIANT_LOWER}_test_nistapi PRIVATE mayo_test) + target_compile_definitions(${MVARIANT_LOWER}_test_nistapi PUBLIC ENABLE_PARAMS_DYNAMIC) target_include_directories(${MVARIANT_LOWER}_test_nistapi PUBLIC ${MVARIANT_LOWER} ${INC_PLATFORM}) endforeach() @@ -77,5 +79,8 @@ else() add_library(${MVARIANT_LOWER}_test_nistapi ${SOURCE_FILES_VARIANT}) target_link_libraries(${MVARIANT_LOWER}_test_nistapi PRIVATE ${MVARIANT_LOWER}_test) target_include_directories(${MVARIANT_LOWER}_test_nistapi PUBLIC ${MVARIANT_LOWER} ${INC_PLATFORM}) + target_compile_definitions(${MVARIANT_LOWER}_nistapi PUBLIC MAYO_VARIANT=${MVARIANT}) + target_compile_definitions(${MVARIANT_LOWER}_test_nistapi PUBLIC MAYO_VARIANT=${MVARIANT}) endforeach() endif() + diff --git a/src/arithmetic.c b/src/arithmetic.c index 24d1996..de09954 100644 --- a/src/arithmetic.c +++ b/src/arithmetic.c @@ -78,6 +78,7 @@ void P1P1t_times_O(const mayo_params_t* p, const uint64_t* P1, const unsigned ch } void V_times_L__V_times_P1_times_Vt(const mayo_params_t* p, const uint64_t* L, const unsigned char* V, uint64_t* M, const uint64_t* P1, uint64_t* Y) { + (void) p; #if MAYO_AVX && defined(MAYO_VARIANT) && M_MAX == 64 __m256i V_multabs[(K_MAX+1)/2*V_MAX]; alignas (32) uint64_t Pv[N_MINUS_O_MAX * K_MAX * M_MAX / 16] = {0}; @@ -121,6 +122,7 @@ void V_times_L__V_times_P1_times_Vt(const mayo_params_t* p, const uint64_t* L, c } void Ot_times_P1O_P2(const mayo_params_t* p, const uint64_t* P1, const unsigned char* O, uint64_t* P1O_P2, uint64_t* P3) { + (void) p; #if MAYO_AVX && defined(MAYO_VARIANT) && M_MAX == 64 __m256i O_multabs[O_MAX/2*V_MAX]; mayo_O_multabs_avx2(O, O_multabs); @@ -157,12 +159,13 @@ void Ot_times_P1O_P2(const mayo_params_t* p, const uint64_t* P1, const unsigned // [ P3*S2 = P2 ] void m_calculate_PS_SPS(const uint64_t *P1, const uint64_t *P2, const uint64_t *P3, const unsigned char *S, const int m, const int v, const int o, const int k, uint64_t *SPS) { + (void) m; #if MAYO_AVX const int n = o + v; /* Old approach which is constant time but doesn't have to be */ - unsigned char S1[V_MAX*K_MAX]; // == N-O, K - unsigned char S2[O_MAX*K_MAX]; // == O, K + unsigned char S1[V_MAX*K_MAX] = { 0 }; // == N-O, K + unsigned char S2[O_MAX*K_MAX] = { 0 }; // == O, K unsigned char *s1_write = S1; unsigned char *s2_write = S2; @@ -320,4 +323,5 @@ int sample_solution(const mayo_params_t *p, unsigned char *A, } } return 1; -} \ No newline at end of file +} + diff --git a/src/arithmetic.h b/src/arithmetic.h index ec79f33..c2fb4fe 100644 --- a/src/arithmetic.h +++ b/src/arithmetic.h @@ -8,6 +8,10 @@ #include #include +#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +#define TARGET_BIG_ENDIAN +#endif + #if defined(MAYO_AVX) && (M_MAX == 64) #include #endif @@ -29,78 +33,30 @@ #include #endif -#ifdef ENABLE_PARAMS_DYNAMIC -#define PARAM_m(p) (p->m) -#define PARAM_n(p) (p->n) -#define PARAM_o(p) (p->o) -#define PARAM_v(p) (p->n - p->o) -#define PARAM_A_cols(p) (p->k * p->o + 1) -#define PARAM_k(p) (p->k) -#define PARAM_q(p) (p->q) -#define PARAM_m_bytes(p) (p->m_bytes) -#define PARAM_O_bytes(p) (p->O_bytes) -#define PARAM_v_bytes(p) (p->v_bytes) -#define PARAM_r_bytes(p) (p->r_bytes) -#define PARAM_P1_bytes(p) (p->P1_bytes) -#define PARAM_P2_bytes(p) (p->P2_bytes) -#define PARAM_P3_bytes(p) (p->P3_bytes) -#define PARAM_csk_bytes(p) (p->csk_bytes) -#define PARAM_esk_bytes(p) (p->esk_bytes) -#define PARAM_cpk_bytes(p) (p->cpk_bytes) -#define PARAM_epk_bytes(p) (p->epk_bytes) -#define PARAM_sig_bytes(p) (p->sig_bytes) -#define PARAM_f_tail(p) (p->f_tail) -#define PARAM_salt_bytes(p) (p->salt_bytes) -#define PARAM_sk_seed_bytes(p) (p->sk_seed_bytes) -#define PARAM_digest_bytes(p) (p->digest_bytes) -#define PARAM_pk_seed_bytes(p) (p->pk_seed_bytes) -#elif defined(MAYO_VARIANT) -#define PARAM_m(p) PARAM_NAME(m) -#define PARAM_n(p) PARAM_NAME(n) -#define PARAM_o(p) PARAM_NAME(o) -#define PARAM_v(p) PARAM_NAME(v) -#define PARAM_A_cols(p) PARAM_NAME(A_cols) -#define PARAM_k(p) PARAM_NAME(k) -#define PARAM_q(p) PARAM_NAME(q) -#define PARAM_m_bytes(p) PARAM_NAME(m_bytes) -#define PARAM_O_bytes(p) PARAM_NAME(O_bytes) -#define PARAM_v_bytes(p) PARAM_NAME(v_bytes) -#define PARAM_r_bytes(p) PARAM_NAME(r_bytes) -#define PARAM_P1_bytes(p) PARAM_NAME(P1_bytes) -#define PARAM_P2_bytes(p) PARAM_NAME(P2_bytes) -#define PARAM_P3_bytes(p) PARAM_NAME(P3_bytes) -#define PARAM_csk_bytes(p) PARAM_NAME(csk_bytes) -#define PARAM_esk_bytes(p) PARAM_NAME(esk_bytes) -#define PARAM_cpk_bytes(p) PARAM_NAME(cpk_bytes) -#define PARAM_epk_bytes(p) PARAM_NAME(epk_bytes) -#define PARAM_sig_bytes(p) PARAM_NAME(sig_bytes) -static const unsigned char f_tail[] = PARAM_NAME(f_tail); -#define PARAM_salt_bytes(p) PARAM_NAME(salt_bytes) -#define PARAM_sk_seed_bytes(p) PARAM_NAME(sk_seed_bytes) -#define PARAM_digest_bytes(p) PARAM_NAME(digest_bytes) -#define PARAM_pk_seed_bytes(p) PARAM_NAME(pk_seed_bytes) -#define PARAM_f_tail(p) f_tail -#else -#error "Parameter not specified" -#endif - // Calculate P3 = O^T * (P1*O + P2) in KeyGen +#define Ot_times_P1O_P2 MAYO_NAMESPACE(Ot_times_P1O_P2) void Ot_times_P1O_P2(const mayo_params_t* p, const uint64_t* P1, const unsigned char* O, uint64_t* P1O_P2, uint64_t* P3); // Calculate Upper in KeyGen +#define m_upper MAYO_NAMESPACE(m_upper) void m_upper(int m_legs, const uint64_t *in, uint64_t *out, int size); // Calculate acc = (P1+P1^T)*O in expand_sk +#define P1P1t_times_O MAYO_NAMESPACE(P1P1t_times_O) void P1P1t_times_O(const mayo_params_t* p, const uint64_t* P1P1t, const unsigned char* O, uint64_t* acc); // Calculate M=V*L and Y=V*P1*V^T in Sign +#define V_times_L__V_times_P1_times_Vt MAYO_NAMESPACE(V_times_L__V_times_P1_times_Vt) void V_times_L__V_times_P1_times_Vt(const mayo_params_t* p, const uint64_t* L, const unsigned char* V, uint64_t* M, const uint64_t* P1, uint64_t* Y); // Sample solution in Sign +#define sample_solution MAYO_NAMESPACE(sample_solution) int sample_solution(const mayo_params_t *p, unsigned char *A, const unsigned char *y, const unsigned char *r, unsigned char *x, int k, int o, int m, int A_cols); // Calculate SPS = S*P*S^T in Verify +#define m_calculate_PS_SPS MAYO_NAMESPACE(m_calculate_PS_SPS) void m_calculate_PS_SPS(const uint64_t *P1, const uint64_t *P2, const uint64_t *P3, const unsigned char *S, const int m, const int v, const int o, const int k, uint64_t *SPS); -#endif \ No newline at end of file +#endif + diff --git a/src/common/aes128ctr.c b/src/common/aes128ctr.c index 9382136..6fa736c 100644 --- a/src/common/aes128ctr.c +++ b/src/common/aes128ctr.c @@ -289,4 +289,5 @@ int AES_128_CTR_4R_NI(unsigned char *output, size_t outputByteLen, oqs_aes128r4_free_schedule_ni(schedule); return (int)outputByteLen; } -#endif \ No newline at end of file +#endif + diff --git a/src/common/aes_c.c b/src/common/aes_c.c index 868fcd7..8afec21 100644 --- a/src/common/aes_c.c +++ b/src/common/aes_c.c @@ -738,3 +738,4 @@ void AES_256_ECB(const uint8_t *input, const unsigned char *key, unsigned char * aes256_ecb(output, input, 1, &ctx); aes256_ctx_release(&ctx); } + diff --git a/src/common/aes_ctr.h b/src/common/aes_ctr.h index c47c01e..28c4955 100644 --- a/src/common/aes_ctr.h +++ b/src/common/aes_ctr.h @@ -21,3 +21,4 @@ int AES_128_CTR(unsigned char *output, size_t outputByteLen, #endif #endif + diff --git a/src/common/debug_bench_tools.h b/src/common/debug_bench_tools.h index c1de817..8d12592 100644 --- a/src/common/debug_bench_tools.h +++ b/src/common/debug_bench_tools.h @@ -65,4 +65,5 @@ static inline void print_avx2_(__m256i a){ #endif -#endif \ No newline at end of file +#endif + diff --git a/src/common/fips202.c b/src/common/fips202.c index a6f3ecd..1a5475a 100644 --- a/src/common/fips202.c +++ b/src/common/fips202.c @@ -1090,3 +1090,4 @@ void sha3_512(uint8_t *output, const uint8_t *input, size_t inlen) { output[i] = t[i]; } } + diff --git a/src/common/fips202.h b/src/common/fips202.h index 5a2261a..ed39e12 100644 --- a/src/common/fips202.h +++ b/src/common/fips202.h @@ -9,3 +9,4 @@ int shake128(unsigned char *output, size_t outputByteLen, const unsigned char *i int shake256(unsigned char *output, size_t outputByteLen, const unsigned char *input, size_t inputByteLen); #endif + diff --git a/src/common/mem.c b/src/common/mem.c index f5fe699..ec0a284 100644 --- a/src/common/mem.c +++ b/src/common/mem.c @@ -16,3 +16,4 @@ void mayo_secure_clear(void *mem, size_t size) { static volatile memset_t memset_func = memset; memset_func(mem, 0, size); } + diff --git a/src/common/randombytes_ctrdrbg.c b/src/common/randombytes_ctrdrbg.c index ab45aec..7751c0a 100644 --- a/src/common/randombytes_ctrdrbg.c +++ b/src/common/randombytes_ctrdrbg.c @@ -124,7 +124,7 @@ AES256_CTR_DRBG_Update(unsigned char *provided_data, memcpy(V, temp + 32, 16); } -int randombytes(unsigned char *random_array, unsigned long long nbytes) { +int randombytes(unsigned char *random_array, size_t nbytes) { int ret = randombytes_nist(random_array, nbytes); #ifdef ENABLE_CT_TESTING VALGRIND_MAKE_MEM_UNDEFINED(random_array, ret); @@ -138,3 +138,4 @@ randombytes_init(unsigned char *entropy_input, int security_strength) { return randombytes_init_nist(entropy_input, personalization_string, security_strength); } + diff --git a/src/common/randombytes_system.c b/src/common/randombytes_system.c index a8783c3..4f601f1 100644 --- a/src/common/randombytes_system.c +++ b/src/common/randombytes_system.c @@ -31,7 +31,7 @@ THE SOFTWARE. # define _GNU_SOURCE #endif /* defined(__linux__) || defined(__GNU__) */ -#include +#include #if defined(_WIN32) /* Windows */ @@ -380,7 +380,7 @@ static int randombytes_select(void *buf, size_t n) #endif } -int randombytes(unsigned char *x, unsigned long long xlen) { +int randombytes(unsigned char *x, size_t xlen) { int ret = randombytes_select(x, (size_t) xlen); #ifdef ENABLE_CT_TESTING @@ -396,3 +396,4 @@ void randombytes_init(unsigned char *entropy_input, (void) personalization_string; (void) security_strength; } + diff --git a/src/generic/arithmetic_128.h b/src/generic/arithmetic_128.h index 6926b67..418c308 100644 --- a/src/generic/arithmetic_128.h +++ b/src/generic/arithmetic_128.h @@ -86,4 +86,5 @@ inline void multiply_bins_128(uint64_t *bins, uint64_t *out) { vec_copy_128(bins + 8, out); } -#endif \ No newline at end of file +#endif + diff --git a/src/generic/arithmetic_64.h b/src/generic/arithmetic_64.h index 1c574fc..a70b7a3 100644 --- a/src/generic/arithmetic_64.h +++ b/src/generic/arithmetic_64.h @@ -124,4 +124,5 @@ inline void multiply_bins_64(uint64_t *bins, uint64_t *out) { vec_copy_64(bins + 4, out); } -#endif \ No newline at end of file +#endif + diff --git a/src/generic/arithmetic_96.h b/src/generic/arithmetic_96.h index 237f99b..a38f89e 100644 --- a/src/generic/arithmetic_96.h +++ b/src/generic/arithmetic_96.h @@ -81,4 +81,5 @@ inline void multiply_bins_96(uint64_t *bins, uint64_t *out) { vec_copy_96(bins + 6, out); } -#endif \ No newline at end of file +#endif + diff --git a/src/generic/arithmetic_common.h b/src/generic/arithmetic_common.h index 079bdf6..d337bc2 100644 --- a/src/generic/arithmetic_common.h +++ b/src/generic/arithmetic_common.h @@ -84,7 +84,7 @@ static inline void mayo_generic_m_calculate_PS(const uint64_t *P1, const uint64_ */ // use more stack efficient version for MAYO_3 and MAYO_5 - #if defined(PQM4) && N_MAX > 78 + #if (defined(HAVE_STACKEFFICIENT) || defined(PQM4)) && N_MAX > 78 uint64_t accumulator[M_MAX * N_MAX] = {0}; int P1_used; int P3_used; diff --git a/src/generic/echelon_form.h b/src/generic/echelon_form.h index a7ea621..8250584 100644 --- a/src/generic/echelon_form.h +++ b/src/generic/echelon_form.h @@ -1,138 +1,152 @@ - -// SPDX-License-Identifier: Apache-2.0 - -#ifndef ECHELON_FORM_H -#define ECHELON_FORM_H - -#include -#include -#include -#include - -#define MAYO_MAX(x, y) (((x) > (y)) ? (x) : (y)) -#define MAYO_MIN(x, y) (((x) < (y)) ? (x) : (y)) - -static inline unsigned char -m_extract_element(const uint64_t *in, int index) { - const int leg = index / 16; - const int offset = index % 16; - - return (in[leg] >> (offset*4)) & 0xF; -} - -static inline void -ef_pack_m_vec(const unsigned char *in, uint64_t *out, int ncols) { - int i; - unsigned char *out8 = (unsigned char *)out; - for(i = 0; i+1 < ncols; i += 2){ - out8[i/2] = (in[i+0] << 0) | (in[i+1] << 4); - } - if (ncols % 2 == 1){ - out8[i/2] = (in[i+0] << 0); - } -} - -static inline void -ef_unpack_m_vec(int legs, const uint64_t *in, unsigned char *out) { - const unsigned char *in8 = (const unsigned char *)in; - for(int i = 0; i < legs * 16; i += 2){ - out[i] = (in8[i/2]) & 0xF; - out[i+1] = (in8[i/2] >> 4); - } -} - - -// put matrix in row echelon form with ones on first nonzero entries *in -// constant time* -static inline void EF(unsigned char *A, int nrows, int ncols) { - - alignas (32) uint64_t _pivot_row[(K_MAX * O_MAX + 1 + 15) / 16]; - alignas (32) uint64_t _pivot_row2[(K_MAX * O_MAX + 1 + 15) / 16]; - alignas (32) uint64_t packed_A[((K_MAX * O_MAX + 1 + 15) / 16) * M_MAX]; - - int row_len = (ncols + 15) / 16; - - // nibbleslice the matrix A - for (int i = 0; i < nrows; i++) { - ef_pack_m_vec(A + i * ncols, packed_A + i * row_len, ncols); - } - - // pivot row is secret, pivot col is not - - unsigned char inverse; - int pivot_row = 0; - for (int pivot_col = 0; pivot_col < ncols; pivot_col++) { - - int pivot_row_lower_bound = MAYO_MAX(0, pivot_col + nrows - ncols); - int pivot_row_upper_bound = MAYO_MIN(nrows - 1, pivot_col); - // the pivot row is guaranteed to be between these lower and upper bounds if - // A has full rank - - // zero out pivot row - for (int i = 0; i < row_len; i++) { - _pivot_row[i] = 0; - _pivot_row2[i] = 0; - } - - // try to get a pivot row in constant time - unsigned char pivot = 0; - uint64_t pivot_is_zero = -1; - for (int row = pivot_row_lower_bound; - row <= MAYO_MIN(nrows - 1, pivot_row_upper_bound + 32); row++) { - - uint64_t is_pivot_row = ~ct_compare_64(row, pivot_row); - uint64_t below_pivot_row = ct_64_is_greater_than(row, pivot_row); - - for (int j = 0; j < row_len; j++) { - _pivot_row[j] ^= (is_pivot_row | (below_pivot_row & pivot_is_zero)) & - packed_A[row * row_len + j]; - } - pivot = m_extract_element(_pivot_row, pivot_col); - pivot_is_zero = ~ct_compare_64((int) pivot, 0); - } - - // multiply pivot row by inverse of pivot - inverse = inverse_f(pivot); - vec_mul_add_u64(row_len, _pivot_row, inverse, _pivot_row2); - - // conditionally write pivot row to the correct row, if there is a nonzero - // pivot - for (int row = pivot_row_lower_bound; row <= pivot_row_upper_bound; row++) { - uint64_t do_copy = ~ct_compare_64(row, pivot_row) & ~pivot_is_zero; - uint64_t do_not_copy = ~do_copy; - for (int col = 0; col < row_len; col++) { - packed_A[row * row_len + col] = - (do_not_copy & packed_A[row * row_len + col]) + - (do_copy & _pivot_row2[col]); - } - } - - // eliminate entries below pivot - for (int row = pivot_row_lower_bound; row < nrows; row++) { - unsigned char below_pivot = (row > pivot_row); - unsigned char elt_to_elim = m_extract_element(packed_A + row * row_len, pivot_col); - - vec_mul_add_u64(row_len, _pivot_row2, below_pivot * elt_to_elim, - packed_A + row * row_len); - } - - pivot_row += (-(int32_t)(~pivot_is_zero)); - } - - unsigned char temp[(O_MAX * K_MAX + 1 + 15)]; - - // unbitslice the matrix A - for (int i = 0; i < nrows; i++) { - ef_unpack_m_vec(row_len, packed_A + i * row_len, temp); - for (int j = 0; j < ncols; j++) { - A[i * ncols + j] = temp[j]; - } - } - - mayo_secure_clear(temp, K_MAX * O_MAX + 1 + 15); - mayo_secure_clear(_pivot_row, (K_MAX * O_MAX + 1 + 15) / 16 * 8); - mayo_secure_clear(_pivot_row2, (K_MAX * O_MAX + 1 + 15) / 16 * 8); - mayo_secure_clear(packed_A, ((K_MAX * O_MAX + 1 + 15) / 16) * M_MAX * 8); -} - -#endif \ No newline at end of file + +// SPDX-License-Identifier: Apache-2.0 + +#ifndef ECHELON_FORM_H +#define ECHELON_FORM_H + +#include +#include +#include +#include + +#define MAYO_MAX(x, y) (((x) > (y)) ? (x) : (y)) +#define MAYO_MIN(x, y) (((x) < (y)) ? (x) : (y)) + +static inline unsigned char +m_extract_element(const uint64_t *in, int index) { + const int leg = index / 16; + const int offset = index % 16; + + return (in[leg] >> (offset*4)) & 0xF; +} + +static inline void +ef_pack_m_vec(const unsigned char *in, uint64_t *out, int ncols) { + int i; + unsigned char *out8 = (unsigned char *)out; + for(i = 0; i+1 < ncols; i += 2){ +#ifdef TARGET_BIG_ENDIAN + out8[(((i/2 + 8) / 8) * 8) - 1 - (i/2)%8] = (in[i+0] << 0) | (in[i+1] << 4); +#else + out8[i/2] = (in[i+0] << 0) | (in[i+1] << 4); +#endif + } + if (ncols % 2 == 1){ +#ifdef TARGET_BIG_ENDIAN + out8[(((i/2 + 8) / 8) * 8) - 1 - (i/2)%8] = (in[i+0] << 0); +#else + out8[i/2] = (in[i+0] << 0); +#endif + } +} + +static inline void +ef_unpack_m_vec(int legs, const uint64_t *in, unsigned char *out) { + const unsigned char *in8 = (const unsigned char *)in; + for(int i = 0; i < legs * 16; i += 2){ +#ifdef TARGET_BIG_ENDIAN + out[i] = (in8[(((i/2 + 8) / 8) * 8) - 1 - (i/2)%8]) & 0xF; + out[i+1] = (in8[(((i/2 + 8) / 8) * 8) - 1 - (i/2)%8] >> 4); +#else + out[i] = (in8[i/2]) & 0xF; + out[i+1] = (in8[i/2] >> 4); +#endif + } +} + + +// put matrix in row echelon form with ones on first nonzero entries *in +// constant time* +static inline void EF(unsigned char *A, int nrows, int ncols) { + + alignas (32) uint64_t _pivot_row[(K_MAX * O_MAX + 1 + 15) / 16]; + alignas (32) uint64_t _pivot_row2[(K_MAX * O_MAX + 1 + 15) / 16]; + alignas (32) uint64_t packed_A[((K_MAX * O_MAX + 1 + 15) / 16) * M_MAX] = { 0 }; + + int row_len = (ncols + 15) / 16; + + // nibbleslice the matrix A + for (int i = 0; i < nrows; i++) { + ef_pack_m_vec(A + i * ncols, packed_A + i * row_len, ncols); + } + + // pivot row is secret, pivot col is not + + unsigned char inverse; + int pivot_row = 0; + for (int pivot_col = 0; pivot_col < ncols; pivot_col++) { + + int pivot_row_lower_bound = MAYO_MAX(0, pivot_col + nrows - ncols); + int pivot_row_upper_bound = MAYO_MIN(nrows - 1, pivot_col); + // the pivot row is guaranteed to be between these lower and upper bounds if + // A has full rank + + // zero out pivot row + for (int i = 0; i < row_len; i++) { + _pivot_row[i] = 0; + _pivot_row2[i] = 0; + } + + // try to get a pivot row in constant time + unsigned char pivot = 0; + uint64_t pivot_is_zero = -1; + for (int row = pivot_row_lower_bound; + row <= MAYO_MIN(nrows - 1, pivot_row_upper_bound + 32); row++) { + + uint64_t is_pivot_row = ~ct_compare_64(row, pivot_row); + uint64_t below_pivot_row = ct_64_is_greater_than(row, pivot_row); + + for (int j = 0; j < row_len; j++) { + _pivot_row[j] ^= (is_pivot_row | (below_pivot_row & pivot_is_zero)) & + packed_A[row * row_len + j]; + } + pivot = m_extract_element(_pivot_row, pivot_col); + pivot_is_zero = ~ct_compare_64((int) pivot, 0); + } + + // multiply pivot row by inverse of pivot + inverse = inverse_f(pivot); + vec_mul_add_u64(row_len, _pivot_row, inverse, _pivot_row2); + + // conditionally write pivot row to the correct row, if there is a nonzero + // pivot + for (int row = pivot_row_lower_bound; row <= pivot_row_upper_bound; row++) { + uint64_t do_copy = ~ct_compare_64(row, pivot_row) & ~pivot_is_zero; + uint64_t do_not_copy = ~do_copy; + for (int col = 0; col < row_len; col++) { + packed_A[row * row_len + col] = + (do_not_copy & packed_A[row * row_len + col]) + + (do_copy & _pivot_row2[col]); + } + } + + // eliminate entries below pivot + for (int row = pivot_row_lower_bound; row < nrows; row++) { + unsigned char below_pivot = (row > pivot_row); + unsigned char elt_to_elim = m_extract_element(packed_A + row * row_len, pivot_col); + + vec_mul_add_u64(row_len, _pivot_row2, below_pivot * elt_to_elim, + packed_A + row * row_len); + } + + pivot_row += (-(int64_t)(~pivot_is_zero)); + } + + unsigned char temp[(O_MAX * K_MAX + 1 + 15)]; + + // unbitslice the matrix A + for (int i = 0; i < nrows; i++) { + ef_unpack_m_vec(row_len, packed_A + i * row_len, temp); + for (int j = 0; j < ncols; j++) { + A[i * ncols + j] = temp[j]; + } + } + + mayo_secure_clear(temp, K_MAX * O_MAX + 1 + 15); + mayo_secure_clear(_pivot_row, (K_MAX * O_MAX + 1 + 15) / 16 * 8); + mayo_secure_clear(_pivot_row2, (K_MAX * O_MAX + 1 + 15) / 16 * 8); + mayo_secure_clear(packed_A, ((K_MAX * O_MAX + 1 + 15) / 16) * M_MAX * 8); +} + +#endif + diff --git a/src/mayo.c b/src/mayo.c index 84dd93f..ce1ccd4 100644 --- a/src/mayo.c +++ b/src/mayo.c @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include #include @@ -40,7 +40,7 @@ static void encode(const unsigned char *m, unsigned char *menc, int mlen) { } } -static void compute_rhs(const mayo_params_t *p, const uint64_t *_vPv, unsigned char *t, unsigned char *y){ +static void compute_rhs(const mayo_params_t *p, const uint64_t *_vPv, const unsigned char *t, unsigned char *y){ #ifndef ENABLE_PARAMS_DYNAMIC (void) p; #endif @@ -62,10 +62,18 @@ static void compute_rhs(const mayo_params_t *p, const uint64_t *_vPv, unsigned c // reduce mod f(X) for (int jj = 0; jj < F_TAIL_LEN; jj++) { if(jj%2 == 0){ +#ifdef TARGET_BIG_ENDIAN + temp_bytes[(((jj/2 + 8) / 8) * 8) - 1 - (jj/2)%8] ^= mul_f(top, PARAM_f_tail(p)[jj]); +#else temp_bytes[jj/2] ^= mul_f(top, PARAM_f_tail(p)[jj]); +#endif } else { +#ifdef TARGET_BIG_ENDIAN + temp_bytes[(((jj/2 + 8) / 8) * 8) - 1 - (jj/2)%8] ^= mul_f(top, PARAM_f_tail(p)[jj]) << 4; +#else temp_bytes[jj/2] ^= mul_f(top, PARAM_f_tail(p)[jj]) << 4; +#endif } } @@ -80,8 +88,14 @@ static void compute_rhs(const mayo_params_t *p, const uint64_t *_vPv, unsigned c // add to y for (int i = 0; i < PARAM_m(p); i+=2) { +#ifdef TARGET_BIG_ENDIAN + y[i] = t[i] ^ (temp_bytes[(((i/2 + 8) / 8) * 8) - 1 - (i/2)%8] & 0xF); + y[i+1] = t[i+1] ^ (temp_bytes[(((i/2 + 8) / 8) * 8) - 1 - (i/2)%8] >> 4); +#else y[i] = t[i] ^ (temp_bytes[i/2] & 0xF); y[i+1] = t[i+1] ^ (temp_bytes[i/2] >> 4); +#endif + } } @@ -208,6 +222,11 @@ static void compute_A(const mayo_params_t *p, const uint64_t *_VtL, unsigned cha } } +#ifdef TARGET_BIG_ENDIAN + for (int i = 0; i < (((PARAM_o(p)*PARAM_k(p)+15)/16)*16)*MAYO_M_OVER_8; ++i) + A[i] = BSWAP64(A[i]); +#endif + for (int r = 0; r < PARAM_m(p); r+=16) { for (int c = 0; c < PARAM_A_cols(p)-1 ; c+=16) @@ -234,9 +253,9 @@ int mayo_keypair(const mayo_params_t *p, unsigned char *pk, unsigned char *sk) { return ret; } -int mayo_sign(const mayo_params_t *p, unsigned char *sm, - unsigned long long *smlen, const unsigned char *m, - unsigned long long mlen, const unsigned char *csk) { +int mayo_sign_signature(const mayo_params_t *p, unsigned char *sig, + size_t *siglen, const unsigned char *m, + size_t mlen, const unsigned char *csk) { int ret = MAYO_OK; unsigned char tenc[M_BYTES_MAX], t[M_MAX]; // no secret data unsigned char y[M_MAX]; // secret data @@ -264,6 +283,9 @@ int mayo_sign(const mayo_params_t *p, unsigned char *sm, const int param_v_bytes = PARAM_v_bytes(p); const int param_r_bytes = PARAM_r_bytes(p); const int param_P1_bytes = PARAM_P1_bytes(p); +#ifdef TARGET_BIG_ENDIAN + const int param_P2_bytes = PARAM_P2_bytes(p); +#endif const int param_sig_bytes = PARAM_sig_bytes(p); const int param_A_cols = PARAM_A_cols(p); const int param_digest_bytes = PARAM_digest_bytes(p); @@ -286,22 +308,22 @@ int mayo_sign(const mayo_params_t *p, unsigned char *sm, alignas (32) uint64_t Mtmp[K_MAX * O_MAX * M_MAX / 16] = {0}; #ifdef TARGET_BIG_ENDIAN - for (int i = 0; i < param_P1_bytes / 4; ++i) { - P1[i] = BSWAP32(P1[i]); + for (int i = 0; i < param_P1_bytes / 8; ++i) { + P1[i] = BSWAP64(P1[i]); } - for (int i = 0; i < param_P2_bytes / 4; ++i) { - L[i] = BSWAP32(L[i]); + for (int i = 0; i < param_P2_bytes / 8; ++i) { + L[i] = BSWAP64(L[i]); } #endif // choose the randomizer - #ifndef PQM4 + #if defined(PQM4) || defined(HAVE_RANDOMBYTES_NORETVAL) + randombytes(tmp + param_digest_bytes, param_salt_bytes); + #else if (randombytes(tmp + param_digest_bytes, param_salt_bytes) != MAYO_OK) { ret = MAYO_ERR; goto err; } - #else - randombytes(tmp + param_digest_bytes, param_salt_bytes); #endif // hashing to salt @@ -360,11 +382,9 @@ int mayo_sign(const mayo_params_t *p, unsigned char *sm, mat_add(vi, Ox, s + i * param_n, param_n - param_o, 1); memcpy(s + i * param_n + (param_n - param_o), x + i * param_o, param_o); } - encode(s, sm, param_n * param_k); - memcpy(sm + param_sig_bytes - param_salt_bytes, salt, param_salt_bytes); - memmove(sm + param_sig_bytes, m, - mlen); // assert: smlen == param_k * param_n + mlen - *smlen = param_sig_bytes + mlen; + encode(s, sig, param_n * param_k); + memcpy(sig + param_sig_bytes - param_salt_bytes, salt, param_salt_bytes); + *siglen = param_sig_bytes; err: mayo_secure_clear(V, K_MAX * V_BYTES_MAX + R_BYTES_MAX); mayo_secure_clear(Vdec, N_MINUS_O_MAX * K_MAX); @@ -378,11 +398,27 @@ int mayo_sign(const mayo_params_t *p, unsigned char *sm, return ret; } +int mayo_sign(const mayo_params_t *p, unsigned char *sm, + size_t *smlen, const unsigned char *m, + size_t mlen, const unsigned char *csk) { + int ret = MAYO_OK; + const int param_sig_bytes = PARAM_sig_bytes(p); + size_t siglen = param_sig_bytes; + ret = mayo_sign_signature(p, sm, &siglen, m, mlen, csk); + if (ret != MAYO_OK || siglen != (size_t) param_sig_bytes) + goto err; + + memmove(sm + param_sig_bytes, m, mlen); + *smlen = siglen + mlen; +err: + return ret; +} + int mayo_open(const mayo_params_t *p, unsigned char *m, - unsigned long long *mlen, const unsigned char *sm, - unsigned long long smlen, const unsigned char *pk) { + size_t *mlen, const unsigned char *sm, + size_t smlen, const unsigned char *pk) { const int param_sig_bytes = PARAM_sig_bytes(p); - if (smlen < (unsigned long long)param_sig_bytes) { + if (smlen < (size_t)param_sig_bytes) { return MAYO_ERR; } int result = mayo_verify(p, sm + param_sig_bytes, smlen - param_sig_bytes, sm, @@ -418,13 +454,13 @@ int mayo_keypair_compact(const mayo_params_t *p, unsigned char *cpk, const int param_sk_seed_bytes = PARAM_sk_seed_bytes(p); // seed_sk $←- B^(sk_seed bytes) - #ifndef PQM4 + #if defined(PQM4) || defined(HAVE_RANDOMBYTES_NORETVAL) + randombytes(seed_sk, param_sk_seed_bytes); + #else if (randombytes(seed_sk, param_sk_seed_bytes) != MAYO_OK) { ret = MAYO_ERR; goto err; } - #else - randombytes(seed_sk, param_sk_seed_bytes); #endif // S ← shake256(seedsk, pk seed bytes + O bytes) @@ -444,6 +480,7 @@ int mayo_keypair_compact(const mayo_params_t *p, unsigned char *cpk, PK_PRF((unsigned char *)P, param_P1_bytes + param_P2_bytes, seed_pk, param_pk_seed_bytes); + int m_legs = param_m / 32; uint64_t *P1 = P; @@ -462,7 +499,8 @@ int mayo_keypair_compact(const mayo_params_t *p, unsigned char *cpk, memcpy(cpk + param_pk_seed_bytes, P3_upper, param_P3_bytes); -#ifndef PQM4 + +#if !defined(PQM4) && !defined(HAVE_RANDOMBYTES_NORETVAL) err: #endif mayo_secure_clear(O, (N_MINUS_O_MAX)*O_MAX); @@ -519,8 +557,8 @@ int mayo_expand_sk(const mayo_params_t *p, const unsigned char *csk, uint64_t *P2 = P + (param_P1_bytes / 8); #ifdef TARGET_BIG_ENDIAN - for (int i = 0; i < (param_P1_bytes + param_P2_bytes) / 4; ++i) { - P[i] = BSWAP32(P[i]); + for (int i = 0; i < (param_P1_bytes + param_P2_bytes) / 8; ++i) { + P[i] = BSWAP64(P[i]); } #endif @@ -533,8 +571,8 @@ int mayo_expand_sk(const mayo_params_t *p, const unsigned char *csk, memcpy(sk->o, S + param_pk_seed_bytes, param_O_bytes); #ifdef TARGET_BIG_ENDIAN - for (int i = 0; i < (param_P1_bytes + param_P2_bytes) / 4; ++i) { - P[i] = BSWAP32(P[i]); + for (int i = 0; i < (param_P1_bytes + param_P2_bytes) / 8; ++i) { + P[i] = BSWAP64(P[i]); } #endif @@ -544,7 +582,7 @@ int mayo_expand_sk(const mayo_params_t *p, const unsigned char *csk, } int mayo_verify(const mayo_params_t *p, const unsigned char *m, - unsigned long long mlen, const unsigned char *sig, + size_t mlen, const unsigned char *sig, const unsigned char *cpk) { unsigned char tEnc[M_BYTES_MAX]; unsigned char t[M_MAX]; @@ -578,14 +616,14 @@ int mayo_verify(const mayo_params_t *p, const unsigned char *m, uint64_t *P3 = P2 + (param_P2_bytes / 8); #ifdef TARGET_BIG_ENDIAN - for (int i = 0; i < param_P1_bytes / 4; ++i) { - P1[i] = BSWAP32(P1[i]); + for (int i = 0; i < param_P1_bytes / 8; ++i) { + P1[i] = BSWAP64(P1[i]); } - for (int i = 0; i < param_P2_bytes / 4; ++i) { - P2[i] = BSWAP32(P2[i]); + for (int i = 0; i < param_P2_bytes / 8; ++i) { + P2[i] = BSWAP64(P2[i]); } - for (int i = 0; i < param_P3_bytes / 4; ++i) { - P3[i] = BSWAP32(P3[i]); + for (int i = 0; i < param_P3_bytes / 8; ++i) { + P3[i] = BSWAP64(P3[i]); } #endif @@ -615,3 +653,4 @@ int mayo_verify(const mayo_params_t *p, const unsigned char *m, } return MAYO_ERR; // bad signature } + diff --git a/src/mayo_1/api.c b/src/mayo_1/api.c index 5c0021a..b7e2ef8 100644 --- a/src/mayo_1/api.c +++ b/src/mayo_1/api.c @@ -3,46 +3,44 @@ #include #include -int -crypto_sign_keypair(unsigned char *pk, unsigned char *sk) { - return mayo_keypair(&MAYO_1, pk, sk); -} - -#ifndef PQM4 -int -crypto_sign(unsigned char *sm, unsigned long long *smlen, - const unsigned char *m, unsigned long long mlen, - const unsigned char *sk) { - return mayo_sign(&MAYO_1, sm, smlen, m, mlen, sk); -} +#ifdef ENABLE_PARAMS_DYNAMIC +#define MAYO_PARAMS &MAYO_1 +#else +#define MAYO_PARAMS 0 +#endif int -crypto_sign_open(unsigned char *m, unsigned long long *mlen, - const unsigned char *sm, unsigned long long smlen, - const unsigned char *pk) { - return mayo_open(&MAYO_1, m, mlen, sm, smlen, pk); +crypto_sign_keypair(unsigned char *pk, unsigned char *sk) { + return mayo_keypair(MAYO_PARAMS, pk, sk); } - -#else int crypto_sign(unsigned char *sm, size_t *smlen, const unsigned char *m, size_t mlen, const unsigned char *sk) { + return mayo_sign(MAYO_PARAMS, sm, smlen, m, mlen, sk); +} - unsigned long long smlen_ll; - int rc = mayo_sign(&MAYO_1, sm, &smlen_ll, m, mlen, sk); - *smlen = smlen_ll; - return rc; +int +crypto_sign_signature(unsigned char *sig, + size_t *siglen, const unsigned char *m, + size_t mlen, const unsigned char *sk) { + return mayo_sign_signature(MAYO_PARAMS, sig, siglen, m, mlen, sk); } int crypto_sign_open(unsigned char *m, size_t *mlen, const unsigned char *sm, size_t smlen, const unsigned char *pk) { - unsigned long long mlen_ll; - int rc = mayo_open(&MAYO_1, m, &mlen_ll, sm, smlen, pk); - *mlen = mlen_ll; - return rc; + return mayo_open(MAYO_PARAMS, m, mlen, sm, smlen, pk); } -#endif \ No newline at end of file + +int +crypto_sign_verify(const unsigned char *sig, size_t siglen, + const unsigned char *m, size_t mlen, + const unsigned char *pk) { + if (siglen != CRYPTO_BYTES) + return -1; + return mayo_verify(MAYO_PARAMS, m, mlen, sig, pk); +} + diff --git a/src/mayo_1/api.h b/src/mayo_1/api.h index 0a9cfb4..07a2f67 100644 --- a/src/mayo_1/api.h +++ b/src/mayo_1/api.h @@ -3,39 +3,41 @@ #ifndef api_h #define api_h +#include + #define CRYPTO_SECRETKEYBYTES 24 #define CRYPTO_PUBLICKEYBYTES 1168 #define CRYPTO_BYTES 321 #define CRYPTO_ALGNAME "MAYO_1" +#define crypto_sign_keypair MAYO_NAMESPACE(crypto_sign_keypair) int crypto_sign_keypair(unsigned char *pk, unsigned char *sk); - -#ifndef PQM4 -int -crypto_sign(unsigned char *sm, unsigned long long *smlen, - const unsigned char *m, unsigned long long mlen, - const unsigned char *sk); - -int -crypto_sign_open(unsigned char *m, unsigned long long *mlen, - const unsigned char *sm, unsigned long long smlen, - const unsigned char *pk); - -#else -#include - +#define crypto_sign MAYO_NAMESPACE(crypto_sign) int crypto_sign(unsigned char *sm, size_t *smlen, const unsigned char *m, size_t mlen, const unsigned char *sk); +#define crypto_sign_signature MAYO_NAMESPACE(crypto_sign_signature) +int +crypto_sign_signature(unsigned char *sig, + size_t *siglen, const unsigned char *m, + size_t mlen, const unsigned char *sk); + +#define crypto_sign_open MAYO_NAMESPACE(crypto_sign_open) int crypto_sign_open(unsigned char *m, size_t *mlen, const unsigned char *sm, size_t smlen, const unsigned char *pk); -#endif + +#define crypto_sign_verify MAYO_NAMESPACE(crypto_sign_verify) +int +crypto_sign_verify(const unsigned char *sig, size_t siglen, + const unsigned char *m, size_t mlen, + const unsigned char *pk); #endif /* api_h */ + diff --git a/src/mayo_2/api.c b/src/mayo_2/api.c index 5a1ab5b..a7cf85e 100644 --- a/src/mayo_2/api.c +++ b/src/mayo_2/api.c @@ -3,45 +3,44 @@ #include #include -int -crypto_sign_keypair(unsigned char *pk, unsigned char *sk) { - return mayo_keypair(&MAYO_2, pk, sk); -} - -#ifndef PQM4 -int -crypto_sign(unsigned char *sm, unsigned long long *smlen, - const unsigned char *m, unsigned long long mlen, - const unsigned char *sk) { - return mayo_sign(&MAYO_2, sm, smlen, m, mlen, sk); -} +#ifdef ENABLE_PARAMS_DYNAMIC +#define MAYO_PARAMS &MAYO_2 +#else +#define MAYO_PARAMS 0 +#endif int -crypto_sign_open(unsigned char *m, unsigned long long *mlen, - const unsigned char *sm, unsigned long long smlen, - const unsigned char *pk) { - return mayo_open(&MAYO_2, m, mlen, sm, smlen, pk); +crypto_sign_keypair(unsigned char *pk, unsigned char *sk) { + return mayo_keypair(MAYO_PARAMS, pk, sk); } -#else int crypto_sign(unsigned char *sm, size_t *smlen, const unsigned char *m, size_t mlen, const unsigned char *sk) { + return mayo_sign(MAYO_PARAMS, sm, smlen, m, mlen, sk); +} - unsigned long long smlen_ll; - int rc = mayo_sign(&MAYO_2, sm, &smlen_ll, m, mlen, sk); - *smlen = smlen_ll; - return rc; +int +crypto_sign_signature(unsigned char *sig, + size_t *siglen, const unsigned char *m, + size_t mlen, const unsigned char *sk) { + return mayo_sign_signature(MAYO_PARAMS, sig, siglen, m, mlen, sk); } int crypto_sign_open(unsigned char *m, size_t *mlen, const unsigned char *sm, size_t smlen, const unsigned char *pk) { - unsigned long long mlen_ll; - int rc = mayo_open(&MAYO_2, m, &mlen_ll, sm, smlen, pk); - *mlen = mlen_ll; - return rc; + return mayo_open(MAYO_PARAMS, m, mlen, sm, smlen, pk); } -#endif \ No newline at end of file + +int +crypto_sign_verify(const unsigned char *sig, size_t siglen, + const unsigned char *m, size_t mlen, + const unsigned char *pk) { + if (siglen != CRYPTO_BYTES) + return -1; + return mayo_verify(MAYO_PARAMS, m, mlen, sig, pk); +} + diff --git a/src/mayo_2/api.h b/src/mayo_2/api.h index df291eb..7c38c1a 100644 --- a/src/mayo_2/api.h +++ b/src/mayo_2/api.h @@ -3,39 +3,41 @@ #ifndef api_h #define api_h +#include + #define CRYPTO_SECRETKEYBYTES 24 #define CRYPTO_PUBLICKEYBYTES 5488 #define CRYPTO_BYTES 180 #define CRYPTO_ALGNAME "MAYO_2" +#define crypto_sign_keypair MAYO_NAMESPACE(crypto_sign_keypair) int crypto_sign_keypair(unsigned char *pk, unsigned char *sk); - -#ifndef PQM4 -int -crypto_sign(unsigned char *sm, unsigned long long *smlen, - const unsigned char *m, unsigned long long mlen, - const unsigned char *sk); - -int -crypto_sign_open(unsigned char *m, unsigned long long *mlen, - const unsigned char *sm, unsigned long long smlen, - const unsigned char *pk); -#else -#include - +#define crypto_sign MAYO_NAMESPACE(crypto_sign) int crypto_sign(unsigned char *sm, size_t *smlen, const unsigned char *m, size_t mlen, const unsigned char *sk); +#define crypto_sign_signature MAYO_NAMESPACE(crypto_sign_signature) +int +crypto_sign_signature(unsigned char *sig, + size_t *siglen, const unsigned char *m, + size_t mlen, const unsigned char *sk); + +#define crypto_sign_open MAYO_NAMESPACE(crypto_sign_open) int crypto_sign_open(unsigned char *m, size_t *mlen, const unsigned char *sm, size_t smlen, const unsigned char *pk); -#endif +#define crypto_sign_verify MAYO_NAMESPACE(crypto_sign_verify) +int +crypto_sign_verify(const unsigned char *sig, size_t siglen, + const unsigned char *m, size_t mlen, + const unsigned char *pk); #endif /* api_h */ + diff --git a/src/mayo_3/api.c b/src/mayo_3/api.c index 242b060..5c42eab 100644 --- a/src/mayo_3/api.c +++ b/src/mayo_3/api.c @@ -3,44 +3,44 @@ #include #include -int -crypto_sign_keypair(unsigned char *pk, unsigned char *sk) { - return mayo_keypair(&MAYO_3, pk, sk); -} +#ifdef ENABLE_PARAMS_DYNAMIC +#define MAYO_PARAMS &MAYO_3 +#else +#define MAYO_PARAMS 0 +#endif -#ifndef PQM4 int -crypto_sign(unsigned char *sm, unsigned long long *smlen, - const unsigned char *m, unsigned long long mlen, - const unsigned char *sk) { - return mayo_sign(&MAYO_3, sm, smlen, m, mlen, sk); +crypto_sign_keypair(unsigned char *pk, unsigned char *sk) { + return mayo_keypair(MAYO_PARAMS, pk, sk); } -int -crypto_sign_open(unsigned char *m, unsigned long long *mlen, - const unsigned char *sm, unsigned long long smlen, - const unsigned char *pk) { - return mayo_open(&MAYO_3, m, mlen, sm, smlen, pk); -} -#else int crypto_sign(unsigned char *sm, size_t *smlen, const unsigned char *m, size_t mlen, const unsigned char *sk) { + return mayo_sign(MAYO_PARAMS, sm, smlen, m, mlen, sk); +} - unsigned long long smlen_ll; - int rc = mayo_sign(&MAYO_3, sm, &smlen_ll, m, mlen, sk); - *smlen = smlen_ll; - return rc; +int +crypto_sign_signature(unsigned char *sig, + size_t *siglen, const unsigned char *m, + size_t mlen, const unsigned char *sk) { + return mayo_sign_signature(MAYO_PARAMS, sig, siglen, m, mlen, sk); } int crypto_sign_open(unsigned char *m, size_t *mlen, const unsigned char *sm, size_t smlen, const unsigned char *pk) { - unsigned long long mlen_ll; - int rc = mayo_open(&MAYO_3, m, &mlen_ll, sm, smlen, pk); - *mlen = mlen_ll; - return rc; + return mayo_open(MAYO_PARAMS, m, mlen, sm, smlen, pk); } -#endif \ No newline at end of file + +int +crypto_sign_verify(const unsigned char *sig, size_t siglen, + const unsigned char *m, size_t mlen, + const unsigned char *pk) { + if (siglen != CRYPTO_BYTES) + return -1; + return mayo_verify(MAYO_PARAMS, m, mlen, sig, pk); +} + diff --git a/src/mayo_3/api.h b/src/mayo_3/api.h index d9c174c..47899c1 100644 --- a/src/mayo_3/api.h +++ b/src/mayo_3/api.h @@ -3,36 +3,41 @@ #ifndef api_h #define api_h +#include + #define CRYPTO_SECRETKEYBYTES 32 #define CRYPTO_PUBLICKEYBYTES 2656 #define CRYPTO_BYTES 577 #define CRYPTO_ALGNAME "MAYO_3" +#define crypto_sign_keypair MAYO_NAMESPACE(crypto_sign_keypair) int crypto_sign_keypair(unsigned char *pk, unsigned char *sk); -#ifndef PQM4 -int -crypto_sign(unsigned char *sm, unsigned long long *smlen, - const unsigned char *m, unsigned long long mlen, - const unsigned char *sk); - -int -crypto_sign_open(unsigned char *m, unsigned long long *mlen, - const unsigned char *sm, unsigned long long smlen, - const unsigned char *pk); -#else -#include - +#define crypto_sign MAYO_NAMESPACE(crypto_sign) int crypto_sign(unsigned char *sm, size_t *smlen, const unsigned char *m, size_t mlen, const unsigned char *sk); +#define crypto_sign_signature MAYO_NAMESPACE(crypto_sign_signature) +int +crypto_sign_signature(unsigned char *sig, + size_t *siglen, const unsigned char *m, + size_t mlen, const unsigned char *sk); + +#define crypto_sign_open MAYO_NAMESPACE(crypto_sign_open) int crypto_sign_open(unsigned char *m, size_t *mlen, const unsigned char *sm, size_t smlen, const unsigned char *pk); -#endif + +#define crypto_sign_verify MAYO_NAMESPACE(crypto_sign_verify) +int +crypto_sign_verify(const unsigned char *sig, size_t siglen, + const unsigned char *m, size_t mlen, + const unsigned char *pk); + #endif /* api_h */ + diff --git a/src/mayo_5/api.c b/src/mayo_5/api.c index 24c674e..f2e861e 100644 --- a/src/mayo_5/api.c +++ b/src/mayo_5/api.c @@ -3,44 +3,44 @@ #include #include -int -crypto_sign_keypair(unsigned char *pk, unsigned char *sk) { - return mayo_keypair(&MAYO_5, pk, sk); -} +#ifdef ENABLE_PARAMS_DYNAMIC +#define MAYO_PARAMS &MAYO_5 +#else +#define MAYO_PARAMS 0 +#endif -#ifndef PQM4 int -crypto_sign(unsigned char *sm, unsigned long long *smlen, - const unsigned char *m, unsigned long long mlen, - const unsigned char *sk) { - return mayo_sign(&MAYO_5, sm, smlen, m, mlen, sk); +crypto_sign_keypair(unsigned char *pk, unsigned char *sk) { + return mayo_keypair(MAYO_PARAMS, pk, sk); } -int -crypto_sign_open(unsigned char *m, unsigned long long *mlen, - const unsigned char *sm, unsigned long long smlen, - const unsigned char *pk) { - return mayo_open(&MAYO_5, m, mlen, sm, smlen, pk); -} -#else int crypto_sign(unsigned char *sm, size_t *smlen, const unsigned char *m, size_t mlen, const unsigned char *sk) { + return mayo_sign(MAYO_PARAMS, sm, smlen, m, mlen, sk); +} - unsigned long long smlen_ll; - int rc = mayo_sign(&MAYO_5, sm, &smlen_ll, m, mlen, sk); - *smlen = smlen_ll; - return rc; +int +crypto_sign_signature(unsigned char *sig, + size_t *siglen, const unsigned char *m, + size_t mlen, const unsigned char *sk) { + return mayo_sign_signature(MAYO_PARAMS, sig, siglen, m, mlen, sk); } int crypto_sign_open(unsigned char *m, size_t *mlen, const unsigned char *sm, size_t smlen, const unsigned char *pk) { - unsigned long long mlen_ll; - int rc = mayo_open(&MAYO_5, m, &mlen_ll, sm, smlen, pk); - *mlen = mlen_ll; - return rc; + return mayo_open(MAYO_PARAMS, m, mlen, sm, smlen, pk); } -#endif \ No newline at end of file + +int +crypto_sign_verify(const unsigned char *sig, size_t siglen, + const unsigned char *m, size_t mlen, + const unsigned char *pk) { + if (siglen != CRYPTO_BYTES) + return -1; + return mayo_verify(MAYO_PARAMS, m, mlen, sig, pk); +} + diff --git a/src/mayo_5/api.h b/src/mayo_5/api.h index 41a819b..fa1ebbb 100644 --- a/src/mayo_5/api.h +++ b/src/mayo_5/api.h @@ -3,38 +3,41 @@ #ifndef api_h #define api_h +#include + #define CRYPTO_SECRETKEYBYTES 40 #define CRYPTO_PUBLICKEYBYTES 5008 #define CRYPTO_BYTES 838 #define CRYPTO_ALGNAME "MAYO_5" +#define crypto_sign_keypair MAYO_NAMESPACE(crypto_sign_keypair) int crypto_sign_keypair(unsigned char *pk, unsigned char *sk); - -#ifndef PQM4 -int -crypto_sign(unsigned char *sm, unsigned long long *smlen, - const unsigned char *m, unsigned long long mlen, - const unsigned char *sk); - -int -crypto_sign_open(unsigned char *m, unsigned long long *mlen, - const unsigned char *sm, unsigned long long smlen, - const unsigned char *pk); -#else -#include - +#define crypto_sign MAYO_NAMESPACE(crypto_sign) int crypto_sign(unsigned char *sm, size_t *smlen, const unsigned char *m, size_t mlen, const unsigned char *sk); +#define crypto_sign_signature MAYO_NAMESPACE(crypto_sign_signature) +int +crypto_sign_signature(unsigned char *sig, + size_t *siglen, const unsigned char *m, + size_t mlen, const unsigned char *sk); + +#define crypto_sign_open MAYO_NAMESPACE(crypto_sign_open) int crypto_sign_open(unsigned char *m, size_t *mlen, const unsigned char *sm, size_t smlen, const unsigned char *pk); -#endif + +#define crypto_sign_verify MAYO_NAMESPACE(crypto_sign_verify) +int +crypto_sign_verify(const unsigned char *sig, size_t siglen, + const unsigned char *m, size_t mlen, + const unsigned char *pk); #endif /* api_h */ + diff --git a/src/params.c b/src/params.c index 3dd4941..043d4ce 100644 --- a/src/params.c +++ b/src/params.c @@ -2,6 +2,7 @@ #include +#ifdef ENABLE_PARAMS_DYNAMIC static const unsigned char f_tail_64[] = F_TAIL_64; static const unsigned char f_tail_96[] = F_TAIL_96; static const unsigned char f_tail_128[] = F_TAIL_128; @@ -37,3 +38,5 @@ MAYO_GEN_PARAMS(MAYO_1); MAYO_GEN_PARAMS(MAYO_2); MAYO_GEN_PARAMS(MAYO_3); MAYO_GEN_PARAMS(MAYO_5); +#endif + diff --git a/src/simple_arithmetic.h b/src/simple_arithmetic.h index 0c2c4e7..ce7580f 100644 --- a/src/simple_arithmetic.h +++ b/src/simple_arithmetic.h @@ -144,3 +144,4 @@ static inline void m_vec_mul_add_x(int m_legs, const uint64_t *in, uint64_t *acc } #endif + diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 0d67e1e..f3be92b 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -43,4 +43,5 @@ else() target_link_libraries(mayo_test_sample_solution_MAYO_1 mayo_1_test_nistapi) target_include_directories(mayo_test_sample_solution_MAYO_1 PRIVATE ../include ../src/mayo_1) add_test(MAYO_1_SAMPLE_SOLUTION mayo_test_sample_solution_MAYO_1) -endif() \ No newline at end of file +endif() + diff --git a/test/bench.c b/test/bench.c index ba81bf7..a33b776 100644 --- a/test/bench.c +++ b/test/bench.c @@ -45,7 +45,7 @@ int main(int argc, char *argv[]) { goto end; } int runs = atoi(argv[1]); - rc = bench_sig(&MAYO_VARIANT, runs, 0); + rc = bench_sig(0, runs, 0); #endif @@ -96,18 +96,18 @@ static int bench_sig(const mayo_params_t *p, int runs, int csv) { const int m_len = 32; - unsigned char *pk = calloc(p->cpk_bytes, 1); - unsigned char *epk = calloc(p->epk_bytes, 1); - unsigned char *sk = calloc(p->csk_bytes, 1); + unsigned char *pk = calloc(PARAM_cpk_bytes(p), 1); + unsigned char *epk = calloc(PARAM_epk_bytes(p), 1); + unsigned char *sk = calloc(PARAM_csk_bytes(p), 1); sk_t *esk = calloc(sizeof(sk_t), 1); - unsigned char *sig = calloc(p->sig_bytes + m_len, 1); + unsigned char *sig = calloc(PARAM_sig_bytes(p) + m_len, 1); unsigned char *m = calloc(m_len, 1); - unsigned long long len = p->sig_bytes; + size_t len = PARAM_sig_bytes(p); if (csv) { - printf("%s,", p->name); + printf("%s,", PARAM_name(p)); } else { - printf("Benchmarking %s\n", p->name); + printf("Benchmarking %s\n", PARAM_name(p)); } BENCH_CODE_1(runs); @@ -128,7 +128,7 @@ static int bench_sig(const mayo_params_t *p, int runs, int csv) { len = 32; BENCH_CODE_1(runs); - mayo_open(p, m, &len, sig, p->sig_bytes, pk); + mayo_open(p, m, &len, sig, PARAM_sig_bytes(p), pk); BENCH_CODE_2("mayo_verify", csv); if (csv) { @@ -160,3 +160,4 @@ static inline int64_t cpucycles(void) { return (int64_t)(time.tv_sec * 1e9 + time.tv_nsec); #endif } + diff --git a/test/bench_mayo_table3.c b/test/bench_mayo_table3.c index 6ff64dd..c340328 100644 --- a/test/bench_mayo_table3.c +++ b/test/bench_mayo_table3.c @@ -46,7 +46,7 @@ int main(int argc, char *argv[]) { goto end; } int runs = atoi(argv[1]); - rc = bench_sig(&MAYO_VARIANT, runs, 0); + rc = bench_sig(0, runs, 0); #endif @@ -87,8 +87,11 @@ int cmpfunc (const void *a, const void *b) { #define LIST_SIZE 10000 +#define Ot_times_P1O_P2 MAYO_NAMESPACE(Ot_times_P1O_P2) extern void Ot_times_P1O_P2(const mayo_params_t* p, const uint64_t* P1, const unsigned char* O, uint64_t* P1O_P2, uint64_t* P3); +#define V_times_L__V_times_P1_times_Vt MAYO_NAMESPACE(V_times_L__V_times_P1_times_Vt) extern void V_times_L__V_times_P1_times_Vt(const mayo_params_t* p, const uint64_t* L, const unsigned char* V, uint64_t* M, const uint64_t* P1, uint64_t* Y); +#define m_calculate_PS_SPS MAYO_NAMESPACE(m_calculate_PS_SPS) extern void m_calculate_PS_SPS(const uint64_t *P1, const uint64_t *P2, const uint64_t *P3, const unsigned char *S, const int m, const int v, const int o, const int k, uint64_t *SPS); @@ -115,9 +118,9 @@ static int bench_sig(const mayo_params_t *p, int runs, int csv) { if (csv) { - printf("%s,", p->name); + printf("%s,", PARAM_name(p)); } else { - printf("Benchmarking %s\n", p->name); + printf("Benchmarking %s\n", PARAM_name(p)); } BENCH_CODE_1(runs); diff --git a/test/test_kat.c b/test/test_kat.c index 21ae2d2..28596cc 100644 --- a/test/test_kat.c +++ b/test/test_kat.c @@ -12,7 +12,7 @@ You are solely responsible for determining the appropriateness of using and dist #include #include #include -#include +#include #include #define MAX_MARKER_LEN 50 @@ -50,7 +50,7 @@ int main(int argc, char *argv[]) { } end: #else - rc = test_sig_kat(&MAYO_VARIANT); + rc = test_sig_kat(0); #endif return rc; } @@ -59,17 +59,17 @@ int main(int argc, char *argv[]) { static int test_sig_kat(const mayo_params_t *p) { unsigned char seed[48]; unsigned char *m, *sm, *m1, *sm_rsp; - unsigned long long mlen, smlen, mlen1; + size_t mlen, smlen, mlen1; int count; int done; - unsigned char pk[p->cpk_bytes], sk[p->csk_bytes]; + unsigned char pk[PARAM_cpk_bytes(p)], sk[PARAM_csk_bytes(p)]; int ret_val; char fn_rsp[64]; FILE *fp_rsp; - unsigned char pk_rsp[p->cpk_bytes], sk_rsp[p->csk_bytes]; + unsigned char pk_rsp[PARAM_cpk_bytes(p)], sk_rsp[PARAM_csk_bytes(p)]; - sprintf(fn_rsp, "../../KAT/PQCsignKAT_%d_%s.rsp", p->csk_bytes, p->name); + sprintf(fn_rsp, "../../KAT/PQCsignKAT_%d_%s.rsp", PARAM_csk_bytes(p), PARAM_name(p)); if ( (fp_rsp = fopen(fn_rsp, "r")) == NULL ) { printf("Couldn't open <%s> for read\n", fn_rsp); return KAT_FILE_OPEN_ERROR; @@ -92,7 +92,7 @@ static int test_sig_kat(const mayo_params_t *p) { randombytes_init(seed, NULL, 256); if ( FindMarker(fp_rsp, "mlen = ") ) { - ret_val = fscanf(fp_rsp, "%lld", &mlen); + ret_val = fscanf(fp_rsp, "%zu", &mlen); } else { printf("ERROR: unable to read 'mlen' from <%s>\n", fn_rsp); return KAT_DATA_ERROR; @@ -100,8 +100,8 @@ static int test_sig_kat(const mayo_params_t *p) { m = (unsigned char *)calloc(mlen, sizeof(unsigned char)); m1 = (unsigned char *)calloc(mlen, sizeof(unsigned char)); - sm = (unsigned char *)calloc(mlen + p->sig_bytes, sizeof(unsigned char)); - sm_rsp = (unsigned char *)calloc(mlen + p->sig_bytes, sizeof(unsigned char)); + sm = (unsigned char *)calloc(mlen + PARAM_sig_bytes(p), sizeof(unsigned char)); + sm_rsp = (unsigned char *)calloc(mlen + PARAM_sig_bytes(p), sizeof(unsigned char)); if ( !ReadHex(fp_rsp, m, (int)mlen, "msg = ") ) { printf("ERROR: unable to read 'msg' from <%s>\n", fn_rsp); @@ -113,20 +113,20 @@ static int test_sig_kat(const mayo_params_t *p) { printf("crypto_sign_keypair returned <%d>\n", ret_val); return KAT_CRYPTO_FAILURE; } - if ( !ReadHex(fp_rsp, pk_rsp, p->cpk_bytes, "pk = ") ) { + if ( !ReadHex(fp_rsp, pk_rsp, PARAM_cpk_bytes(p), "pk = ") ) { printf("ERROR: unable to read 'pk' from <%s>\n", fn_rsp); return KAT_DATA_ERROR; } - if ( !ReadHex(fp_rsp, sk_rsp, p->csk_bytes, "sk = ") ) { + if ( !ReadHex(fp_rsp, sk_rsp, PARAM_csk_bytes(p), "sk = ") ) { printf("ERROR: unable to read 'sk' from <%s>\n", fn_rsp); return KAT_DATA_ERROR; } - if (memcmp(pk, pk_rsp, p->cpk_bytes) != 0) { + if (memcmp(pk, pk_rsp, PARAM_cpk_bytes(p)) != 0) { printf("ERROR: pk is different from <%s>\n", fn_rsp); return KAT_VERIFICATION_ERROR; } - if (memcmp(sk, sk_rsp, p->csk_bytes) != 0) { + if (memcmp(sk, sk_rsp, PARAM_csk_bytes(p)) != 0) { printf("ERROR: sk is different from <%s>\n", fn_rsp); return KAT_VERIFICATION_ERROR; } @@ -153,7 +153,7 @@ static int test_sig_kat(const mayo_params_t *p) { } if ( mlen != mlen1 ) { - printf("crypto_sign_open returned bad 'mlen': Got <%lld>, expected <%lld>\n", mlen1, mlen); + printf("crypto_sign_open returned bad 'mlen': Got <%zu>, expected <%zu>\n", mlen1, mlen); return KAT_CRYPTO_FAILURE; } @@ -269,3 +269,4 @@ ReadHex(FILE *infile, unsigned char *A, int Length, char *str) { return 1; } + diff --git a/test/test_mayo.c b/test/test_mayo.c index a868b73..be2c664 100644 --- a/test/test_mayo.c +++ b/test/test_mayo.c @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include @@ -47,11 +47,11 @@ static int test_mayo(const mayo_params_t *p) { unsigned char *msg = (unsigned char *) ((uintptr_t)_msg | (uintptr_t)1); unsigned char seed[48] = { 0 }; - unsigned long long msglen = 32; + size_t msglen = 32; randombytes_init(seed, NULL, 256); - printf("Testing Keygen, Sign, Open: %s\n", p->name); + printf("Testing Keygen, Sign, Open: %s\n", PARAM_name(p)); int res = mayo_keypair(p, pk, sk); if (res != MAYO_OK) { @@ -60,10 +60,10 @@ static int test_mayo(const mayo_params_t *p) { } #ifdef ENABLE_CT_TESTING - VALGRIND_MAKE_MEM_DEFINED(pk, p->cpk_bytes); + VALGRIND_MAKE_MEM_DEFINED(pk, PARAM_cpk_bytes(p)); #endif - unsigned long long smlen = p->sig_bytes + 32; + size_t smlen = PARAM_sig_bytes(p) + 32; res = mayo_sign(p, sig, &smlen, msg, 32, sk); if (res != MAYO_OK) { @@ -72,9 +72,9 @@ static int test_mayo(const mayo_params_t *p) { } printf("pk: "); - print_hex(pk, p->cpk_bytes); + print_hex(pk, PARAM_cpk_bytes(p)); printf("sk: "); - print_hex(sk, p->csk_bytes); + print_hex(sk, PARAM_csk_bytes(p)); printf("sm: "); print_hex(sig, smlen); @@ -115,7 +115,7 @@ int main(int argc, char *argv[]) { rc = test_mayo(&MAYO_5); } #else - rc = test_mayo(&MAYO_VARIANT); + rc = test_mayo(0); #endif if (rc != MAYO_OK) { @@ -123,3 +123,4 @@ int main(int argc, char *argv[]) { } return rc; } + diff --git a/test/test_sample_solution.c b/test/test_sample_solution.c index 9d1a075..878f969 100644 --- a/test/test_sample_solution.c +++ b/test/test_sample_solution.c @@ -8,7 +8,7 @@ #include #include #include -#include +#include static int test_sample_solution(void) { @@ -18,8 +18,8 @@ static int test_sample_solution(void) { #error "variant not supported" #endif - unsigned long long msglen = 32; - unsigned long long smlen = CRYPTO_BYTES + msglen; + size_t msglen = 32; + size_t smlen = CRYPTO_BYTES + msglen; unsigned char *pk = calloc(CRYPTO_PUBLICKEYBYTES, 1); unsigned char *sk = calloc(CRYPTO_SECRETKEYBYTES, 1); @@ -62,3 +62,4 @@ static int test_sample_solution(void) { int main(int argc, char *argv[]) { return test_sample_solution(); } +