From 3a6a6025ccf65f333944b328d761480a57b4bc7d Mon Sep 17 00:00:00 2001 From: Frank Denis Date: Sat, 9 Sep 2023 01:04:48 +0200 Subject: [PATCH] Backport aarch64 support for aes256-gcm --- Makefile.in | 1 + builds/Makefile.in | 1 + configure | 427 ++++--- configure.ac | 139 ++- contrib/Makefile.in | 1 + dist-build/Makefile.in | 1 + msvc-scripts/Makefile.in | 1 + src/Makefile.in | 1 + src/libsodium/Makefile.am | 11 +- src/libsodium/Makefile.in | 111 +- .../crypto_aead/aes256gcm/aead_aes256gcm.c | 157 +++ .../aes256gcm/aesni/aead_aes256gcm_aesni.c | 163 +-- .../armcrypto/aead_aes256gcm_armcrypto.c | 1019 +++++++++++++++++ src/libsodium/include/Makefile.in | 1 + src/libsodium/include/sodium/runtime.h | 3 + src/libsodium/sodium/runtime.c | 113 +- test/Makefile.in | 1 + test/default/Makefile.in | 1 + 18 files changed, 1771 insertions(+), 381 deletions(-) create mode 100644 src/libsodium/crypto_aead/aes256gcm/aead_aes256gcm.c create mode 100644 src/libsodium/crypto_aead/aes256gcm/armcrypto/aead_aes256gcm_armcrypto.c diff --git a/Makefile.in b/Makefile.in index 5d00615084..795fcc717f 100644 --- a/Makefile.in +++ b/Makefile.in @@ -271,6 +271,7 @@ CCASFLAGS = @CCASFLAGS@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ CFLAGS_AESNI = @CFLAGS_AESNI@ +CFLAGS_ARMCRYPTO = @CFLAGS_ARMCRYPTO@ CFLAGS_AVX = @CFLAGS_AVX@ CFLAGS_AVX2 = @CFLAGS_AVX2@ CFLAGS_AVX512F = @CFLAGS_AVX512F@ diff --git a/builds/Makefile.in b/builds/Makefile.in index 86cc8cd783..627dc776ce 100644 --- a/builds/Makefile.in +++ b/builds/Makefile.in @@ -150,6 +150,7 @@ CCASFLAGS = @CCASFLAGS@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ CFLAGS_AESNI = @CFLAGS_AESNI@ +CFLAGS_ARMCRYPTO = @CFLAGS_ARMCRYPTO@ CFLAGS_AVX = @CFLAGS_AVX@ CFLAGS_AVX2 = @CFLAGS_AVX2@ CFLAGS_AVX512F = @CFLAGS_AVX512F@ diff --git a/configure b/configure index 967f88f7e0..7ddc794df1 100755 --- a/configure +++ b/configure @@ -693,6 +693,7 @@ CFLAGS_SSSE3 CFLAGS_SSE3 CFLAGS_SSE2 CFLAGS_MMX +CFLAGS_ARMCRYPTO LIBTOOL_DEPS LT_SYS_LIBRARY_PATH OTOOL64 @@ -1570,7 +1571,7 @@ Optional Packages: --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) --with-pthreads use pthreads library, or --without-pthreads to - disable threading support. + disable threading support --with-safecode For maintainers only - please do not use --with-ctgrind For maintainers only - please do not use --with-pic[=PKGS] try to use only PIC/non-PIC objects [default=use @@ -8598,82 +8599,6 @@ fi ;; esac -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for a broken Xcode version" >&5 -printf %s "checking for a broken Xcode version... " >&6; } -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -int -main (void) -{ - -#if !defined(__APPLE_CC__) || __APPLE_CC__ != 6000 -#error Not Apple -#endif -#if !defined(__clang_major__) || __clang_major__ != 11 -#error Not Xcode 11 -#endif - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO" -then : - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -printf "%s\n" "yes" >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: Using unsupported Xcode version" >&5 -printf "%s\n" "$as_me: WARNING: Using unsupported Xcode version" >&2;} - as_CACHEVAR=`printf "%s\n" "ax_cv_check_cflags__$CFLAGS -fno-stack-check" | $as_tr_sh` -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts $CFLAGS -fno-stack-check" >&5 -printf %s "checking whether C compiler accepts $CFLAGS -fno-stack-check... " >&6; } -if eval test \${$as_CACHEVAR+y} -then : - printf %s "(cached) " >&6 -else $as_nop - - ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS $CFLAGS -fno-stack-check" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -#include -int -main (void) -{ -time_t x; int fodder = 0; if (fodder > -1000 && time(&x)) return (int) x - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO" -then : - eval "$as_CACHEVAR=yes" -else $as_nop - eval "$as_CACHEVAR=no" -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam \ - conftest$ac_exeext conftest.$ac_ext - CFLAGS=$ax_check_save_flags -fi -eval ac_res=\$$as_CACHEVAR - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 -printf "%s\n" "$ac_res" >&6; } -if eval test \"x\$"$as_CACHEVAR"\" = x"yes" -then : - CFLAGS="$CFLAGS -fno-stack-check" -else $as_nop - : -fi - - -else $as_nop - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 -printf "%s\n" "no" >&6; } - -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - if test "x$enable_ssp" != "xno" then : @@ -8813,6 +8738,48 @@ else $as_nop : fi +as_CACHEVAR=`printf "%s\n" "ax_cv_check_cflags__$CFLAGS -Wno-deprecated-declarations" | $as_tr_sh` +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts $CFLAGS -Wno-deprecated-declarations" >&5 +printf %s "checking whether C compiler accepts $CFLAGS -Wno-deprecated-declarations... " >&6; } +if eval test \${$as_CACHEVAR+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS $CFLAGS -Wno-deprecated-declarations" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +int +main (void) +{ +time_t x; int fodder = 0; if (fodder > -1000 && time(&x)) return (int) x + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + eval "$as_CACHEVAR=yes" +else $as_nop + eval "$as_CACHEVAR=no" +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +eval ac_res=\$$as_CACHEVAR + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +printf "%s\n" "$ac_res" >&6; } +if eval test \"x\$"$as_CACHEVAR"\" = x"yes" +then : + CFLAGS="$CFLAGS -Wno-deprecated-declarations" +else $as_nop + : +fi + as_CACHEVAR=`printf "%s\n" "ax_cv_check_cflags__$CFLAGS -Wno-unknown-pragmas" | $as_tr_sh` { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts $CFLAGS -Wno-unknown-pragmas" >&5 printf %s "checking whether C compiler accepts $CFLAGS -Wno-unknown-pragmas... " >&6; } @@ -18542,10 +18509,154 @@ fi +target_cpu_aarch64=no +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for ARM64 target" >&5 +printf %s "checking for ARM64 target... " >&6; } +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#ifndef __aarch64__ +#error Not aarch64 +#endif +#include + +int +main (void) +{ +(void) 0 + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + target_cpu_aarch64=yes +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + target_cpu_aarch64=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + if test "x$EMSCRIPTEN" = "x" then : + if test "x$target_cpu_aarch64" = "xyes" +then : + + have_armcrypto=no + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for ARM crypto instructions set" >&5 +printf %s "checking for ARM crypto instructions set... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +int +main (void) +{ + vaeseq_u8(vmovq_n_u8(0), vmovq_n_u8(__ARM_FEATURE_CRYPTO)) + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + have_armcrypto=yes + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + oldcflags="$CFLAGS" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -march=armv8-a+crypto+aes" >&5 +printf %s "checking whether C compiler accepts -march=armv8-a+crypto+aes... " >&6; } +if test ${ax_cv_check_cflags___march_armv8_apcryptopaes+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -march=armv8-a+crypto+aes" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +int +main (void) +{ +time_t x; int fodder = 0; if (fodder > -1000 && time(&x)) return (int) x + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ax_cv_check_cflags___march_armv8_apcryptopaes=yes +else $as_nop + ax_cv_check_cflags___march_armv8_apcryptopaes=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___march_armv8_apcryptopaes" >&5 +printf "%s\n" "$ax_cv_check_cflags___march_armv8_apcryptopaes" >&6; } +if test "x$ax_cv_check_cflags___march_armv8_apcryptopaes" = xyes +then : + + CFLAGS="$CFLAGS -march=armv8-a+crypto+aes" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for ARM crypto instructions set with -march=armv8-a+crypto+aes" >&5 +printf %s "checking for ARM crypto instructions set with -march=armv8-a+crypto+aes... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +int +main (void) +{ + vaeseq_u8(vmovq_n_u8(0), vmovq_n_u8(__ARM_FEATURE_CRYPTO)) + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes - with addition of -march=armv8-a+crypto+aes" >&5 +printf "%s\n" "yes - with addition of -march=armv8-a+crypto+aes" >&6; } + have_armcrypto=yes + CFLAGS_ARMCRYPTO="-march=armv8-a+crypto+aes" + +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + CFLAGS="$oldcflags" + +else $as_nop + : +fi + + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + if test "$have_armcrypto" = "yes" +then : + +printf "%s\n" "#define HAVE_ARMCRYPTO 1" >>confdefs.h + +fi + +fi + oldcflags="$CFLAGS" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mmmx" >&5 printf %s "checking whether C compiler accepts -mmmx... " >&6; } @@ -18603,7 +18714,7 @@ main (void) return 0; } _ACEOF -if ac_fn_c_try_compile "$LINENO" +if ac_fn_c_try_link "$LINENO" then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } @@ -18654,7 +18765,8 @@ else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext CFLAGS="$oldcflags" oldcflags="$CFLAGS" @@ -18718,7 +18830,7 @@ main (void) return 0; } _ACEOF -if ac_fn_c_try_compile "$LINENO" +if ac_fn_c_try_link "$LINENO" then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } @@ -18769,7 +18881,8 @@ else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext CFLAGS="$oldcflags" oldcflags="$CFLAGS" @@ -18830,7 +18943,7 @@ main (void) return 0; } _ACEOF -if ac_fn_c_try_compile "$LINENO" +if ac_fn_c_try_link "$LINENO" then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } @@ -18881,7 +18994,8 @@ else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext CFLAGS="$oldcflags" oldcflags="$CFLAGS" @@ -18941,7 +19055,7 @@ main (void) return 0; } _ACEOF -if ac_fn_c_try_compile "$LINENO" +if ac_fn_c_try_link "$LINENO" then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } @@ -18992,7 +19106,8 @@ else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext CFLAGS="$oldcflags" oldcflags="$CFLAGS" @@ -19052,7 +19167,7 @@ main (void) return 0; } _ACEOF -if ac_fn_c_try_compile "$LINENO" +if ac_fn_c_try_link "$LINENO" then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } @@ -19103,7 +19218,8 @@ else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext CFLAGS="$oldcflags" oldcflags="$CFLAGS" @@ -19163,7 +19279,7 @@ main (void) return 0; } _ACEOF -if ac_fn_c_try_compile "$LINENO" +if ac_fn_c_try_link "$LINENO" then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } @@ -19214,7 +19330,8 @@ else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext CFLAGS="$oldcflags" oldcflags="$CFLAGS" @@ -19278,7 +19395,7 @@ return _mm256_movemask_ps(_mm256_cmp_ps(x, y, _CMP_NEQ_OQ)); return 0; } _ACEOF -if ac_fn_c_try_compile "$LINENO" +if ac_fn_c_try_link "$LINENO" then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } @@ -19341,7 +19458,7 @@ main (void) return 0; } _ACEOF -if ac_fn_c_try_compile "$LINENO" +if ac_fn_c_try_link "$LINENO" then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } @@ -19352,13 +19469,15 @@ printf "%s\n" "no" >&6; } printf "%s\n" "#define _mm256_broadcastsi128_si256 _mm_broadcastsi128_si256" >>confdefs.h fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext CFLAGS="$oldcflags" oldcflags="$CFLAGS" @@ -19434,7 +19553,7 @@ __m512i y = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), return 0; } _ACEOF -if ac_fn_c_try_compile "$LINENO" +if ac_fn_c_try_link "$LINENO" then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } @@ -19528,7 +19647,8 @@ fi fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext CFLAGS="$oldcflags" oldcflags="$CFLAGS" @@ -19630,7 +19750,7 @@ main (void) return 0; } _ACEOF -if ac_fn_c_try_compile "$LINENO" +if ac_fn_c_try_link "$LINENO" then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } @@ -19722,7 +19842,8 @@ else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext CFLAGS="$oldcflags" oldcflags="$CFLAGS" @@ -19782,7 +19903,7 @@ main (void) return 0; } _ACEOF -if ac_fn_c_try_compile "$LINENO" +if ac_fn_c_try_link "$LINENO" then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } @@ -19834,7 +19955,8 @@ else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext CFLAGS="$oldcflags" @@ -19852,6 +19974,7 @@ fi + ac_fn_c_check_header_compile "$LINENO" "sys/mman.h" "ac_cv_header_sys_mman_h" "$ac_includes_default" if test "x$ac_cv_header_sys_mman_h" = xyes then : @@ -19875,6 +19998,19 @@ if test "x$ac_cv_header_intrin_h" = xyes then : printf "%s\n" "#define HAVE_INTRIN_H 1" >>confdefs.h +fi +ac_fn_c_check_header_compile "$LINENO" "sys/auxv.h" "ac_cv_header_sys_auxv_h" "$ac_includes_default" +if test "x$ac_cv_header_sys_auxv_h" = xyes +then : + printf "%s\n" "#define HAVE_SYS_AUXV_H 1" >>confdefs.h + +fi + +ac_fn_c_check_header_compile "$LINENO" "CommonCrypto/CommonRandom.h" "ac_cv_header_CommonCrypto_CommonRandom_h" "$ac_includes_default" +if test "x$ac_cv_header_CommonCrypto_CommonRandom_h" = xyes +then : + printf "%s\n" "#define HAVE_COMMONCRYPTO_COMMONRANDOM_H 1" >>confdefs.h + fi ac_fn_c_check_header_compile "$LINENO" "cet.h" "ac_cv_header_cet_h" "$ac_includes_default" @@ -20218,7 +20354,7 @@ main (void) return 0; } _ACEOF -if ac_fn_c_try_compile "$LINENO" +if ac_fn_c_try_link "$LINENO" then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } @@ -20228,7 +20364,8 @@ printf "%s\n" "yes" >&6; } CPPFLAGS="$CPPFLAGS -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS" fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether we can use inline asm code" >&5 printf %s "checking whether we can use inline asm code... " >&6; } @@ -20606,51 +20743,72 @@ fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if data alignment is required" >&5 -printf %s "checking if data alignment is required... " >&6; } -aligned_access_required=yes -case $host_cpu in #( - i?86|amd64|x86_64|powerpc*|s390*) : - aligned_access_required=no ;; #( - arm*) : - cat confdefs.h - <<_ACEOF >conftest.$ac_ext +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if atomic operations are supported" >&5 +printf %s "checking if atomic operations are supported... " >&6; } +cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ -#ifndef __ARM_FEATURE_UNALIGNED -# error data alignment is required -#endif - int main (void) { +static volatile int _sodium_lock; +__sync_lock_test_and_set(&_sodium_lock, 1); +__sync_lock_release(&_sodium_lock); + + ; return 0; } _ACEOF -if ac_fn_c_try_compile "$LINENO" -then : - aligned_access_required=no -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - ;; #( - *) : - ;; -esac -if test "x$aligned_access_required" = "xyes" +if ac_fn_c_try_link "$LINENO" then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } + +printf "%s\n" "#define HAVE_ATOMIC_OPS 1" >>confdefs.h + else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext -printf "%s\n" "#define CPU_UNALIGNED_ACCESS 1" >>confdefs.h +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if C11 memory fences are supported" >&5 +printf %s "checking if C11 memory fences are supported... " >&6; } +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + +int +main (void) +{ + +atomic_thread_fence(memory_order_acquire); + + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +printf "%s\n" "#define HAVE_C11_MEMORY_FENCES 1" >>confdefs.h + +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if atomic operations are supported" >&5 -printf %s "checking if atomic operations are supported... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if gcc memory fences are supported" >&5 +printf %s "checking if gcc memory fences are supported... " >&6; } cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ @@ -20658,9 +20816,7 @@ int main (void) { -static volatile int _sodium_lock; -__sync_lock_test_and_set(&_sodium_lock, 1); -__sync_lock_release(&_sodium_lock); +__atomic_thread_fence(__ATOMIC_ACQUIRE); ; @@ -20672,7 +20828,7 @@ then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } -printf "%s\n" "#define HAVE_ATOMIC_OPS 1" >>confdefs.h +printf "%s\n" "#define HAVE_GCC_MEMORY_FENCES 1" >>confdefs.h else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 @@ -20913,16 +21069,15 @@ printf %s "checking for getrandom with a standard API... " >&6; } #ifdef HAVE_SYS_RANDOM_H # include #endif -#ifdef __SANITIZE_ADDRESS__ -# error A recent libasan version on an old system may intercept nonexistent functions -#endif int main (void) { unsigned char buf; -(void) getrandom((void *) &buf, 1U, 0U); +if (&getrandom != NULL) { + (void) getrandom((void *) &buf, 1U, 0U); +} ; return 0; @@ -20959,16 +21114,12 @@ printf %s "checking for getentropy with a standard API... " >&6; } #ifdef HAVE_SYS_RANDOM_H # include #endif -#ifdef __SANITIZE_ADDRESS__ -# error A recent libasan version on an old system may intercept nonexistent functions -#endif int main (void) { unsigned char buf; - if (&getentropy != NULL) { (void) getentropy((void *) &buf, 1U); } diff --git a/configure.ac b/configure.ac index de285a1a84..9cf4e548aa 100644 --- a/configure.ac +++ b/configure.ac @@ -131,7 +131,7 @@ AM_CONDITIONAL([MINIMAL], [test x$enable_minimal = xyes]) AC_SUBST(SODIUM_LIBRARY_MINIMAL_DEF) AC_ARG_WITH(pthreads, AS_HELP_STRING([--with-pthreads], - [use pthreads library, or --without-pthreads to disable threading support.]), + [use pthreads library, or --without-pthreads to disable threading support]), [ ], [withval="yes"]) AS_IF([test "x$withval" = "xyes"], [ @@ -293,23 +293,6 @@ AS_CASE([$host_os], ]) ]) -AC_MSG_CHECKING(for a broken Xcode version) -AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[]], [[ -#if !defined(__APPLE_CC__) || __APPLE_CC__ != 6000 -#error Not Apple -#endif -#if !defined(__clang_major__) || __clang_major__ != 11 -#error Not Xcode 11 -#endif -]])], - [AC_MSG_RESULT(yes) - AC_MSG_WARN([Using unsupported Xcode version]) - AX_CHECK_COMPILE_FLAG([$CFLAGS -fno-stack-check], - [CFLAGS="$CFLAGS -fno-stack-check"]) - ], - [AC_MSG_RESULT(no) -]) - AS_IF([test "x$enable_ssp" != "xno"],[ AS_CASE([$host_os], @@ -324,6 +307,7 @@ AS_CASE([$host_os], ]) AX_CHECK_COMPILE_FLAG([$CFLAGS -Wall], [CWFLAGS="$CFLAGS -Wall"]) +AX_CHECK_COMPILE_FLAG([$CFLAGS -Wno-deprecated-declarations], [CFLAGS="$CFLAGS -Wno-deprecated-declarations"]) AX_CHECK_COMPILE_FLAG([$CFLAGS -Wno-unknown-pragmas], [CFLAGS="$CFLAGS -Wno-unknown-pragmas"]) AC_ARG_VAR([CWFLAGS], [compilation flags for generating extra warnings]) @@ -392,14 +376,55 @@ AC_SUBST(LIBTOOL_DEPS) AC_ARG_VAR([AR], [path to the ar utility]) AC_CHECK_TOOL([AR], [ar], [ar]) -dnl Checks for headers +dnl Checks for headers and codegen feature flags + +target_cpu_aarch64=no +AC_MSG_CHECKING(for ARM64 target) +AC_LINK_IFELSE( + [AC_LANG_PROGRAM([ +#ifndef __aarch64__ +#error Not aarch64 +#endif +#include + ], [(void) 0])], + [AC_MSG_RESULT(yes) + target_cpu_aarch64=yes], + [AC_MSG_RESULT(no) + target_cpu_aarch64=no]) AS_IF([test "x$EMSCRIPTEN" = "x"], [ + AS_IF([test "x$target_cpu_aarch64" = "xyes"], [ + have_armcrypto=no + AC_MSG_CHECKING(for ARM crypto instructions set) + AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include ]], [[ vaeseq_u8(vmovq_n_u8(0), vmovq_n_u8(__ARM_FEATURE_CRYPTO)) ]])], + [ + AC_MSG_RESULT(yes) + have_armcrypto=yes + ], + [ + AC_MSG_RESULT(no) + oldcflags="$CFLAGS" + AX_CHECK_COMPILE_FLAG([-march=armv8-a+crypto+aes], [ + CFLAGS="$CFLAGS -march=armv8-a+crypto+aes" + AC_MSG_CHECKING(for ARM crypto instructions set with -march=armv8-a+crypto+aes) + AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include ]], [[ vaeseq_u8(vmovq_n_u8(0), vmovq_n_u8(__ARM_FEATURE_CRYPTO)) ]])], + [ + AC_MSG_RESULT(yes - with addition of -march=armv8-a+crypto+aes) + have_armcrypto=yes + CFLAGS_ARMCRYPTO="-march=armv8-a+crypto+aes" + ], + [AC_MSG_RESULT(no)]) + CFLAGS="$oldcflags" + ]) + ]) + AS_IF([test "$have_armcrypto" = "yes"],[AC_DEFINE([HAVE_ARMCRYPTO], [1], [ARM crypto extensions are available])]) + ]) + oldcflags="$CFLAGS" AX_CHECK_COMPILE_FLAG([-mmmx], [CFLAGS="$CFLAGS -mmmx"]) AC_MSG_CHECKING(for MMX instructions set) - AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ + AC_LINK_IFELSE([AC_LANG_PROGRAM([[ #pragma GCC target("mmx") #include ]], [[ __m64 x = _mm_setzero_si64(); ]])], @@ -412,7 +437,7 @@ AS_IF([test "x$EMSCRIPTEN" = "x"], [ oldcflags="$CFLAGS" AX_CHECK_COMPILE_FLAG([-msse2], [CFLAGS="$CFLAGS -msse2"]) AC_MSG_CHECKING(for SSE2 instructions set) - AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ + AC_LINK_IFELSE([AC_LANG_PROGRAM([[ #pragma GCC target("sse2") #ifndef __SSE2__ # define __SSE2__ @@ -429,7 +454,7 @@ AS_IF([test "x$EMSCRIPTEN" = "x"], [ oldcflags="$CFLAGS" AX_CHECK_COMPILE_FLAG([-msse3], [CFLAGS="$CFLAGS -msse3"]) AC_MSG_CHECKING(for SSE3 instructions set) - AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ + AC_LINK_IFELSE([AC_LANG_PROGRAM([[ #pragma GCC target("sse3") #include ]], [[ __m128 x = _mm_addsub_ps(_mm_cvtpd_ps(_mm_setzero_pd()), @@ -443,7 +468,7 @@ AS_IF([test "x$EMSCRIPTEN" = "x"], [ oldcflags="$CFLAGS" AX_CHECK_COMPILE_FLAG([-mssse3], [CFLAGS="$CFLAGS -mssse3"]) AC_MSG_CHECKING(for SSSE3 instructions set) - AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ + AC_LINK_IFELSE([AC_LANG_PROGRAM([[ #pragma GCC target("ssse3") #include ]], [[ __m64 x = _mm_abs_pi32(_m_from_int(0)); ]])], @@ -456,7 +481,7 @@ AS_IF([test "x$EMSCRIPTEN" = "x"], [ oldcflags="$CFLAGS" AX_CHECK_COMPILE_FLAG([-msse4.1], [CFLAGS="$CFLAGS -msse4.1"]) AC_MSG_CHECKING(for SSE4.1 instructions set) - AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ + AC_LINK_IFELSE([AC_LANG_PROGRAM([[ #pragma GCC target("sse4.1") #include ]], [[ __m128i x = _mm_minpos_epu16(_mm_setzero_si128()); ]])], @@ -469,7 +494,7 @@ AS_IF([test "x$EMSCRIPTEN" = "x"], [ oldcflags="$CFLAGS" AX_CHECK_COMPILE_FLAG([-mavx], [CFLAGS="$CFLAGS -mavx"]) AC_MSG_CHECKING(for AVX instructions set) - AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ + AC_LINK_IFELSE([AC_LANG_PROGRAM([[ #pragma GCC target("avx") #include ]], [[ _mm256_zeroall(); ]])], @@ -482,7 +507,7 @@ AS_IF([test "x$EMSCRIPTEN" = "x"], [ oldcflags="$CFLAGS" AX_CHECK_COMPILE_FLAG([-mavx2], [CFLAGS="$CFLAGS -mavx2"]) AC_MSG_CHECKING(for AVX2 instructions set) - AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ + AC_LINK_IFELSE([AC_LANG_PROGRAM([[ #pragma GCC target("avx2") #include ]], [[ @@ -494,7 +519,7 @@ return _mm256_movemask_ps(_mm256_cmp_ps(x, y, _CMP_NEQ_OQ)); AC_DEFINE([HAVE_AVX2INTRIN_H], [1], [AVX2 is available]) AX_CHECK_COMPILE_FLAG([-mavx2], [CFLAGS_AVX2="-mavx2"]) AC_MSG_CHECKING(if _mm256_broadcastsi128_si256 is correctly defined) - AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ + AC_LINK_IFELSE([AC_LANG_PROGRAM([[ #pragma GCC target("avx2") #include ]], [[ __m256i y = _mm256_broadcastsi128_si256(_mm_setzero_si128()); ]])], @@ -509,7 +534,7 @@ return _mm256_movemask_ps(_mm256_cmp_ps(x, y, _CMP_NEQ_OQ)); oldcflags="$CFLAGS" AX_CHECK_COMPILE_FLAG([-mavx512f], [CFLAGS="$CFLAGS -mavx512f"]) AC_MSG_CHECKING(for AVX512F instructions set) - AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ + AC_LINK_IFELSE([AC_LANG_PROGRAM([[ #pragma GCC target("avx512f") #include ]], [[ @@ -542,7 +567,7 @@ __m512i y = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), AX_CHECK_COMPILE_FLAG([-maes], [CFLAGS="$CFLAGS -maes"]) AX_CHECK_COMPILE_FLAG([-mpclmul], [CFLAGS="$CFLAGS -mpclmul"]) AC_MSG_CHECKING(for AESNI instructions set and PCLMULQDQ) - AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ + AC_LINK_IFELSE([AC_LANG_PROGRAM([[ #pragma GCC target("aes") #pragma GCC target("pclmul") #include @@ -559,7 +584,7 @@ __m512i y = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), oldcflags="$CFLAGS" AX_CHECK_COMPILE_FLAG([-mrdrnd], [CFLAGS="$CFLAGS -mrdrnd"]) AC_MSG_CHECKING(for RDRAND) - AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ + AC_LINK_IFELSE([AC_LANG_PROGRAM([[ #pragma GCC target("rdrnd") #include ]], [[ unsigned long long x; _rdrand64_step(&x); ]])], @@ -572,6 +597,7 @@ __m512i y = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), ]) +AC_SUBST(CFLAGS_ARMCRYPTO) AC_SUBST(CFLAGS_MMX) AC_SUBST(CFLAGS_SSE2) AC_SUBST(CFLAGS_SSE3) @@ -584,7 +610,8 @@ AC_SUBST(CFLAGS_AESNI) AC_SUBST(CFLAGS_PCLMUL) AC_SUBST(CFLAGS_RDRAND) -AC_CHECK_HEADERS([sys/mman.h sys/param.h sys/random.h intrin.h]) +AC_CHECK_HEADERS([sys/mman.h sys/param.h sys/random.h intrin.h sys/auxv.h]) +AC_CHECK_HEADERS([CommonCrypto/CommonRandom.h]) AC_CHECK_HEADERS([cet.h]) AC_MSG_CHECKING([if _xgetbv() is available]) @@ -609,7 +636,7 @@ AC_C_BIGENDIAN( ) AC_MSG_CHECKING(whether __STDC_LIMIT_MACROS is required) -AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ +AC_LINK_IFELSE([AC_LANG_PROGRAM([[ #include #include ]], [[ @@ -793,23 +820,6 @@ void f(void *x) { __dummy(x); } AC_DEFINE([HAVE_WEAK_SYMBOLS], [1], [weak symbols are supported])], [AC_MSG_RESULT(no)]) -AC_MSG_CHECKING(if data alignment is required) -aligned_access_required=yes -AS_CASE([$host_cpu], - [i?86|amd64|x86_64|powerpc*|s390*], - [aligned_access_required=no], - [arm*], - [AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ -#ifndef __ARM_FEATURE_UNALIGNED -# error data alignment is required -#endif - ]], [[]])], [aligned_access_required=no], [])] -) -AS_IF([test "x$aligned_access_required" = "xyes"], - [AC_MSG_RESULT(yes)], - [AC_MSG_RESULT(no) - AC_DEFINE([CPU_UNALIGNED_ACCESS], [1], [unaligned memory access is supported])]) - AC_MSG_CHECKING(if atomic operations are supported) AC_LINK_IFELSE([AC_LANG_PROGRAM([[ ]], [[ static volatile int _sodium_lock; @@ -821,6 +831,26 @@ __sync_lock_release(&_sodium_lock); AC_DEFINE([HAVE_ATOMIC_OPS], [1], [atomic operations are supported])], [AC_MSG_RESULT(no)]) +AC_MSG_CHECKING(if C11 memory fences are supported) +AC_LINK_IFELSE([AC_LANG_PROGRAM([[ +#include + ]], [[ +atomic_thread_fence(memory_order_acquire); +]] +)], +[AC_MSG_RESULT(yes) + AC_DEFINE([HAVE_C11_MEMORY_FENCES], [1], [C11 memory fences are supported])], +[AC_MSG_RESULT(no)]) + +AC_MSG_CHECKING(if gcc memory fences are supported) +AC_LINK_IFELSE([AC_LANG_PROGRAM([[ ]], [[ +__atomic_thread_fence(__ATOMIC_ACQUIRE); +]] +)], +[AC_MSG_RESULT(yes) + AC_DEFINE([HAVE_GCC_MEMORY_FENCES], [1], [GCC memory fences are supported])], +[AC_MSG_RESULT(no)]) + dnl Checks for functions and headers AC_FUNC_ALLOCA @@ -840,12 +870,11 @@ AS_IF([test "x$EMSCRIPTEN" = "x"],[ #ifdef HAVE_SYS_RANDOM_H # include #endif -#ifdef __SANITIZE_ADDRESS__ -# error A recent libasan version on an old system may intercept nonexistent functions -#endif ]], [[ unsigned char buf; -(void) getrandom((void *) &buf, 1U, 0U); +if (&getrandom != NULL) { + (void) getrandom((void *) &buf, 1U, 0U); +} ]])], [AC_MSG_RESULT(yes) AC_CHECK_FUNCS([getrandom])], @@ -861,12 +890,8 @@ unsigned char buf; #ifdef HAVE_SYS_RANDOM_H # include #endif -#ifdef __SANITIZE_ADDRESS__ -# error A recent libasan version on an old system may intercept nonexistent functions -#endif ]], [[ unsigned char buf; - if (&getentropy != NULL) { (void) getentropy((void *) &buf, 1U); } diff --git a/contrib/Makefile.in b/contrib/Makefile.in index eeb9012905..31969e1c89 100644 --- a/contrib/Makefile.in +++ b/contrib/Makefile.in @@ -150,6 +150,7 @@ CCASFLAGS = @CCASFLAGS@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ CFLAGS_AESNI = @CFLAGS_AESNI@ +CFLAGS_ARMCRYPTO = @CFLAGS_ARMCRYPTO@ CFLAGS_AVX = @CFLAGS_AVX@ CFLAGS_AVX2 = @CFLAGS_AVX2@ CFLAGS_AVX512F = @CFLAGS_AVX512F@ diff --git a/dist-build/Makefile.in b/dist-build/Makefile.in index 36d8e33af7..4ecd77d74d 100644 --- a/dist-build/Makefile.in +++ b/dist-build/Makefile.in @@ -150,6 +150,7 @@ CCASFLAGS = @CCASFLAGS@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ CFLAGS_AESNI = @CFLAGS_AESNI@ +CFLAGS_ARMCRYPTO = @CFLAGS_ARMCRYPTO@ CFLAGS_AVX = @CFLAGS_AVX@ CFLAGS_AVX2 = @CFLAGS_AVX2@ CFLAGS_AVX512F = @CFLAGS_AVX512F@ diff --git a/msvc-scripts/Makefile.in b/msvc-scripts/Makefile.in index 0b38cca496..39e4b60ecb 100644 --- a/msvc-scripts/Makefile.in +++ b/msvc-scripts/Makefile.in @@ -150,6 +150,7 @@ CCASFLAGS = @CCASFLAGS@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ CFLAGS_AESNI = @CFLAGS_AESNI@ +CFLAGS_ARMCRYPTO = @CFLAGS_ARMCRYPTO@ CFLAGS_AVX = @CFLAGS_AVX@ CFLAGS_AVX2 = @CFLAGS_AVX2@ CFLAGS_AVX512F = @CFLAGS_AVX512F@ diff --git a/src/Makefile.in b/src/Makefile.in index e1bea9095a..de664f13cf 100644 --- a/src/Makefile.in +++ b/src/Makefile.in @@ -208,6 +208,7 @@ CCASFLAGS = @CCASFLAGS@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ CFLAGS_AESNI = @CFLAGS_AESNI@ +CFLAGS_ARMCRYPTO = @CFLAGS_ARMCRYPTO@ CFLAGS_AVX = @CFLAGS_AVX@ CFLAGS_AVX2 = @CFLAGS_AVX2@ CFLAGS_AVX512F = @CFLAGS_AVX512F@ diff --git a/src/libsodium/Makefile.am b/src/libsodium/Makefile.am index 4c2b506c31..312f4f3d40 100644 --- a/src/libsodium/Makefile.am +++ b/src/libsodium/Makefile.am @@ -2,6 +2,7 @@ lib_LTLIBRARIES = \ libsodium.la libsodium_la_SOURCES = \ + crypto_aead/aes256gcm/aead_aes256gcm.c \ crypto_aead/chacha20poly1305/sodium/aead_chacha20poly1305.c \ crypto_aead/xchacha20poly1305/sodium/aead_xchacha20poly1305.c \ crypto_auth/crypto_auth.c \ @@ -195,8 +196,8 @@ endif SUBDIRS = \ include -libsodium_la_LIBADD = libaesni.la libsse2.la libssse3.la libsse41.la libavx2.la libavx512f.la -noinst_LTLIBRARIES = libaesni.la libsse2.la libssse3.la libsse41.la libavx2.la libavx512f.la +libsodium_la_LIBADD = libaesni.la libarmcrypto.la libsse2.la libssse3.la libsse41.la libavx2.la libavx512f.la +noinst_LTLIBRARIES = libaesni.la libarmcrypto.la libsse2.la libssse3.la libsse41.la libavx2.la libavx512f.la librdrand_la_LDFLAGS = $(libsodium_la_LDFLAGS) librdrand_la_CPPFLAGS = $(libsodium_la_CPPFLAGS) \ @@ -212,6 +213,12 @@ libsodium_la_SOURCES += \ randombytes/sysrandom/randombytes_sysrandom.c endif +libarmcrypto_la_LDFLAGS = $(libsodium_la_LDFLAGS) +libarmcrypto_la_CPPFLAGS = $(libsodium_la_CPPFLAGS) \ + @CFLAGS_ARMCRYPTO@ +libarmcrypto_la_SOURCES = \ + crypto_aead/aes256gcm/armcrypto/aead_aes256gcm_armcrypto.c + libaesni_la_LDFLAGS = $(libsodium_la_LDFLAGS) libaesni_la_CPPFLAGS = $(libsodium_la_CPPFLAGS) \ @CFLAGS_SSE2@ @CFLAGS_SSSE3@ @CFLAGS_AVX@ @CFLAGS_AESNI@ @CFLAGS_PCLMUL@ diff --git a/src/libsodium/Makefile.in b/src/libsodium/Makefile.in index c175fcc349..cf31cd1215 100644 --- a/src/libsodium/Makefile.in +++ b/src/libsodium/Makefile.in @@ -228,6 +228,13 @@ am__v_lt_1 = libaesni_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ $(libaesni_la_LDFLAGS) $(LDFLAGS) -o $@ +libarmcrypto_la_LIBADD = +am_libarmcrypto_la_OBJECTS = crypto_aead/aes256gcm/armcrypto/libarmcrypto_la-aead_aes256gcm_armcrypto.lo +libarmcrypto_la_OBJECTS = $(am_libarmcrypto_la_OBJECTS) +libarmcrypto_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(AM_CFLAGS) $(CFLAGS) $(libarmcrypto_la_LDFLAGS) $(LDFLAGS) \ + -o $@ libavx2_la_LIBADD = am_libavx2_la_OBJECTS = crypto_generichash/blake2b/ref/libavx2_la-blake2b-compress-avx2.lo \ crypto_pwhash/argon2/libavx2_la-argon2-fill-block-avx2.lo \ @@ -250,9 +257,11 @@ librdrand_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ $(librdrand_la_LDFLAGS) $(LDFLAGS) -o $@ @EMSCRIPTEN_FALSE@am_librdrand_la_rpath = -libsodium_la_DEPENDENCIES = libaesni.la libsse2.la libssse3.la \ - libsse41.la libavx2.la libavx512f.la $(am__append_8) +libsodium_la_DEPENDENCIES = libaesni.la libarmcrypto.la libsse2.la \ + libssse3.la libsse41.la libavx2.la libavx512f.la \ + $(am__append_8) am__libsodium_la_SOURCES_DIST = \ + crypto_aead/aes256gcm/aead_aes256gcm.c \ crypto_aead/chacha20poly1305/sodium/aead_chacha20poly1305.c \ crypto_aead/xchacha20poly1305/sodium/aead_xchacha20poly1305.c \ crypto_auth/crypto_auth.c \ @@ -412,7 +421,9 @@ am__objects_1 = @MINIMAL_FALSE@ crypto_stream/salsa208/libsodium_la-stream_salsa208.lo \ @MINIMAL_FALSE@ crypto_stream/xchacha20/libsodium_la-stream_xchacha20.lo @EMSCRIPTEN_FALSE@am__objects_6 = randombytes/sysrandom/libsodium_la-randombytes_sysrandom.lo -am_libsodium_la_OBJECTS = crypto_aead/chacha20poly1305/sodium/libsodium_la-aead_chacha20poly1305.lo \ +am_libsodium_la_OBJECTS = \ + crypto_aead/aes256gcm/libsodium_la-aead_aes256gcm.lo \ + crypto_aead/chacha20poly1305/sodium/libsodium_la-aead_chacha20poly1305.lo \ crypto_aead/xchacha20poly1305/sodium/libsodium_la-aead_xchacha20poly1305.lo \ crypto_auth/libsodium_la-crypto_auth.lo \ crypto_auth/hmacsha256/libsodium_la-auth_hmacsha256.lo \ @@ -530,7 +541,9 @@ am__v_at_1 = DEFAULT_INCLUDES = -I.@am__isrc@ depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp am__maybe_remake_depfiles = depfiles -am__depfiles_remade = crypto_aead/aes256gcm/aesni/$(DEPDIR)/libaesni_la-aead_aes256gcm_aesni.Plo \ +am__depfiles_remade = crypto_aead/aes256gcm/$(DEPDIR)/libsodium_la-aead_aes256gcm.Plo \ + crypto_aead/aes256gcm/aesni/$(DEPDIR)/libaesni_la-aead_aes256gcm_aesni.Plo \ + crypto_aead/aes256gcm/armcrypto/$(DEPDIR)/libarmcrypto_la-aead_aes256gcm_armcrypto.Plo \ crypto_aead/chacha20poly1305/sodium/$(DEPDIR)/libsodium_la-aead_chacha20poly1305.Plo \ crypto_aead/xchacha20poly1305/sodium/$(DEPDIR)/libsodium_la-aead_xchacha20poly1305.Plo \ crypto_auth/$(DEPDIR)/libsodium_la-crypto_auth.Plo \ @@ -667,13 +680,14 @@ AM_V_CCLD = $(am__v_CCLD_@AM_V@) am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) am__v_CCLD_0 = @echo " CCLD " $@; am__v_CCLD_1 = -SOURCES = $(libaesni_la_SOURCES) $(libavx2_la_SOURCES) \ - $(libavx512f_la_SOURCES) $(librdrand_la_SOURCES) \ - $(libsodium_la_SOURCES) $(libsse2_la_SOURCES) \ - $(libsse41_la_SOURCES) $(libssse3_la_SOURCES) -DIST_SOURCES = $(libaesni_la_SOURCES) $(libavx2_la_SOURCES) \ - $(libavx512f_la_SOURCES) $(librdrand_la_SOURCES) \ - $(am__libsodium_la_SOURCES_DIST) \ +SOURCES = $(libaesni_la_SOURCES) $(libarmcrypto_la_SOURCES) \ + $(libavx2_la_SOURCES) $(libavx512f_la_SOURCES) \ + $(librdrand_la_SOURCES) $(libsodium_la_SOURCES) \ + $(libsse2_la_SOURCES) $(libsse41_la_SOURCES) \ + $(libssse3_la_SOURCES) +DIST_SOURCES = $(libaesni_la_SOURCES) $(libarmcrypto_la_SOURCES) \ + $(libavx2_la_SOURCES) $(libavx512f_la_SOURCES) \ + $(librdrand_la_SOURCES) $(am__libsodium_la_SOURCES_DIST) \ $(am__libsse2_la_SOURCES_DIST) $(libsse41_la_SOURCES) \ $(libssse3_la_SOURCES) RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ @@ -765,6 +779,7 @@ CCASFLAGS = @CCASFLAGS@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ CFLAGS_AESNI = @CFLAGS_AESNI@ +CFLAGS_ARMCRYPTO = @CFLAGS_ARMCRYPTO@ CFLAGS_AVX = @CFLAGS_AVX@ CFLAGS_AVX2 = @CFLAGS_AVX2@ CFLAGS_AVX512F = @CFLAGS_AVX512F@ @@ -920,7 +935,7 @@ valgrind_tools = @valgrind_tools@ lib_LTLIBRARIES = \ libsodium.la -libsodium_la_SOURCES = \ +libsodium_la_SOURCES = crypto_aead/aes256gcm/aead_aes256gcm.c \ crypto_aead/chacha20poly1305/sodium/aead_chacha20poly1305.c \ crypto_aead/xchacha20poly1305/sodium/aead_xchacha20poly1305.c \ crypto_auth/crypto_auth.c \ @@ -1026,10 +1041,12 @@ libsodium_la_CPPFLAGS = \ SUBDIRS = \ include -libsodium_la_LIBADD = libaesni.la libsse2.la libssse3.la libsse41.la \ - libavx2.la libavx512f.la $(am__append_8) -noinst_LTLIBRARIES = libaesni.la libsse2.la libssse3.la libsse41.la \ - libavx2.la libavx512f.la $(am__append_9) +libsodium_la_LIBADD = libaesni.la libarmcrypto.la libsse2.la \ + libssse3.la libsse41.la libavx2.la libavx512f.la \ + $(am__append_8) +noinst_LTLIBRARIES = libaesni.la libarmcrypto.la libsse2.la \ + libssse3.la libsse41.la libavx2.la libavx512f.la \ + $(am__append_9) librdrand_la_LDFLAGS = $(libsodium_la_LDFLAGS) librdrand_la_CPPFLAGS = $(libsodium_la_CPPFLAGS) \ @CFLAGS_RDRAND@ @@ -1037,6 +1054,13 @@ librdrand_la_CPPFLAGS = $(libsodium_la_CPPFLAGS) \ librdrand_la_SOURCES = \ randombytes/internal/randombytes_internal_random.c +libarmcrypto_la_LDFLAGS = $(libsodium_la_LDFLAGS) +libarmcrypto_la_CPPFLAGS = $(libsodium_la_CPPFLAGS) \ + @CFLAGS_ARMCRYPTO@ + +libarmcrypto_la_SOURCES = \ + crypto_aead/aes256gcm/armcrypto/aead_aes256gcm_armcrypto.c + libaesni_la_LDFLAGS = $(libsodium_la_LDFLAGS) libaesni_la_CPPFLAGS = $(libsodium_la_CPPFLAGS) \ @CFLAGS_SSE2@ @CFLAGS_SSSE3@ @CFLAGS_AVX@ @CFLAGS_AESNI@ @CFLAGS_PCLMUL@ @@ -1192,6 +1216,18 @@ crypto_aead/aes256gcm/aesni/libaesni_la-aead_aes256gcm_aesni.lo: \ libaesni.la: $(libaesni_la_OBJECTS) $(libaesni_la_DEPENDENCIES) $(EXTRA_libaesni_la_DEPENDENCIES) $(AM_V_CCLD)$(libaesni_la_LINK) $(libaesni_la_OBJECTS) $(libaesni_la_LIBADD) $(LIBS) +crypto_aead/aes256gcm/armcrypto/$(am__dirstamp): + @$(MKDIR_P) crypto_aead/aes256gcm/armcrypto + @: > crypto_aead/aes256gcm/armcrypto/$(am__dirstamp) +crypto_aead/aes256gcm/armcrypto/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) crypto_aead/aes256gcm/armcrypto/$(DEPDIR) + @: > crypto_aead/aes256gcm/armcrypto/$(DEPDIR)/$(am__dirstamp) +crypto_aead/aes256gcm/armcrypto/libarmcrypto_la-aead_aes256gcm_armcrypto.lo: \ + crypto_aead/aes256gcm/armcrypto/$(am__dirstamp) \ + crypto_aead/aes256gcm/armcrypto/$(DEPDIR)/$(am__dirstamp) + +libarmcrypto.la: $(libarmcrypto_la_OBJECTS) $(libarmcrypto_la_DEPENDENCIES) $(EXTRA_libarmcrypto_la_DEPENDENCIES) + $(AM_V_CCLD)$(libarmcrypto_la_LINK) $(libarmcrypto_la_OBJECTS) $(libarmcrypto_la_LIBADD) $(LIBS) crypto_generichash/blake2b/ref/$(am__dirstamp): @$(MKDIR_P) crypto_generichash/blake2b/ref @: > crypto_generichash/blake2b/ref/$(am__dirstamp) @@ -1249,6 +1285,15 @@ randombytes/internal/librdrand_la-randombytes_internal_random.lo: \ librdrand.la: $(librdrand_la_OBJECTS) $(librdrand_la_DEPENDENCIES) $(EXTRA_librdrand_la_DEPENDENCIES) $(AM_V_CCLD)$(librdrand_la_LINK) $(am_librdrand_la_rpath) $(librdrand_la_OBJECTS) $(librdrand_la_LIBADD) $(LIBS) +crypto_aead/aes256gcm/$(am__dirstamp): + @$(MKDIR_P) crypto_aead/aes256gcm + @: > crypto_aead/aes256gcm/$(am__dirstamp) +crypto_aead/aes256gcm/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) crypto_aead/aes256gcm/$(DEPDIR) + @: > crypto_aead/aes256gcm/$(DEPDIR)/$(am__dirstamp) +crypto_aead/aes256gcm/libsodium_la-aead_aes256gcm.lo: \ + crypto_aead/aes256gcm/$(am__dirstamp) \ + crypto_aead/aes256gcm/$(DEPDIR)/$(am__dirstamp) crypto_aead/chacha20poly1305/sodium/$(am__dirstamp): @$(MKDIR_P) crypto_aead/chacha20poly1305/sodium @: > crypto_aead/chacha20poly1305/sodium/$(am__dirstamp) @@ -1945,8 +1990,12 @@ libssse3.la: $(libssse3_la_OBJECTS) $(libssse3_la_DEPENDENCIES) $(EXTRA_libssse3 mostlyclean-compile: -rm -f *.$(OBJEXT) + -rm -f crypto_aead/aes256gcm/*.$(OBJEXT) + -rm -f crypto_aead/aes256gcm/*.lo -rm -f crypto_aead/aes256gcm/aesni/*.$(OBJEXT) -rm -f crypto_aead/aes256gcm/aesni/*.lo + -rm -f crypto_aead/aes256gcm/armcrypto/*.$(OBJEXT) + -rm -f crypto_aead/aes256gcm/armcrypto/*.lo -rm -f crypto_aead/chacha20poly1305/sodium/*.$(OBJEXT) -rm -f crypto_aead/chacha20poly1305/sodium/*.lo -rm -f crypto_aead/xchacha20poly1305/sodium/*.$(OBJEXT) @@ -2091,7 +2140,9 @@ mostlyclean-compile: distclean-compile: -rm -f *.tab.c +@AMDEP_TRUE@@am__include@ @am__quote@crypto_aead/aes256gcm/$(DEPDIR)/libsodium_la-aead_aes256gcm.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@crypto_aead/aes256gcm/aesni/$(DEPDIR)/libaesni_la-aead_aes256gcm_aesni.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@crypto_aead/aes256gcm/armcrypto/$(DEPDIR)/libarmcrypto_la-aead_aes256gcm_armcrypto.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@crypto_aead/chacha20poly1305/sodium/$(DEPDIR)/libsodium_la-aead_chacha20poly1305.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@crypto_aead/xchacha20poly1305/sodium/$(DEPDIR)/libsodium_la-aead_xchacha20poly1305.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@crypto_auth/$(DEPDIR)/libsodium_la-crypto_auth.Plo@am__quote@ # am--include-marker @@ -2275,6 +2326,13 @@ crypto_aead/aes256gcm/aesni/libaesni_la-aead_aes256gcm_aesni.lo: crypto_aead/aes @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libaesni_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o crypto_aead/aes256gcm/aesni/libaesni_la-aead_aes256gcm_aesni.lo `test -f 'crypto_aead/aes256gcm/aesni/aead_aes256gcm_aesni.c' || echo '$(srcdir)/'`crypto_aead/aes256gcm/aesni/aead_aes256gcm_aesni.c +crypto_aead/aes256gcm/armcrypto/libarmcrypto_la-aead_aes256gcm_armcrypto.lo: crypto_aead/aes256gcm/armcrypto/aead_aes256gcm_armcrypto.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libarmcrypto_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT crypto_aead/aes256gcm/armcrypto/libarmcrypto_la-aead_aes256gcm_armcrypto.lo -MD -MP -MF crypto_aead/aes256gcm/armcrypto/$(DEPDIR)/libarmcrypto_la-aead_aes256gcm_armcrypto.Tpo -c -o crypto_aead/aes256gcm/armcrypto/libarmcrypto_la-aead_aes256gcm_armcrypto.lo `test -f 'crypto_aead/aes256gcm/armcrypto/aead_aes256gcm_armcrypto.c' || echo '$(srcdir)/'`crypto_aead/aes256gcm/armcrypto/aead_aes256gcm_armcrypto.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) crypto_aead/aes256gcm/armcrypto/$(DEPDIR)/libarmcrypto_la-aead_aes256gcm_armcrypto.Tpo crypto_aead/aes256gcm/armcrypto/$(DEPDIR)/libarmcrypto_la-aead_aes256gcm_armcrypto.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='crypto_aead/aes256gcm/armcrypto/aead_aes256gcm_armcrypto.c' object='crypto_aead/aes256gcm/armcrypto/libarmcrypto_la-aead_aes256gcm_armcrypto.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libarmcrypto_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o crypto_aead/aes256gcm/armcrypto/libarmcrypto_la-aead_aes256gcm_armcrypto.lo `test -f 'crypto_aead/aes256gcm/armcrypto/aead_aes256gcm_armcrypto.c' || echo '$(srcdir)/'`crypto_aead/aes256gcm/armcrypto/aead_aes256gcm_armcrypto.c + crypto_generichash/blake2b/ref/libavx2_la-blake2b-compress-avx2.lo: crypto_generichash/blake2b/ref/blake2b-compress-avx2.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libavx2_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT crypto_generichash/blake2b/ref/libavx2_la-blake2b-compress-avx2.lo -MD -MP -MF crypto_generichash/blake2b/ref/$(DEPDIR)/libavx2_la-blake2b-compress-avx2.Tpo -c -o crypto_generichash/blake2b/ref/libavx2_la-blake2b-compress-avx2.lo `test -f 'crypto_generichash/blake2b/ref/blake2b-compress-avx2.c' || echo '$(srcdir)/'`crypto_generichash/blake2b/ref/blake2b-compress-avx2.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) crypto_generichash/blake2b/ref/$(DEPDIR)/libavx2_la-blake2b-compress-avx2.Tpo crypto_generichash/blake2b/ref/$(DEPDIR)/libavx2_la-blake2b-compress-avx2.Plo @@ -2317,6 +2375,13 @@ randombytes/internal/librdrand_la-randombytes_internal_random.lo: randombytes/in @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(librdrand_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o randombytes/internal/librdrand_la-randombytes_internal_random.lo `test -f 'randombytes/internal/randombytes_internal_random.c' || echo '$(srcdir)/'`randombytes/internal/randombytes_internal_random.c +crypto_aead/aes256gcm/libsodium_la-aead_aes256gcm.lo: crypto_aead/aes256gcm/aead_aes256gcm.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libsodium_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT crypto_aead/aes256gcm/libsodium_la-aead_aes256gcm.lo -MD -MP -MF crypto_aead/aes256gcm/$(DEPDIR)/libsodium_la-aead_aes256gcm.Tpo -c -o crypto_aead/aes256gcm/libsodium_la-aead_aes256gcm.lo `test -f 'crypto_aead/aes256gcm/aead_aes256gcm.c' || echo '$(srcdir)/'`crypto_aead/aes256gcm/aead_aes256gcm.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) crypto_aead/aes256gcm/$(DEPDIR)/libsodium_la-aead_aes256gcm.Tpo crypto_aead/aes256gcm/$(DEPDIR)/libsodium_la-aead_aes256gcm.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='crypto_aead/aes256gcm/aead_aes256gcm.c' object='crypto_aead/aes256gcm/libsodium_la-aead_aes256gcm.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libsodium_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o crypto_aead/aes256gcm/libsodium_la-aead_aes256gcm.lo `test -f 'crypto_aead/aes256gcm/aead_aes256gcm.c' || echo '$(srcdir)/'`crypto_aead/aes256gcm/aead_aes256gcm.c + crypto_aead/chacha20poly1305/sodium/libsodium_la-aead_chacha20poly1305.lo: crypto_aead/chacha20poly1305/sodium/aead_chacha20poly1305.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libsodium_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT crypto_aead/chacha20poly1305/sodium/libsodium_la-aead_chacha20poly1305.lo -MD -MP -MF crypto_aead/chacha20poly1305/sodium/$(DEPDIR)/libsodium_la-aead_chacha20poly1305.Tpo -c -o crypto_aead/chacha20poly1305/sodium/libsodium_la-aead_chacha20poly1305.lo `test -f 'crypto_aead/chacha20poly1305/sodium/aead_chacha20poly1305.c' || echo '$(srcdir)/'`crypto_aead/chacha20poly1305/sodium/aead_chacha20poly1305.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) crypto_aead/chacha20poly1305/sodium/$(DEPDIR)/libsodium_la-aead_chacha20poly1305.Tpo crypto_aead/chacha20poly1305/sodium/$(DEPDIR)/libsodium_la-aead_chacha20poly1305.Plo @@ -3015,7 +3080,9 @@ mostlyclean-libtool: clean-libtool: -rm -rf .libs _libs + -rm -rf crypto_aead/aes256gcm/.libs crypto_aead/aes256gcm/_libs -rm -rf crypto_aead/aes256gcm/aesni/.libs crypto_aead/aes256gcm/aesni/_libs + -rm -rf crypto_aead/aes256gcm/armcrypto/.libs crypto_aead/aes256gcm/armcrypto/_libs -rm -rf crypto_aead/chacha20poly1305/sodium/.libs crypto_aead/chacha20poly1305/sodium/_libs -rm -rf crypto_aead/xchacha20poly1305/sodium/.libs crypto_aead/xchacha20poly1305/sodium/_libs -rm -rf crypto_auth/.libs crypto_auth/_libs @@ -3304,8 +3371,12 @@ clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + -rm -f crypto_aead/aes256gcm/$(DEPDIR)/$(am__dirstamp) + -rm -f crypto_aead/aes256gcm/$(am__dirstamp) -rm -f crypto_aead/aes256gcm/aesni/$(DEPDIR)/$(am__dirstamp) -rm -f crypto_aead/aes256gcm/aesni/$(am__dirstamp) + -rm -f crypto_aead/aes256gcm/armcrypto/$(DEPDIR)/$(am__dirstamp) + -rm -f crypto_aead/aes256gcm/armcrypto/$(am__dirstamp) -rm -f crypto_aead/chacha20poly1305/sodium/$(DEPDIR)/$(am__dirstamp) -rm -f crypto_aead/chacha20poly1305/sodium/$(am__dirstamp) -rm -f crypto_aead/xchacha20poly1305/sodium/$(DEPDIR)/$(am__dirstamp) @@ -3476,7 +3547,9 @@ clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \ clean-noinstLTLIBRARIES mostlyclean-am distclean: distclean-recursive - -rm -f crypto_aead/aes256gcm/aesni/$(DEPDIR)/libaesni_la-aead_aes256gcm_aesni.Plo + -rm -f crypto_aead/aes256gcm/$(DEPDIR)/libsodium_la-aead_aes256gcm.Plo + -rm -f crypto_aead/aes256gcm/aesni/$(DEPDIR)/libaesni_la-aead_aes256gcm_aesni.Plo + -rm -f crypto_aead/aes256gcm/armcrypto/$(DEPDIR)/libarmcrypto_la-aead_aes256gcm_armcrypto.Plo -rm -f crypto_aead/chacha20poly1305/sodium/$(DEPDIR)/libsodium_la-aead_chacha20poly1305.Plo -rm -f crypto_aead/xchacha20poly1305/sodium/$(DEPDIR)/libsodium_la-aead_xchacha20poly1305.Plo -rm -f crypto_auth/$(DEPDIR)/libsodium_la-crypto_auth.Plo @@ -3629,7 +3702,9 @@ install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-recursive - -rm -f crypto_aead/aes256gcm/aesni/$(DEPDIR)/libaesni_la-aead_aes256gcm_aesni.Plo + -rm -f crypto_aead/aes256gcm/$(DEPDIR)/libsodium_la-aead_aes256gcm.Plo + -rm -f crypto_aead/aes256gcm/aesni/$(DEPDIR)/libaesni_la-aead_aes256gcm_aesni.Plo + -rm -f crypto_aead/aes256gcm/armcrypto/$(DEPDIR)/libarmcrypto_la-aead_aes256gcm_armcrypto.Plo -rm -f crypto_aead/chacha20poly1305/sodium/$(DEPDIR)/libsodium_la-aead_chacha20poly1305.Plo -rm -f crypto_aead/xchacha20poly1305/sodium/$(DEPDIR)/libsodium_la-aead_xchacha20poly1305.Plo -rm -f crypto_auth/$(DEPDIR)/libsodium_la-crypto_auth.Plo diff --git a/src/libsodium/crypto_aead/aes256gcm/aead_aes256gcm.c b/src/libsodium/crypto_aead/aes256gcm/aead_aes256gcm.c new file mode 100644 index 0000000000..2946ba873b --- /dev/null +++ b/src/libsodium/crypto_aead/aes256gcm/aead_aes256gcm.c @@ -0,0 +1,157 @@ +#include +#include + +#include "crypto_aead_aes256gcm.h" +#include "private/common.h" +#include "randombytes.h" + +size_t +crypto_aead_aes256gcm_keybytes(void) +{ + return crypto_aead_aes256gcm_KEYBYTES; +} + +size_t +crypto_aead_aes256gcm_nsecbytes(void) +{ + return crypto_aead_aes256gcm_NSECBYTES; +} + +size_t +crypto_aead_aes256gcm_npubbytes(void) +{ + return crypto_aead_aes256gcm_NPUBBYTES; +} + +size_t +crypto_aead_aes256gcm_abytes(void) +{ + return crypto_aead_aes256gcm_ABYTES; +} + +size_t +crypto_aead_aes256gcm_statebytes(void) +{ + return (sizeof(crypto_aead_aes256gcm_state) + (size_t) 15U) & ~(size_t) 15U; +} + +size_t +crypto_aead_aes256gcm_messagebytes_max(void) +{ + return crypto_aead_aes256gcm_MESSAGEBYTES_MAX; +} + +void +crypto_aead_aes256gcm_keygen(unsigned char k[crypto_aead_aes256gcm_KEYBYTES]) +{ + randombytes_buf(k, crypto_aead_aes256gcm_KEYBYTES); +} + +#if !((defined(HAVE_ARMCRYPTO) && defined(__clang__) && defined(NATIVE_LITTLE_ENDIAN)) || \ + (defined(HAVE_TMMINTRIN_H) && defined(HAVE_WMMINTRIN_H))) + +#ifndef ENOSYS +#define ENOSYS ENXIO +#endif + +int +crypto_aead_aes256gcm_encrypt_detached(unsigned char *c, unsigned char *mac, + unsigned long long *maclen_p, const unsigned char *m, + unsigned long long mlen, const unsigned char *ad, + unsigned long long adlen, const unsigned char *nsec, + const unsigned char *npub, const unsigned char *k) +{ + errno = ENOSYS; + return -1; +} + +int +crypto_aead_aes256gcm_encrypt(unsigned char *c, unsigned long long *clen_p, const unsigned char *m, + unsigned long long mlen, const unsigned char *ad, + unsigned long long adlen, const unsigned char *nsec, + const unsigned char *npub, const unsigned char *k) +{ + errno = ENOSYS; + return -1; +} + +int +crypto_aead_aes256gcm_decrypt_detached(unsigned char *m, unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *mac, const unsigned char *ad, + unsigned long long adlen, const unsigned char *npub, + const unsigned char *k) +{ + errno = ENOSYS; + return -1; +} + +int +crypto_aead_aes256gcm_decrypt(unsigned char *m, unsigned long long *mlen_p, unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k) +{ + errno = ENOSYS; + return -1; +} + +int +crypto_aead_aes256gcm_beforenm(crypto_aead_aes256gcm_state *st_, const unsigned char *k) +{ + errno = ENOSYS; + return -1; +} + +int +crypto_aead_aes256gcm_encrypt_detached_afternm(unsigned char *c, unsigned char *mac, + unsigned long long *maclen_p, const unsigned char *m, + unsigned long long mlen, const unsigned char *ad, + unsigned long long adlen, const unsigned char *nsec, + const unsigned char *npub, + const crypto_aead_aes256gcm_state *st_) +{ + errno = ENOSYS; + return -1; +} + +int +crypto_aead_aes256gcm_encrypt_afternm(unsigned char *c, unsigned long long *clen_p, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const crypto_aead_aes256gcm_state *st_) +{ + errno = ENOSYS; + return -1; +} + +int +crypto_aead_aes256gcm_decrypt_detached_afternm(unsigned char *m, unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *mac, const unsigned char *ad, + unsigned long long adlen, const unsigned char *npub, + const crypto_aead_aes256gcm_state *st_) +{ + errno = ENOSYS; + return -1; +} + +int +crypto_aead_aes256gcm_decrypt_afternm(unsigned char *m, unsigned long long *mlen_p, + unsigned char *nsec, const unsigned char *c, + unsigned long long clen, const unsigned char *ad, + unsigned long long adlen, const unsigned char *npub, + const crypto_aead_aes256gcm_state *st_) +{ + errno = ENOSYS; + return -1; +} + +int +crypto_aead_aes256gcm_is_available(void) +{ + return 0; +} + +#endif \ No newline at end of file diff --git a/src/libsodium/crypto_aead/aes256gcm/aesni/aead_aes256gcm_aesni.c b/src/libsodium/crypto_aead/aes256gcm/aesni/aead_aes256gcm_aesni.c index 034fdf90d2..91e578df72 100644 --- a/src/libsodium/crypto_aead/aes256gcm/aesni/aead_aes256gcm_aesni.c +++ b/src/libsodium/crypto_aead/aes256gcm/aesni/aead_aes256gcm_aesni.c @@ -17,7 +17,7 @@ #if defined(HAVE_TMMINTRIN_H) && defined(HAVE_WMMINTRIN_H) #ifdef __GNUC__ -#pragma GCC target("aes,pclmul,avx") +#pragma GCC target("avx,aes,pclmul") #endif #if !defined(_MSC_VER) || _MSC_VER < 1800 @@ -442,6 +442,11 @@ aes_gcm_encrypt_generic(const State *st, GHash *sth, unsigned char mac[ABYTES], counter = incr_counters(rev_counters, counter, PARALLEL_BLOCKS); encrypt_xor_wide(st, dst + i, src + i, rev_counters); + PREFETCH_READ(src + i + PARALLEL_BLOCKS * 16); +#if PARALLEL_BLOCKS >= 64 / 16 + PREFETCH_READ(src + i + PARALLEL_BLOCKS * 16 + 64); +#endif + pi = i - PARALLEL_BLOCKS * 16; u = gh_update0(sth, dst + pi, st->hx[2 * PARALLEL_BLOCKS - 1 - 0]); for (j = 1; j < PARALLEL_BLOCKS; j += 1) { @@ -452,6 +457,10 @@ aes_gcm_encrypt_generic(const State *st, GHash *sth, unsigned char mac[ABYTES], encrypt_xor_wide(st, dst + i + PARALLEL_BLOCKS * 16, src + i + PARALLEL_BLOCKS * 16, rev_counters); + PREFETCH_READ(src + i + 2 * PARALLEL_BLOCKS * 16); +#if PARALLEL_BLOCKS >= 64 / 16 + PREFETCH_READ(src + i + 2 * PARALLEL_BLOCKS * 16 + 64); +#endif pi = i; for (j = 0; j < PARALLEL_BLOCKS; j += 1) { gh_update(&u, dst + pi + j * 16, st->hx[PARALLEL_BLOCKS - 1 - j]); @@ -746,7 +755,7 @@ crypto_aead_aes256gcm_encrypt_detached_afternm(unsigned char *c, unsigned char * } gh_required_blocks = required_blocks(ad_len, m_len); if (gh_required_blocks == 0) { - memset(mac, 0x00, ABYTES); + memset(mac, 0xd0, ABYTES); memset(c, 0, m_len); return -1; } @@ -921,7 +930,7 @@ crypto_aead_aes256gcm_decrypt_detached_afternm(unsigned char *m, unsigned char * if (crypto_verify_16(mac, computed_mac) != 0) { sodium_memzero(computed_mac, sizeof computed_mac); - memset(m, 0x00, m_len); + memset(m, 0xd0, m_len); return -1; } return 0; @@ -997,152 +1006,4 @@ crypto_aead_aes256gcm_is_available(void) return sodium_runtime_has_pclmul() & sodium_runtime_has_aesni() & sodium_runtime_has_avx(); } -#else - -#ifndef ENOSYS -#define ENOSYS ENXIO -#endif - -int -crypto_aead_aes256gcm_encrypt_detached(unsigned char *c, unsigned char *mac, - unsigned long long *maclen_p, const unsigned char *m, - unsigned long long mlen, const unsigned char *ad, - unsigned long long adlen, const unsigned char *nsec, - const unsigned char *npub, const unsigned char *k) -{ - errno = ENOSYS; - return -1; -} - -int -crypto_aead_aes256gcm_encrypt(unsigned char *c, unsigned long long *clen_p, const unsigned char *m, - unsigned long long mlen, const unsigned char *ad, - unsigned long long adlen, const unsigned char *nsec, - const unsigned char *npub, const unsigned char *k) -{ - errno = ENOSYS; - return -1; -} - -int -crypto_aead_aes256gcm_decrypt_detached(unsigned char *m, unsigned char *nsec, - const unsigned char *c, unsigned long long clen, - const unsigned char *mac, const unsigned char *ad, - unsigned long long adlen, const unsigned char *npub, - const unsigned char *k) -{ - errno = ENOSYS; - return -1; -} - -int -crypto_aead_aes256gcm_decrypt(unsigned char *m, unsigned long long *mlen_p, unsigned char *nsec, - const unsigned char *c, unsigned long long clen, - const unsigned char *ad, unsigned long long adlen, - const unsigned char *npub, const unsigned char *k) -{ - errno = ENOSYS; - return -1; -} - -int -crypto_aead_aes256gcm_beforenm(crypto_aead_aes256gcm_state *st_, const unsigned char *k) -{ - errno = ENOSYS; - return -1; -} - -int -crypto_aead_aes256gcm_encrypt_detached_afternm(unsigned char *c, unsigned char *mac, - unsigned long long *maclen_p, const unsigned char *m, - unsigned long long mlen, const unsigned char *ad, - unsigned long long adlen, const unsigned char *nsec, - const unsigned char *npub, - const crypto_aead_aes256gcm_state *st_) -{ - errno = ENOSYS; - return -1; -} - -int -crypto_aead_aes256gcm_encrypt_afternm(unsigned char *c, unsigned long long *clen_p, - const unsigned char *m, unsigned long long mlen, - const unsigned char *ad, unsigned long long adlen, - const unsigned char *nsec, const unsigned char *npub, - const crypto_aead_aes256gcm_state *st_) -{ - errno = ENOSYS; - return -1; -} - -int -crypto_aead_aes256gcm_decrypt_detached_afternm(unsigned char *m, unsigned char *nsec, - const unsigned char *c, unsigned long long clen, - const unsigned char *mac, const unsigned char *ad, - unsigned long long adlen, const unsigned char *npub, - const crypto_aead_aes256gcm_state *st_) -{ - errno = ENOSYS; - return -1; -} - -int -crypto_aead_aes256gcm_decrypt_afternm(unsigned char *m, unsigned long long *mlen_p, - unsigned char *nsec, const unsigned char *c, - unsigned long long clen, const unsigned char *ad, - unsigned long long adlen, const unsigned char *npub, - const crypto_aead_aes256gcm_state *st_) -{ - errno = ENOSYS; - return -1; -} - -int -crypto_aead_aes256gcm_is_available(void) -{ - return 0; -} - #endif - -size_t -crypto_aead_aes256gcm_keybytes(void) -{ - return crypto_aead_aes256gcm_KEYBYTES; -} - -size_t -crypto_aead_aes256gcm_nsecbytes(void) -{ - return crypto_aead_aes256gcm_NSECBYTES; -} - -size_t -crypto_aead_aes256gcm_npubbytes(void) -{ - return crypto_aead_aes256gcm_NPUBBYTES; -} - -size_t -crypto_aead_aes256gcm_abytes(void) -{ - return crypto_aead_aes256gcm_ABYTES; -} - -size_t -crypto_aead_aes256gcm_statebytes(void) -{ - return (sizeof(crypto_aead_aes256gcm_state) + (size_t) 15U) & ~(size_t) 15U; -} - -size_t -crypto_aead_aes256gcm_messagebytes_max(void) -{ - return crypto_aead_aes256gcm_MESSAGEBYTES_MAX; -} - -void -crypto_aead_aes256gcm_keygen(unsigned char k[crypto_aead_aes256gcm_KEYBYTES]) -{ - randombytes_buf(k, crypto_aead_aes256gcm_KEYBYTES); -} diff --git a/src/libsodium/crypto_aead/aes256gcm/armcrypto/aead_aes256gcm_armcrypto.c b/src/libsodium/crypto_aead/aes256gcm/armcrypto/aead_aes256gcm_armcrypto.c new file mode 100644 index 0000000000..b633064268 --- /dev/null +++ b/src/libsodium/crypto_aead/aes256gcm/armcrypto/aead_aes256gcm_armcrypto.c @@ -0,0 +1,1019 @@ +#include +#include +#include +#include +#include + +#include "core.h" +#include "crypto_aead_aes256gcm.h" +#include "crypto_verify_16.h" +#include "export.h" +#include "private/common.h" +#include "randombytes.h" +#include "runtime.h" +#include "utils.h" + +#if defined(HAVE_ARMCRYPTO) && defined(__clang__) && defined(NATIVE_LITTLE_ENDIAN) + +#if !defined(MSC_VER) || _MSC_VER < 1800 +#define __vectorcall +#endif + +#ifndef __ARM_FEATURE_AES +#define __ARM_FEATURE_AES 1 +#endif + +#include + +#define ABYTES crypto_aead_aes256gcm_ABYTES +#define NPUBBYTES crypto_aead_aes256gcm_NPUBBYTES +#define KEYBYTES crypto_aead_aes256gcm_KEYBYTES + +#define PARALLEL_BLOCKS 6 +#undef USE_KARATSUBA_MULTIPLICATION + +typedef uint64x2_t BlockVec; + +#define LOAD128(a) vld1q_u64((const uint64_t *) (const void *) (a)) +#define STORE128(a, b) vst1q_u64((uint64_t *) (void *) (a), (b)) +#define AES_XENCRYPT(block_vec, rkey) \ + vreinterpretq_u64_u8(vaesmcq_u8(vaeseq_u8(vreinterpretq_u8_u64(block_vec), rkey))) +#define AES_XENCRYPTLAST(block_vec, rkey) \ + vreinterpretq_u64_u8(vaeseq_u8(vreinterpretq_u8_u64(block_vec), rkey)) +#define XOR128(a, b) veorq_u64((a), (b)) +#define AND128(a, b) vandq_u64((a), (b)) +#define OR128(a, b) vorrq_u64((a), (b)) +#define SET64x2(a, b) vsetq_lane_u64((uint64_t) (a), vmovq_n_u64((uint64_t) (b)), 1) +#define ZERO128 vmovq_n_u8(0) +#define ONE128 SET64x2(0, 1) +#define ADD64x2(a, b) vaddq_u64((a), (b)) +#define SUB64x2(a, b) vsubq_u64((a), (b)) +#define SHL64x2(a, b) vshlq_n_u64((a), (b)) +#define SHR64x2(a, b) vshrq_n_u64((a), (b)) +#define REV128(x) \ + vreinterpretq_u64_u8(__builtin_shufflevector(vreinterpretq_u8_u64(x), vreinterpretq_u8_u64(x), \ + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, \ + 1, 0)) +#define SHUFFLE32x4(x, a, b, c, d) \ + vreinterpretq_u64_u32(__builtin_shufflevector(vreinterpretq_u32_u64(x), \ + vreinterpretq_u32_u64(x), (a), (b), (c), (d))) +#define BYTESHL128(a, b) vreinterpretq_u64_u8(vextq_s8(vdupq_n_s8(0), (int8x16_t) a, 16 - (b))) +#define BYTESHR128(a, b) vreinterpretq_u64_u8(vextq_s8((int8x16_t) a, vdupq_n_s8(0), (b))) + +#define SHL128(a, b) OR128(SHL64x2((a), (b)), SHR64x2(BYTESHL128((a), 8), 64 - (b))) +#define CLMULLO128(a, b) \ + vreinterpretq_u64_p128(vmull_p64((poly64_t) vget_low_u64(a), (poly64_t) vget_low_u64(b))) +#define CLMULHI128(a, b) \ + vreinterpretq_u64_p128(vmull_high_p64(vreinterpretq_p64_s64(a), vreinterpretq_p64_s64(b))) +#define CLMULLOHI128(a, b) \ + vreinterpretq_u64_p128(vmull_p64((poly64_t) vget_low_u64(a), (poly64_t) vget_high_u64(b))) +#define CLMULHILO128(a, b) \ + vreinterpretq_u64_p128(vmull_p64((poly64_t) vget_high_u64(a), (poly64_t) vget_low_u64(b))) +#define PREFETCH_READ(x) __builtin_prefetch((x), 0, 2) +#define PREFETCH_WRITE(x) __builtin_prefetch((x), 1, 2); + +static inline BlockVec +AES_KEYGEN(BlockVec block_vec, const int rc) +{ + uint8x16_t a = vaeseq_u8(vreinterpretq_u8_u64(block_vec), vmovq_n_u8(0)); + const uint8x16_t b = + __builtin_shufflevector(a, a, 4, 1, 14, 11, 1, 14, 11, 4, 12, 9, 6, 3, 9, 6, 3, 12); + const uint64x2_t c = SET64x2((uint64_t) rc << 32, (uint64_t) rc << 32); + return XOR128(b, c); +} + +#define ROUNDS 14 + +#define PC_COUNT (2 * PARALLEL_BLOCKS) + +typedef struct I256 { + BlockVec hi; + BlockVec lo; + BlockVec mid; +} I256; + +typedef BlockVec Precomp; + +typedef struct GHash { + BlockVec acc; +} GHash; + +typedef struct State { + BlockVec rkeys[ROUNDS + 1]; + Precomp hx[PC_COUNT]; +} State; + +static void __vectorcall expand256(const unsigned char key[KEYBYTES], BlockVec rkeys[1 + ROUNDS]) +{ + BlockVec t1, t2, s; + size_t i = 0; + +#define EXPAND_KEY_1(RC) \ + rkeys[i++] = t2; \ + s = AES_KEYGEN(t2, RC); \ + t1 = XOR128(t1, BYTESHL128(t1, 4)); \ + t1 = XOR128(t1, BYTESHL128(t1, 8)); \ + t1 = XOR128(t1, SHUFFLE32x4(s, 3, 3, 3, 3)); + +#define EXPAND_KEY_2(RC) \ + rkeys[i++] = t1; \ + s = AES_KEYGEN(t1, RC); \ + t2 = XOR128(t2, BYTESHL128(t2, 4)); \ + t2 = XOR128(t2, BYTESHL128(t2, 8)); \ + t2 = XOR128(t2, SHUFFLE32x4(s, 2, 2, 2, 2)); + + t1 = LOAD128(&key[0]); + t2 = LOAD128(&key[16]); + + rkeys[i++] = t1; + EXPAND_KEY_1(0x01); + EXPAND_KEY_2(0x01); + EXPAND_KEY_1(0x02); + EXPAND_KEY_2(0x02); + EXPAND_KEY_1(0x04); + EXPAND_KEY_2(0x04); + EXPAND_KEY_1(0x08); + EXPAND_KEY_2(0x08); + EXPAND_KEY_1(0x10); + EXPAND_KEY_2(0x10); + EXPAND_KEY_1(0x20); + EXPAND_KEY_2(0x20); + EXPAND_KEY_1(0x40); + rkeys[i++] = t1; +} + +/* Encrypt a single AES block */ + +static inline void +encrypt(const State *st, unsigned char dst[16], const unsigned char src[16]) +{ + BlockVec t; + + size_t i; + + t = AES_XENCRYPT(LOAD128(src), st->rkeys[0]); + for (i = 1; i < ROUNDS - 1; i++) { + t = AES_XENCRYPT(t, st->rkeys[i]); + } + t = AES_XENCRYPTLAST(t, st->rkeys[i]); + t = XOR128(t, st->rkeys[ROUNDS]); + STORE128(dst, t); +} + +/* Encrypt and add a single AES block */ + +static inline void __vectorcall encrypt_xor_block(const State *st, unsigned char dst[16], + const unsigned char src[16], + const BlockVec counter) +{ + BlockVec ts; + size_t i; + + ts = AES_XENCRYPT(counter, st->rkeys[0]); + for (i = 1; i < ROUNDS - 1; i++) { + ts = AES_XENCRYPT(ts, st->rkeys[i]); + } + ts = AES_XENCRYPTLAST(ts, st->rkeys[i]); + ts = XOR128(ts, XOR128(st->rkeys[ROUNDS], LOAD128(src))); + STORE128(dst, ts); +} + +/* Encrypt and add PARALLEL_BLOCKS AES blocks */ + +static inline void __vectorcall encrypt_xor_wide(const State *st, + unsigned char dst[16 * PARALLEL_BLOCKS], + const unsigned char src[16 * PARALLEL_BLOCKS], + const BlockVec counters[PARALLEL_BLOCKS]) +{ + BlockVec ts[PARALLEL_BLOCKS]; + size_t i, j; + + for (j = 0; j < PARALLEL_BLOCKS; j++) { + ts[j] = AES_XENCRYPT(counters[j], st->rkeys[0]); + } + for (i = 1; i < ROUNDS - 1; i++) { + for (j = 0; j < PARALLEL_BLOCKS; j++) { + ts[j] = AES_XENCRYPT(ts[j], st->rkeys[i]); + } + } + for (j = 0; j < PARALLEL_BLOCKS; j++) { + ts[j] = AES_XENCRYPTLAST(ts[j], st->rkeys[i]); + ts[j] = XOR128(ts[j], XOR128(st->rkeys[ROUNDS], LOAD128(&src[16 * j]))); + } + for (j = 0; j < PARALLEL_BLOCKS; j++) { + STORE128(&dst[16 * j], ts[j]); + } +} + +/* Square a field element */ + +static inline I256 __vectorcall clsq128(const BlockVec x) +{ + const BlockVec r_lo = CLMULLO128(x, x); + const BlockVec r_hi = CLMULHI128(x, x); + + return (I256) { + SODIUM_C99(.hi =) r_hi, + SODIUM_C99(.lo =) r_lo, + SODIUM_C99(.mid =) ZERO128, + }; +} + +/* Multiply two field elements -- Textbook multiplication is faster than Karatsuba on some recent + * CPUs */ + +static inline I256 __vectorcall clmul128(const BlockVec x, const BlockVec y) +{ +#ifdef USE_KARATSUBA_MULTIPLICATION + const BlockVec x_hi = BYTESHR128(x, 8); + const BlockVec y_hi = BYTESHR128(y, 8); + const BlockVec r_lo = CLMULLO128(x, y); + const BlockVec r_hi = CLMULHI128(x, y); + const BlockVec r_mid = XOR128(CLMULLO128(XOR128(x, x_hi), XOR128(y, y_hi)), XOR128(r_lo, r_hi)); + + return (I256) { + SODIUM_C99(.hi =) r_hi, + SODIUM_C99(.lo =) r_lo, + SODIUM_C99(.mid =) r_mid, + }; +#else + const BlockVec r_hi = CLMULHI128(x, y); + const BlockVec r_lo = CLMULLO128(x, y); + const BlockVec r_mid = XOR128(CLMULHILO128(x, y), CLMULLOHI128(x, y)); + + return (I256) { + SODIUM_C99(.hi =) r_hi, + SODIUM_C99(.lo =) r_lo, + SODIUM_C99(.mid =) r_mid, + }; +#endif +} + +/* Merge the middle word and reduce a field element */ + +static inline BlockVec __vectorcall gcm_reduce(const I256 x) +{ + const BlockVec hi = XOR128(x.hi, BYTESHR128(x.mid, 8)); + const BlockVec lo = XOR128(x.lo, BYTESHL128(x.mid, 8)); + + const BlockVec p64 = SET64x2(0, 0xc200000000000000); + const BlockVec a = CLMULLO128(lo, p64); + const BlockVec b = XOR128(SHUFFLE32x4(lo, 2, 3, 0, 1), a); + const BlockVec c = CLMULLO128(b, p64); + const BlockVec d = XOR128(SHUFFLE32x4(b, 2, 3, 0, 1), c); + + return XOR128(d, hi); +} + +/* Precompute powers of H from `from` to `to` */ + +static inline void __vectorcall precomp(Precomp hx[PC_COUNT], const size_t from, const size_t to) +{ + const Precomp h = hx[0]; + size_t i; + + for (i = from & ~1U; i < to; i += 2) { + hx[i] = gcm_reduce(clmul128(hx[i - 1], h)); + hx[i + 1] = gcm_reduce(clsq128(hx[i / 2])); + } +} + +/* Precompute powers of H given a key and a block count */ + +static void __vectorcall precomp_for_block_count(Precomp hx[PC_COUNT], + const unsigned char gh_key[16], + const size_t block_count) +{ + const BlockVec h0 = REV128(LOAD128(gh_key)); + BlockVec carry = SET64x2(0xc200000000000000, 1); + BlockVec mask = SUB64x2(ZERO128, SHR64x2(h0, 63)); + BlockVec h0_shifted; + BlockVec h; + + mask = SHUFFLE32x4(mask, 3, 3, 3, 3); + carry = AND128(carry, mask); + h0_shifted = SHL128(h0, 1); + h = XOR128(h0_shifted, carry); + + hx[0] = h; + hx[1] = gcm_reduce(clsq128(hx[0])); + + if (block_count >= PC_COUNT) { + precomp(hx, 2, PC_COUNT); + } else { + precomp(hx, 2, block_count); + } +} + +/* Initialize a GHash */ + +static inline void +gh_init(GHash *sth) +{ + sth->acc = ZERO128; +} + +static inline I256 __vectorcall gh_update0(const GHash *const sth, const unsigned char *const p, + const Precomp hn) +{ + const BlockVec m = REV128(LOAD128(p)); + return clmul128(XOR128(sth->acc, m), hn); +} + +static inline void __vectorcall gh_update(I256 *const u, const unsigned char *p, const Precomp hn) +{ + const BlockVec m = REV128(LOAD128(p)); + const I256 t = clmul128(m, hn); + *u = (I256) { SODIUM_C99(.hi =) XOR128(u->hi, t.hi), SODIUM_C99(.lo =) XOR128(u->lo, t.lo), + SODIUM_C99(.mid =) XOR128(u->mid, t.mid) }; +} + +/* Absorb ad_len bytes of associated data. There has to be no partial block. */ + +static inline void +gh_ad_blocks(const State *st, GHash *sth, const unsigned char *ad, size_t ad_len) +{ + size_t i; + + i = (size_t) 0U; + for (; i + PC_COUNT * 16 <= ad_len; i += PC_COUNT * 16) { + I256 u = gh_update0(sth, ad + i, st->hx[PC_COUNT - 1 - 0]); + size_t j; + + for (j = 1; j < PC_COUNT; j += 1) { + gh_update(&u, ad + i + j * 16, st->hx[PC_COUNT - 1 - j]); + } + sth->acc = gcm_reduce(u); + } + for (; i + PC_COUNT * 16 / 2 <= ad_len; i += PC_COUNT * 16 / 2) { + I256 u = gh_update0(sth, ad + i, st->hx[PC_COUNT / 2 - 1 - 0]); + size_t j; + + for (j = 1; j < PC_COUNT / 2; j += 1) { + gh_update(&u, ad + i + j * 16, st->hx[PC_COUNT / 2 - 1 - j]); + } + sth->acc = gcm_reduce(u); + } + for (; i + 4 * 16 <= ad_len; i += 4 * 16) { + size_t j; + I256 u = gh_update0(sth, ad + i, st->hx[4 - 1 - 0]); + + for (j = 1; j < 4; j += 1) { + gh_update(&u, ad + i + j * 16, st->hx[4 - 1 - j]); + } + sth->acc = gcm_reduce(u); + } + for (; i + 2 * 16 <= ad_len; i += 2 * 16) { + size_t j; + I256 u = gh_update0(sth, ad + i, st->hx[2 - 1 - 0]); + + for (j = 1; j < 2; j += 1) { + gh_update(&u, ad + i + j * 16, st->hx[2 - 1 - j]); + } + sth->acc = gcm_reduce(u); + } + if (i < ad_len) { + I256 u = gh_update0(sth, ad + i, st->hx[0]); + sth->acc = gcm_reduce(u); + } +} + +/* Increment counters */ + +static inline BlockVec __vectorcall incr_counters(BlockVec rev_counters[], BlockVec counter, + const size_t n) +{ + size_t i; + + const BlockVec one = ONE128; + for (i = 0; i < n; i++) { + rev_counters[i] = REV128(counter); + counter = ADD64x2(counter, one); + } + return counter; +} + +/* Compute the number of required blocks to encrypt and authenticate `ad_len` of associated data, + * and `m_len` of encrypted bytes. Return `0` if limits would be exceeded.*/ + +static inline size_t +required_blocks(const size_t ad_len, const size_t m_len) +{ + const size_t ad_blocks = (ad_len + 15) / 16; + const size_t m_blocks = (m_len + 15) / 16; + + if (ad_len > SIZE_MAX - 2 * PARALLEL_BLOCKS * 16 || + m_len > SIZE_MAX - 2 * PARALLEL_BLOCKS * 16 || ad_len < ad_blocks || m_len < m_blocks || + m_blocks >= (1ULL << 32) - 2) { + return 0; + } + return ad_blocks + m_blocks + 1; +} + +/* Generic AES-GCM encryption. "Generic" as it can handle arbitrary input sizes, +unlike a length-limited version that would precompute all the required powers of H */ + +static void +aes_gcm_encrypt_generic(const State *st, GHash *sth, unsigned char mac[ABYTES], unsigned char *dst, + const unsigned char *src, size_t src_len, const unsigned char *ad, + size_t ad_len, unsigned char counter_[16]) +{ + CRYPTO_ALIGN(32) I256 u; + CRYPTO_ALIGN(16) unsigned char last_blocks[2 * 16]; + const BlockVec one = ONE128; + BlockVec final_block; + BlockVec rev_counters[PARALLEL_BLOCKS]; + BlockVec counter; + size_t i; + size_t j; + size_t left; + size_t pi; + + COMPILER_ASSERT(PC_COUNT % PARALLEL_BLOCKS == 0); + + /* Associated data */ + + if (ad != NULL && ad_len != 0) { + gh_ad_blocks(st, sth, ad, ad_len & ~15); + left = ad_len & 15; + if (left != 0) { + unsigned char pad[16]; + + memset(pad, 0, sizeof pad); + memcpy(pad, ad + ad_len - left, left); + gh_ad_blocks(st, sth, pad, sizeof pad); + } + } + + /* Encrypted data */ + + counter = REV128(LOAD128(counter_)); + i = 0; + + /* 2*PARALLEL_BLOCKS aggregation */ + + if (src_len - i >= 2 * PARALLEL_BLOCKS * 16) { + counter = incr_counters(rev_counters, counter, PARALLEL_BLOCKS); + encrypt_xor_wide(st, dst + i, src + i, rev_counters); + i += PARALLEL_BLOCKS * 16; + + for (; i + 2 * PARALLEL_BLOCKS * 16 <= src_len; i += 2 * PARALLEL_BLOCKS * 16) { + counter = incr_counters(rev_counters, counter, PARALLEL_BLOCKS); + encrypt_xor_wide(st, dst + i, src + i, rev_counters); + + pi = i - PARALLEL_BLOCKS * 16; + u = gh_update0(sth, dst + pi, st->hx[2 * PARALLEL_BLOCKS - 1 - 0]); + for (j = 1; j < PARALLEL_BLOCKS; j += 1) { + gh_update(&u, dst + pi + j * 16, st->hx[2 * PARALLEL_BLOCKS - 1 - j]); + } + + counter = incr_counters(rev_counters, counter, PARALLEL_BLOCKS); + encrypt_xor_wide(st, dst + i + PARALLEL_BLOCKS * 16, src + i + PARALLEL_BLOCKS * 16, + rev_counters); + + pi = i; + for (j = 0; j < PARALLEL_BLOCKS; j += 1) { + gh_update(&u, dst + pi + j * 16, st->hx[PARALLEL_BLOCKS - 1 - j]); + } + sth->acc = gcm_reduce(u); + } + + pi = i - PARALLEL_BLOCKS * 16; + u = gh_update0(sth, dst + pi, st->hx[PARALLEL_BLOCKS - 1 - 0]); + for (j = 1; j < PARALLEL_BLOCKS; j += 1) { + gh_update(&u, dst + pi + j * 16, st->hx[PARALLEL_BLOCKS - 1 - j]); + } + sth->acc = gcm_reduce(u); + } + + /* PARALLEL_BLOCKS aggregation */ + + if (src_len - i >= PARALLEL_BLOCKS * 16) { + counter = incr_counters(rev_counters, counter, PARALLEL_BLOCKS); + encrypt_xor_wide(st, dst + i, src + i, rev_counters); + i += PARALLEL_BLOCKS * 16; + + for (; i + PARALLEL_BLOCKS * 16 <= src_len; i += PARALLEL_BLOCKS * 16) { + counter = incr_counters(rev_counters, counter, PARALLEL_BLOCKS); + encrypt_xor_wide(st, dst + i, src + i, rev_counters); + + pi = i - PARALLEL_BLOCKS * 16; + u = gh_update0(sth, dst + pi, st->hx[PARALLEL_BLOCKS - 1 - 0]); + for (j = 1; j < PARALLEL_BLOCKS; j += 1) { + gh_update(&u, dst + pi + j * 16, st->hx[PARALLEL_BLOCKS - 1 - j]); + } + sth->acc = gcm_reduce(u); + } + + pi = i - PARALLEL_BLOCKS * 16; + u = gh_update0(sth, dst + pi, st->hx[PARALLEL_BLOCKS - 1 - 0]); + for (j = 1; j < PARALLEL_BLOCKS; j += 1) { + gh_update(&u, dst + pi + j * 16, st->hx[PARALLEL_BLOCKS - 1 - j]); + } + sth->acc = gcm_reduce(u); + } + + /* 4-blocks aggregation */ + + for (; i + 4 * 16 <= src_len; i += 4 * 16) { + counter = incr_counters(rev_counters, counter, 4); + for (j = 0; j < 4; j++) { + encrypt_xor_block(st, dst + i + j * 16, src + i + j * 16, rev_counters[j]); + } + + u = gh_update0(sth, dst + i, st->hx[4 - 1 - 0]); + for (j = 1; j < 4; j += 1) { + gh_update(&u, dst + i + j * 16, st->hx[4 - 1 - j]); + } + sth->acc = gcm_reduce(u); + } + + /* 2-blocks aggregation */ + + for (; i + 2 * 16 <= src_len; i += 2 * 16) { + counter = incr_counters(rev_counters, counter, 2); + for (j = 0; j < 2; j++) { + encrypt_xor_block(st, dst + i + j * 16, src + i + j * 16, rev_counters[j]); + } + + u = gh_update0(sth, dst + i, st->hx[2 - 1 - 0]); + for (j = 1; j < 2; j += 1) { + gh_update(&u, dst + i + j * 16, st->hx[2 - 1 - j]); + } + sth->acc = gcm_reduce(u); + } + + /* Remaining *partial* blocks; if we have 16 bytes left, we want to keep the + full block authenticated along with the final block, hence < and not <= */ + + for (; i + 16 < src_len; i += 16) { + encrypt_xor_block(st, dst + i, src + i, REV128(counter)); + u = gh_update0(sth, dst + i, st->hx[1 - 1 - 0]); + sth->acc = gcm_reduce(u); + counter = ADD64x2(counter, one); + } + + /* Authenticate both the last block of the message and the final block */ + + final_block = REV128(SET64x2(ad_len * 8, src_len * 8)); + STORE32_BE(counter_ + NPUBBYTES, 1); + encrypt(st, mac, counter_); + left = src_len - i; + if (left != 0) { + for (j = 0; j < left; j++) { + last_blocks[j] = src[i + j]; + } + STORE128(last_blocks + 16, final_block); + encrypt_xor_block(st, last_blocks, last_blocks, REV128(counter)); + for (; j < 16; j++) { + last_blocks[j] = 0; + } + for (j = 0; j < left; j++) { + dst[i + j] = last_blocks[j]; + } + gh_ad_blocks(st, sth, last_blocks, 32); + } else { + STORE128(last_blocks, final_block); + gh_ad_blocks(st, sth, last_blocks, 16); + } + STORE128(mac, XOR128(LOAD128(mac), REV128(sth->acc))); +} + +/* Generic AES-GCM decryption. "Generic" as it can handle arbitrary input sizes, +unlike a length-limited version that would precompute all the required powers of H */ + +static void +aes_gcm_decrypt_generic(const State *st, GHash *sth, unsigned char mac[ABYTES], unsigned char *dst, + const unsigned char *src, size_t src_len, const unsigned char *ad, + size_t ad_len, unsigned char counter_[16]) +{ + CRYPTO_ALIGN(32) I256 u; + CRYPTO_ALIGN(16) unsigned char last_blocks[2 * 16]; + const BlockVec one = ONE128; + BlockVec final_block; + BlockVec rev_counters[PARALLEL_BLOCKS]; + BlockVec counter; + size_t i; + size_t j; + size_t left; + + COMPILER_ASSERT(PC_COUNT % PARALLEL_BLOCKS == 0); + + /* Associated data */ + + if (ad != NULL && ad_len != 0) { + gh_ad_blocks(st, sth, ad, ad_len & ~15); + left = ad_len & 15; + if (left != 0) { + unsigned char pad[16]; + + memset(pad, 0, sizeof pad); + memcpy(pad, ad + ad_len - left, left); + gh_ad_blocks(st, sth, pad, sizeof pad); + } + } + + /* Encrypted data */ + + counter = REV128(LOAD128(counter_)); + i = 0; + + /* 2*PARALLEL_BLOCKS aggregation */ + + while (i + 2 * PARALLEL_BLOCKS * 16 <= src_len) { + counter = incr_counters(rev_counters, counter, PARALLEL_BLOCKS); + + u = gh_update0(sth, src + i, st->hx[2 * PARALLEL_BLOCKS - 1 - 0]); + for (j = 1; j < PARALLEL_BLOCKS; j += 1) { + gh_update(&u, src + i + j * 16, st->hx[2 * PARALLEL_BLOCKS - 1 - j]); + } + + encrypt_xor_wide(st, dst + i, src + i, rev_counters); + + counter = incr_counters(rev_counters, counter, PARALLEL_BLOCKS); + + i += PARALLEL_BLOCKS * 16; + for (j = 0; j < PARALLEL_BLOCKS; j += 1) { + gh_update(&u, src + i + j * 16, st->hx[PARALLEL_BLOCKS - 1 - j]); + } + sth->acc = gcm_reduce(u); + + encrypt_xor_wide(st, dst + i, src + i, rev_counters); + i += PARALLEL_BLOCKS * 16; + } + + /* PARALLEL_BLOCKS aggregation */ + + for (; i + PARALLEL_BLOCKS * 16 <= src_len; i += PARALLEL_BLOCKS * 16) { + counter = incr_counters(rev_counters, counter, PARALLEL_BLOCKS); + + u = gh_update0(sth, src + i, st->hx[PARALLEL_BLOCKS - 1 - 0]); + for (j = 1; j < PARALLEL_BLOCKS; j += 1) { + gh_update(&u, src + i + j * 16, st->hx[PARALLEL_BLOCKS - 1 - j]); + } + sth->acc = gcm_reduce(u); + + encrypt_xor_wide(st, dst + i, src + i, rev_counters); + } + + /* 4-blocks aggregation */ + + for (; i + 4 * 16 <= src_len; i += 4 * 16) { + counter = incr_counters(rev_counters, counter, 4); + + u = gh_update0(sth, src + i, st->hx[4 - 1 - 0]); + for (j = 1; j < 4; j += 1) { + gh_update(&u, src + i + j * 16, st->hx[4 - 1 - j]); + } + sth->acc = gcm_reduce(u); + + for (j = 0; j < 4; j++) { + encrypt_xor_block(st, dst + i + j * 16, src + i + j * 16, rev_counters[j]); + } + } + + /* 2-blocks aggregation */ + + for (; i + 2 * 16 <= src_len; i += 2 * 16) { + counter = incr_counters(rev_counters, counter, 2); + + u = gh_update0(sth, src + i, st->hx[2 - 1 - 0]); + for (j = 1; j < 2; j += 1) { + gh_update(&u, src + i + j * 16, st->hx[2 - 1 - j]); + } + sth->acc = gcm_reduce(u); + + for (j = 0; j < 2; j++) { + encrypt_xor_block(st, dst + i + j * 16, src + i + j * 16, rev_counters[j]); + } + } + + /* Remaining *partial* blocks; if we have 16 bytes left, we want to keep the + full block authenticated along with the final block, hence < and not <= */ + + for (; i + 16 < src_len; i += 16) { + u = gh_update0(sth, src + i, st->hx[1 - 1 - 0]); + sth->acc = gcm_reduce(u); + encrypt_xor_block(st, dst + i, src + i, REV128(counter)); + counter = ADD64x2(counter, one); + } + + /* Authenticate both the last block of the message and the final block */ + + final_block = REV128(SET64x2(ad_len * 8, src_len * 8)); + STORE32_BE(counter_ + NPUBBYTES, 1); + encrypt(st, mac, counter_); + left = src_len - i; + if (left != 0) { + for (j = 0; j < left; j++) { + last_blocks[j] = src[i + j]; + } + for (; j < 16; j++) { + last_blocks[j] = 0; + } + STORE128(last_blocks + 16, final_block); + gh_ad_blocks(st, sth, last_blocks, 32); + encrypt_xor_block(st, last_blocks, last_blocks, REV128(counter)); + for (j = 0; j < left; j++) { + dst[i + j] = last_blocks[j]; + } + } else { + STORE128(last_blocks, final_block); + gh_ad_blocks(st, sth, last_blocks, 16); + } + STORE128(mac, XOR128(LOAD128(mac), REV128(sth->acc))); +} + +int +crypto_aead_aes256gcm_beforenm(crypto_aead_aes256gcm_state *st_, const unsigned char *k) +{ + State *st = (State *) (void *) st_; + CRYPTO_ALIGN(16) unsigned char h[16]; + + COMPILER_ASSERT(sizeof *st_ >= sizeof *st); + + expand256(k, st->rkeys); + memset(h, 0, sizeof h); + encrypt(st, h, h); + + precomp_for_block_count(st->hx, h, PC_COUNT); + + return 0; +} + +int +crypto_aead_aes256gcm_encrypt_detached_afternm(unsigned char *c, unsigned char *mac, + unsigned long long *maclen_p, const unsigned char *m, + unsigned long long m_len_, const unsigned char *ad, + unsigned long long ad_len_, + const unsigned char *nsec, const unsigned char *npub, + const crypto_aead_aes256gcm_state *st_) +{ + const State *st = (const State *) (const void *) st_; + GHash sth; + CRYPTO_ALIGN(16) unsigned char j[16]; + size_t gh_required_blocks; + const size_t ad_len = (size_t) ad_len_; + const size_t m_len = (size_t) m_len_; + + (void) nsec; + if (maclen_p != NULL) { + *maclen_p = 0; + } + if (ad_len_ > SODIUM_SIZE_MAX || m_len_ > SODIUM_SIZE_MAX) { + sodium_misuse(); + } + gh_required_blocks = required_blocks(ad_len, m_len); + if (gh_required_blocks == 0) { + memset(mac, 0xd0, ABYTES); + memset(c, 0, m_len); + return -1; + } + + gh_init(&sth); + + memcpy(j, npub, NPUBBYTES); + STORE32_BE(j + NPUBBYTES, 2); + + aes_gcm_encrypt_generic(st, &sth, mac, c, m, m_len, ad, ad_len, j); + + if (maclen_p != NULL) { + *maclen_p = ABYTES; + } + return 0; +} + +int +crypto_aead_aes256gcm_encrypt(unsigned char *c, unsigned long long *clen_p, const unsigned char *m, + unsigned long long m_len, const unsigned char *ad, + unsigned long long ad_len, const unsigned char *nsec, + const unsigned char *npub, const unsigned char *k) +{ + const int ret = crypto_aead_aes256gcm_encrypt_detached(c, c + m_len, NULL, m, m_len, ad, ad_len, + nsec, npub, k); + if (clen_p != NULL) { + if (ret == 0) { + *clen_p = m_len + crypto_aead_aes256gcm_ABYTES; + } else { + *clen_p = 0; + } + } + return ret; +} + +int +crypto_aead_aes256gcm_encrypt_detached(unsigned char *c, unsigned char *mac, + unsigned long long *maclen_p, const unsigned char *m, + unsigned long long m_len, const unsigned char *ad, + unsigned long long ad_len, const unsigned char *nsec, + const unsigned char *npub, const unsigned char *k) +{ + CRYPTO_ALIGN(16) crypto_aead_aes256gcm_state st; + int ret; + + PREFETCH_WRITE(c); + PREFETCH_READ(m); + PREFETCH_READ(ad); + + crypto_aead_aes256gcm_beforenm(&st, k); + ret = crypto_aead_aes256gcm_encrypt_detached_afternm(c, mac, maclen_p, m, m_len, ad, ad_len, + nsec, npub, &st); + sodium_memzero(&st, sizeof st); + + return ret; +} + +int +crypto_aead_aes256gcm_encrypt_afternm(unsigned char *c, unsigned long long *clen_p, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const crypto_aead_aes256gcm_state *st_) +{ + int ret = crypto_aead_aes256gcm_encrypt_detached_afternm(c, c + mlen, NULL, m, mlen, ad, adlen, + nsec, npub, st_); + if (clen_p != NULL) { + *clen_p = mlen + crypto_aead_aes256gcm_ABYTES; + } + return ret; +} + +static int +crypto_aead_aes256gcm_verify_mac(unsigned char *nsec, const unsigned char *c, + unsigned long long c_len_, const unsigned char *mac, + const unsigned char *ad, unsigned long long ad_len_, + const unsigned char *npub, const crypto_aead_aes256gcm_state *st_) +{ + const State *st = (const State *) (const void *) st_; + GHash sth; + BlockVec final_block; + CRYPTO_ALIGN(16) unsigned char j[16]; + CRYPTO_ALIGN(16) unsigned char computed_mac[16]; + CRYPTO_ALIGN(16) unsigned char last_block[16]; + size_t gh_required_blocks; + size_t left; + const size_t ad_len = (size_t) ad_len_; + const size_t c_len = (size_t) c_len_; + int ret; + + (void) nsec; + if (ad_len_ > SODIUM_SIZE_MAX || c_len_ > SODIUM_SIZE_MAX) { + sodium_misuse(); + } + gh_required_blocks = required_blocks(ad_len, c_len); + if (gh_required_blocks == 0) { + return -1; + } + + gh_init(&sth); + + memcpy(j, npub, NPUBBYTES); + STORE32_BE(j + NPUBBYTES, 2); + + gh_ad_blocks(st, &sth, ad, ad_len & ~15); + left = ad_len & 15; + if (left != 0) { + unsigned char pad[16]; + + memset(pad, 0, sizeof pad); + memcpy(pad, ad + ad_len - left, left); + gh_ad_blocks(st, &sth, pad, sizeof pad); + } + + gh_ad_blocks(st, &sth, c, c_len & ~15); + left = c_len & 15; + if (left != 0) { + unsigned char pad[16]; + + memset(pad, 0, sizeof pad); + memcpy(pad, c + c_len - left, left); + gh_ad_blocks(st, &sth, pad, sizeof pad); + } + final_block = REV128(SET64x2(ad_len * 8, c_len * 8)); + STORE32_BE(j + NPUBBYTES, 1); + encrypt(st, computed_mac, j); + STORE128(last_block, final_block); + gh_ad_blocks(st, &sth, last_block, 16); + STORE128(computed_mac, XOR128(LOAD128(computed_mac), REV128(sth.acc))); + + ret = crypto_verify_16(mac, computed_mac); + sodium_memzero(computed_mac, sizeof computed_mac); + + return ret; +} + +int +crypto_aead_aes256gcm_decrypt_detached_afternm(unsigned char *m, unsigned char *nsec, + const unsigned char *c, unsigned long long c_len_, + const unsigned char *mac, const unsigned char *ad, + unsigned long long ad_len_, + const unsigned char *npub, + const crypto_aead_aes256gcm_state *st_) +{ + const State *st = (const State *) (const void *) st_; + GHash sth; + CRYPTO_ALIGN(16) unsigned char j[16]; + unsigned char computed_mac[16]; + size_t gh_required_blocks; + const size_t ad_len = (size_t) ad_len_; + const size_t c_len = (size_t) c_len_; + const size_t m_len = c_len; + + (void) nsec; + if (ad_len_ > SODIUM_SIZE_MAX || c_len_ > SODIUM_SIZE_MAX) { + sodium_misuse(); + } + if (m == NULL) { + return crypto_aead_aes256gcm_verify_mac(nsec, c, c_len, mac, ad, ad_len, npub, st_); + } + gh_required_blocks = required_blocks(ad_len, m_len); + if (gh_required_blocks == 0) { + return -1; + } + + gh_init(&sth); + + memcpy(j, npub, NPUBBYTES); + STORE32_BE(j + NPUBBYTES, 2); + + aes_gcm_decrypt_generic(st, &sth, computed_mac, m, c, m_len, ad, ad_len, j); + + if (crypto_verify_16(mac, computed_mac) != 0) { + sodium_memzero(computed_mac, sizeof computed_mac); + memset(m, 0xd0, m_len); + return -1; + } + return 0; +} + +int +crypto_aead_aes256gcm_decrypt_afternm(unsigned char *m, unsigned long long *mlen_p, + unsigned char *nsec, const unsigned char *c, + unsigned long long clen, const unsigned char *ad, + unsigned long long adlen, const unsigned char *npub, + const crypto_aead_aes256gcm_state *st_) +{ + unsigned long long mlen = 0ULL; + int ret = -1; + + if (clen >= ABYTES) { + ret = crypto_aead_aes256gcm_decrypt_detached_afternm( + m, nsec, c, clen - ABYTES, c + clen - ABYTES, ad, adlen, npub, st_); + } + if (mlen_p != NULL) { + if (ret == 0) { + mlen = clen - ABYTES; + } + *mlen_p = mlen; + } + return ret; +} + +int +crypto_aead_aes256gcm_decrypt_detached(unsigned char *m, unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *mac, const unsigned char *ad, + unsigned long long adlen, const unsigned char *npub, + const unsigned char *k) +{ + CRYPTO_ALIGN(16) crypto_aead_aes256gcm_state st; + + PREFETCH_WRITE(m); + PREFETCH_READ(c); + PREFETCH_READ(ad); + + crypto_aead_aes256gcm_beforenm(&st, k); + + return crypto_aead_aes256gcm_decrypt_detached_afternm( + m, nsec, c, clen, mac, ad, adlen, npub, (const crypto_aead_aes256gcm_state *) &st); +} + +int +crypto_aead_aes256gcm_decrypt(unsigned char *m, unsigned long long *mlen_p, unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k) +{ + CRYPTO_ALIGN(16) crypto_aead_aes256gcm_state st; + int ret; + + PREFETCH_WRITE(m); + PREFETCH_READ(c); + PREFETCH_READ(ad); + + crypto_aead_aes256gcm_beforenm(&st, k); + + ret = crypto_aead_aes256gcm_decrypt_afternm(m, mlen_p, nsec, c, clen, ad, adlen, npub, + (const crypto_aead_aes256gcm_state *) &st); + sodium_memzero(&st, sizeof st); + + return ret; +} + +int +crypto_aead_aes256gcm_is_available(void) +{ + return sodium_runtime_has_armcrypto(); +} + +#endif diff --git a/src/libsodium/include/Makefile.in b/src/libsodium/include/Makefile.in index b19a7e617d..3104fbbaf3 100644 --- a/src/libsodium/include/Makefile.in +++ b/src/libsodium/include/Makefile.in @@ -197,6 +197,7 @@ CCASFLAGS = @CCASFLAGS@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ CFLAGS_AESNI = @CFLAGS_AESNI@ +CFLAGS_ARMCRYPTO = @CFLAGS_ARMCRYPTO@ CFLAGS_AVX = @CFLAGS_AVX@ CFLAGS_AVX2 = @CFLAGS_AVX2@ CFLAGS_AVX512F = @CFLAGS_AVX512F@ diff --git a/src/libsodium/include/sodium/runtime.h b/src/libsodium/include/sodium/runtime.h index 7f15d58e7c..c1cec853eb 100644 --- a/src/libsodium/include/sodium/runtime.h +++ b/src/libsodium/include/sodium/runtime.h @@ -11,6 +11,9 @@ extern "C" { SODIUM_EXPORT_WEAK int sodium_runtime_has_neon(void); +SODIUM_EXPORT_WEAK +int sodium_runtime_has_armcrypto(void); + SODIUM_EXPORT_WEAK int sodium_runtime_has_sse2(void); diff --git a/src/libsodium/sodium/runtime.c b/src/libsodium/sodium/runtime.c index 9dfe54f849..7f0997d494 100644 --- a/src/libsodium/sodium/runtime.c +++ b/src/libsodium/sodium/runtime.c @@ -3,6 +3,14 @@ #ifdef HAVE_ANDROID_GETCPUFEATURES # include #endif +#ifdef __APPLE__ +# include +# include +# include +#endif +#ifdef HAVE_SYS_AUXV_H +# include +#endif #include "private/common.h" #include "runtime.h" @@ -10,6 +18,7 @@ typedef struct CPUFeatures_ { int initialized; int has_neon; + int has_armcrypto; int has_sse2; int has_sse3; int has_ssse3; @@ -48,25 +57,94 @@ static CPUFeatures _cpu_features; static int _sodium_runtime_arm_cpu_features(CPUFeatures * const cpu_features) { -#ifndef __arm__ cpu_features->has_neon = 0; - return -1; -#else -# ifdef __APPLE__ -# ifdef __ARM_NEON__ + cpu_features->has_armcrypto = 0; + +#ifndef __ARM_ARCH + return -1; /* LCOV_EXCL_LINE */ +#endif + +#if defined(__ARM_NEON) || defined(__aarch64__) || defined(_M_ARM64) cpu_features->has_neon = 1; -# else - cpu_features->has_neon = 0; -# endif -# elif defined(HAVE_ANDROID_GETCPUFEATURES) && \ - defined(ANDROID_CPU_ARM_FEATURE_NEON) +#elif defined(HAVE_ANDROID_GETCPUFEATURES) && defined(ANDROID_CPU_ARM_FEATURE_NEON) cpu_features->has_neon = (android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_NEON) != 0x0; -# else - cpu_features->has_neon = 0; +#elif (defined(__aarch64__) || defined(_M_ARM64)) && defined(AT_HWCAP) +# ifdef HAVE_GETAUXVAL + cpu_features->has_neon = (getauxval(AT_HWCAP) & (1L << 1)) != 0; +# elif defined(HAVE_ELF_AUX_INFO) + { + unsigned long buf; + if (elf_aux_info(AT_HWCAP, (void *) &buf, (int) sizeof buf) == 0) { + cpu_features->has_neon = (buf & (1L << 1)) != 0; + } + } +# endif +#elif defined(__arm__) && defined(AT_HWCAP) +# ifdef HAVE_GETAUXVAL + cpu_features->has_neon = (getauxval(AT_HWCAP) & (1L << 12)) != 0; +# elif defined(HAVE_ELF_AUX_INFO) + { + unsigned long buf; + if (elf_aux_info(AT_HWCAP, (void *) &buf, (int) sizeof buf) == 0) { + cpu_features->has_neon = (buf & (1L << 12)) != 0; + } + } # endif - return 0; #endif + + if (cpu_features->has_neon == 0) { + return 0; + } + +#if __ARM_FEATURE_CRYPTO + cpu_features->has_armcrypto = 1; +#elif defined(_M_ARM64) + cpu_features->has_armcrypto = 1; /* assuming all CPUs supported by ARM Windows have the crypto extensions */ +#elif defined(__APPLE__) && defined(CPU_TYPE_ARM64) && defined(CPU_SUBTYPE_ARM64E) + { + cpu_type_t cpu_type; + cpu_subtype_t cpu_subtype; + size_t cpu_type_len = sizeof cpu_type; + size_t cpu_subtype_len = sizeof cpu_subtype; + + if (sysctlbyname("hw.cputype", &cpu_type, &cpu_type_len, + NULL, 0) == 0 && cpu_type == CPU_TYPE_ARM64 && + sysctlbyname("hw.cpusubtype", &cpu_subtype, &cpu_subtype_len, + NULL, 0) == 0 && + (cpu_subtype == CPU_SUBTYPE_ARM64E || + cpu_subtype == CPU_SUBTYPE_ARM64_V8)) { + cpu_features->has_armcrypto = 1; + } + } +#elif defined(HAVE_ANDROID_GETCPUFEATURES) && defined(ANDROID_CPU_ARM_FEATURE_AES) + cpu_features->has_armcrypto = + (android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_AES) != 0x0; +#elif (defined(__aarch64__) || defined(_M_ARM64)) && defined(AT_HWCAP) +# ifdef HAVE_GETAUXVAL + cpu_features->has_armcrypto = (getauxval(AT_HWCAP) & (1L << 3)) != 0; +# elif defined(HAVE_ELF_AUX_INFO) + { + unsigned long buf; + if (elf_aux_info(AT_HWCAP, (void *) &buf, (int) sizeof buf) == 0) { + cpu_features->has_armcrypto = (buf & (1L << 3)) != 0; + } + } +# endif +#elif defined(__arm__) && defined(AT_HWCAP2) +# ifdef HAVE_GETAUXVAL + cpu_features->has_armcrypto = (getauxval(AT_HWCAP2) & (1L << 0)) != 0; +# elif defined(HAVE_ELF_AUX_INFO) + { + unsigned long buf; + if (elf_aux_info(AT_HWCAP2, (void *) &buf, (int) sizeof buf) == 0) { + cpu_features->has_armcrypto = (buf & (1L << 0)) != 0; + } + } +# endif +#endif + + return 0; } static void @@ -116,11 +194,10 @@ static int _sodium_runtime_intel_cpu_features(CPUFeatures * const cpu_features) { unsigned int cpu_info[4]; - unsigned int id; uint32_t xcr0 = 0U; _cpuid(cpu_info, 0x0); - if ((id = cpu_info[0]) == 0U) { + if (cpu_info[0] == 0U) { return -1; /* LCOV_EXCL_LINE */ } _cpuid(cpu_info, 0x00000001); @@ -248,6 +325,12 @@ sodium_runtime_has_neon(void) return _cpu_features.has_neon; } +int +sodium_runtime_has_armcrypto(void) +{ + return _cpu_features.has_armcrypto; +} + int sodium_runtime_has_sse2(void) { diff --git a/test/Makefile.in b/test/Makefile.in index 6dbbab81bb..1f2b43d12f 100644 --- a/test/Makefile.in +++ b/test/Makefile.in @@ -208,6 +208,7 @@ CCASFLAGS = @CCASFLAGS@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ CFLAGS_AESNI = @CFLAGS_AESNI@ +CFLAGS_ARMCRYPTO = @CFLAGS_ARMCRYPTO@ CFLAGS_AVX = @CFLAGS_AVX@ CFLAGS_AVX2 = @CFLAGS_AVX2@ CFLAGS_AVX512F = @CFLAGS_AVX512F@ diff --git a/test/default/Makefile.in b/test/default/Makefile.in index 88c37c9dc9..259edeb4e7 100644 --- a/test/default/Makefile.in +++ b/test/default/Makefile.in @@ -752,6 +752,7 @@ CCASFLAGS = @CCASFLAGS@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ CFLAGS_AESNI = @CFLAGS_AESNI@ +CFLAGS_ARMCRYPTO = @CFLAGS_ARMCRYPTO@ CFLAGS_AVX = @CFLAGS_AVX@ CFLAGS_AVX2 = @CFLAGS_AVX2@ CFLAGS_AVX512F = @CFLAGS_AVX512F@