chacha: Move ARMv8 OPENSSL_armcap_P dispatching from assembly to C.

Take a step towards removing all uses of OPENSSL_armcap_P from the ARMv8
assembly code.

Change-Id: Ic1a75e107017b33f3e88b8eae503b788e37ca70a
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/64207
Reviewed-by: Bob Beck <bbe@google.com>
Reviewed-by: David Benjamin <davidben@google.com>
Commit-Queue: David Benjamin <davidben@google.com>
diff --git a/crypto/chacha/asm/chacha-armv8.pl b/crypto/chacha/asm/chacha-armv8.pl index a519b5f..6818da2 100755 --- a/crypto/chacha/asm/chacha-armv8.pl +++ b/crypto/chacha/asm/chacha-armv8.pl
@@ -122,9 +122,6 @@ $code.=<<___; #include <openssl/arm_arch.h> -.extern OPENSSL_armcap_P -.hidden OPENSSL_armcap_P - .section .rodata .align 5 @@ -136,24 +133,10 @@ .text -.globl ChaCha20_ctr32 -.type ChaCha20_ctr32,%function +.globl ChaCha20_ctr32_nohw +.type ChaCha20_ctr32_nohw,%function .align 5 -ChaCha20_ctr32: - AARCH64_VALID_CALL_TARGET - cbz $len,.Labort -#if defined(OPENSSL_HWASAN) && __clang_major__ >= 10 - adrp @x[0],:pg_hi21_nc:OPENSSL_armcap_P -#else - adrp @x[0],:pg_hi21:OPENSSL_armcap_P -#endif - cmp $len,#192 - b.lo .Lshort - ldr w17,[@x[0],:lo12:OPENSSL_armcap_P] - tst w17,#ARMV7_NEON - b.ne ChaCha20_neon - -.Lshort: +ChaCha20_ctr32_nohw: AARCH64_SIGN_LINK_REGISTER stp x29,x30,[sp,#-96]! add x29,sp,#0 @@ -276,7 +259,6 @@ ldp x27,x28,[x29,#80] ldp x29,x30,[sp],#96 AARCH64_VALIDATE_LINK_REGISTER -.Labort: ret .align 4 @@ -334,7 +316,7 @@ ldp x29,x30,[sp],#96 AARCH64_VALIDATE_LINK_REGISTER ret -.size ChaCha20_ctr32,.-ChaCha20_ctr32 +.size ChaCha20_ctr32_nohw,.-ChaCha20_ctr32_nohw ___ {{{ @@ -375,9 +357,10 @@ $code.=<<___; -.type ChaCha20_neon,%function +.globl ChaCha20_ctr32_neon +.type ChaCha20_ctr32_neon,%function .align 5 -ChaCha20_neon: +ChaCha20_ctr32_neon: AARCH64_SIGN_LINK_REGISTER stp x29,x30,[sp,#-96]! add x29,sp,#0 @@ -690,7 +673,7 @@ ldp x29,x30,[sp],#96 AARCH64_VALIDATE_LINK_REGISTER ret -.size ChaCha20_neon,.-ChaCha20_neon +.size ChaCha20_ctr32_neon,.-ChaCha20_ctr32_neon ___ { my ($T0,$T1,$T2,$T3,$T4,$T5)=@K;
diff --git a/crypto/chacha/chacha.c b/crypto/chacha/chacha.c index a4d88c0..52102ab 100644 --- a/crypto/chacha/chacha.c +++ b/crypto/chacha/chacha.c
@@ -60,7 +60,22 @@ OPENSSL_memcpy(&out[16], &x[12], sizeof(uint32_t) * 4); } -#if defined(CHACHA20_ASM) +#if defined(CHACHA20_ASM_NOHW) +static void ChaCha20_ctr32(uint8_t *out, const uint8_t *in, size_t in_len, + const uint32_t key[8], const uint32_t counter[4]) { +#if defined(CHACHA20_ASM_NEON) + if (ChaCha20_ctr32_neon_capable(in_len)) { + ChaCha20_ctr32_neon(out, in, in_len, key, counter); + return; + } +#endif + if (in_len > 0) { + ChaCha20_ctr32_nohw(out, in, in_len, key, counter); + } +} +#endif + +#if defined(CHACHA20_ASM) || defined(CHACHA20_ASM_NOHW) void CRYPTO_chacha_20(uint8_t *out, const uint8_t *in, size_t in_len, const uint8_t key[32], const uint8_t nonce[12],
diff --git a/crypto/chacha/chacha_test.cc b/crypto/chacha/chacha_test.cc index d4e5332..6c03502 100644 --- a/crypto/chacha/chacha_test.cc +++ b/crypto/chacha/chacha_test.cc
@@ -347,7 +347,25 @@ } } -#if defined(CHACHA20_ASM) && defined(SUPPORTS_ABI_TEST) +#if defined(SUPPORTS_ABI_TEST) + +static void check_abi(uint8_t *out, const uint8_t *in, size_t in_len, + const uint32_t key[8], const uint32_t counter[4]) { +#if defined(CHACHA20_ASM) + CHECK_ABI(ChaCha20_ctr32, out, in, in_len, key, counter); +#endif +#if defined(CHACHA20_ASM_NEON) + if (ChaCha20_ctr32_neon_capable(in_len)) { + CHECK_ABI(ChaCha20_ctr32_neon, out, in, in_len, key, counter); + } +#endif +#if defined(CHACHA20_ASM_NOHW) + if (in_len > 0) { + CHECK_ABI(ChaCha20_ctr32_nohw, out, in, in_len, key, counter); + } +#endif +} + TEST(ChaChaTest, ABI) { uint32_t key[8]; OPENSSL_memcpy(key, kKey, sizeof(key)); @@ -357,14 +375,15 @@ auto buf = std::make_unique<uint8_t[]>(sizeof(kInput)); for (size_t len = 0; len <= 32; len++) { SCOPED_TRACE(len); - CHECK_ABI(ChaCha20_ctr32, buf.get(), kInput, len, key, kCounterNonce); + check_abi(buf.get(), kInput, len, key, kCounterNonce); } for (size_t len : {32 * 2, 32 * 4, 32 * 8, 32 * 16, 32 * 24}) { SCOPED_TRACE(len); - CHECK_ABI(ChaCha20_ctr32, buf.get(), kInput, len, key, kCounterNonce); + check_abi(buf.get(), kInput, len, key, kCounterNonce); // Cover the partial block paths. - CHECK_ABI(ChaCha20_ctr32, buf.get(), kInput, len + 15, key, kCounterNonce); + check_abi(buf.get(), kInput, len + 15, key, kCounterNonce); } } -#endif // CHACHA20_ASM && SUPPORTS_ABI_TEST + +#endif // SUPPORTS_ABI_TEST
diff --git a/crypto/chacha/internal.h b/crypto/chacha/internal.h index 5f442ec..a8ae3cb 100644 --- a/crypto/chacha/internal.h +++ b/crypto/chacha/internal.h
@@ -17,6 +17,8 @@ #include <openssl/base.h> +#include "../internal.h" + #if defined(__cplusplus) extern "C" { #endif @@ -27,11 +29,24 @@ void CRYPTO_hchacha20(uint8_t out[32], const uint8_t key[32], const uint8_t nonce[16]); -#if !defined(OPENSSL_NO_ASM) && \ - (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || \ - defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)) +#if !defined(OPENSSL_NO_ASM) && \ + (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || defined(OPENSSL_ARM)) + #define CHACHA20_ASM +#elif !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) + +#define CHACHA20_ASM_NOHW + +#define CHACHA20_ASM_NEON +OPENSSL_INLINE int ChaCha20_ctr32_neon_capable(size_t len) { + return (len >= 192) && CRYPTO_is_NEON_capable(); +} +void ChaCha20_ctr32_neon(uint8_t *out, const uint8_t *in, size_t in_len, + const uint32_t key[8], const uint32_t counter[4]); +#endif + +#if defined(CHACHA20_ASM) // ChaCha20_ctr32 encrypts |in_len| bytes from |in| and writes the result to // |out|. If |in| and |out| alias, they must be equal. // @@ -44,6 +59,12 @@ const uint32_t key[8], const uint32_t counter[4]); #endif +#if defined(CHACHA20_ASM_NOHW) +// ChaCha20_ctr32_nohw is like |ChaCha20_ctr32| except |in_len| must be nonzero. +void ChaCha20_ctr32_nohw(uint8_t *out, const uint8_t *in, size_t in_len, + const uint32_t key[8], const uint32_t counter[4]); +#endif + #if defined(__cplusplus) } // extern C