chacha: Move ARMv8 OPENSSL_armcap_P dispatching from assembly to C.
Take a step towards removing all uses of OPENSSL_armcap_P from the
ARMv8 assembly code.
Change-Id: Ic1a75e107017b33f3e88b8eae503b788e37ca70a
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/64207
Reviewed-by: Bob Beck <bbe@google.com>
Reviewed-by: David Benjamin <davidben@google.com>
Commit-Queue: David Benjamin <davidben@google.com>
diff --git a/crypto/chacha/asm/chacha-armv8.pl b/crypto/chacha/asm/chacha-armv8.pl
index a519b5f..6818da2 100755
--- a/crypto/chacha/asm/chacha-armv8.pl
+++ b/crypto/chacha/asm/chacha-armv8.pl
@@ -122,9 +122,6 @@
$code.=<<___;
#include <openssl/arm_arch.h>
-.extern OPENSSL_armcap_P
-.hidden OPENSSL_armcap_P
-
.section .rodata
.align 5
@@ -136,24 +133,10 @@
.text
-.globl ChaCha20_ctr32
-.type ChaCha20_ctr32,%function
+.globl ChaCha20_ctr32_nohw
+.type ChaCha20_ctr32_nohw,%function
.align 5
-ChaCha20_ctr32:
- AARCH64_VALID_CALL_TARGET
- cbz $len,.Labort
-#if defined(OPENSSL_HWASAN) && __clang_major__ >= 10
- adrp @x[0],:pg_hi21_nc:OPENSSL_armcap_P
-#else
- adrp @x[0],:pg_hi21:OPENSSL_armcap_P
-#endif
- cmp $len,#192
- b.lo .Lshort
- ldr w17,[@x[0],:lo12:OPENSSL_armcap_P]
- tst w17,#ARMV7_NEON
- b.ne ChaCha20_neon
-
-.Lshort:
+ChaCha20_ctr32_nohw:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-96]!
add x29,sp,#0
@@ -276,7 +259,6 @@
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#96
AARCH64_VALIDATE_LINK_REGISTER
-.Labort:
ret
.align 4
@@ -334,7 +316,7 @@
ldp x29,x30,[sp],#96
AARCH64_VALIDATE_LINK_REGISTER
ret
-.size ChaCha20_ctr32,.-ChaCha20_ctr32
+.size ChaCha20_ctr32_nohw,.-ChaCha20_ctr32_nohw
___
{{{
@@ -375,9 +357,10 @@
$code.=<<___;
-.type ChaCha20_neon,%function
+.globl ChaCha20_ctr32_neon
+.type ChaCha20_ctr32_neon,%function
.align 5
-ChaCha20_neon:
+ChaCha20_ctr32_neon:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-96]!
add x29,sp,#0
@@ -690,7 +673,7 @@
ldp x29,x30,[sp],#96
AARCH64_VALIDATE_LINK_REGISTER
ret
-.size ChaCha20_neon,.-ChaCha20_neon
+.size ChaCha20_ctr32_neon,.-ChaCha20_ctr32_neon
___
{
my ($T0,$T1,$T2,$T3,$T4,$T5)=@K;
diff --git a/crypto/chacha/chacha.c b/crypto/chacha/chacha.c
index a4d88c0..52102ab 100644
--- a/crypto/chacha/chacha.c
+++ b/crypto/chacha/chacha.c
@@ -60,7 +60,22 @@
OPENSSL_memcpy(&out[16], &x[12], sizeof(uint32_t) * 4);
}
-#if defined(CHACHA20_ASM)
+#if defined(CHACHA20_ASM_NOHW)  // Choose the assembly implementation in C.
+static void ChaCha20_ctr32(uint8_t *out, const uint8_t *in, size_t in_len,
+ const uint32_t key[8], const uint32_t counter[4]) {
+#if defined(CHACHA20_ASM_NEON)
+ if (ChaCha20_ctr32_neon_capable(in_len)) {  // in_len >= 192 and NEON present.
+ ChaCha20_ctr32_neon(out, in, in_len, key, counter);
+ return;
+ }
+#endif  // CHACHA20_ASM_NEON
+ if (in_len > 0) {  // |ChaCha20_ctr32_nohw| requires a nonzero |in_len|.
+ ChaCha20_ctr32_nohw(out, in, in_len, key, counter);
+ }
+}
+#endif  // CHACHA20_ASM_NOHW
+
+#if defined(CHACHA20_ASM) || defined(CHACHA20_ASM_NOHW)
void CRYPTO_chacha_20(uint8_t *out, const uint8_t *in, size_t in_len,
const uint8_t key[32], const uint8_t nonce[12],
diff --git a/crypto/chacha/chacha_test.cc b/crypto/chacha/chacha_test.cc
index d4e5332..6c03502 100644
--- a/crypto/chacha/chacha_test.cc
+++ b/crypto/chacha/chacha_test.cc
@@ -347,7 +347,25 @@
}
}
-#if defined(CHACHA20_ASM) && defined(SUPPORTS_ABI_TEST)
+#if defined(SUPPORTS_ABI_TEST)
+
+static void check_abi(uint8_t *out, const uint8_t *in, size_t in_len,
+ const uint32_t key[8], const uint32_t counter[4]) {  // CHECK_ABI each compiled-in entry point that accepts |in_len|.
+#if defined(CHACHA20_ASM)
+ CHECK_ABI(ChaCha20_ctr32, out, in, in_len, key, counter);
+#endif  // CHACHA20_ASM
+#if defined(CHACHA20_ASM_NEON)
+ if (ChaCha20_ctr32_neon_capable(in_len)) {  // in_len >= 192 and NEON present.
+ CHECK_ABI(ChaCha20_ctr32_neon, out, in, in_len, key, counter);
+ }
+#endif  // CHACHA20_ASM_NEON
+#if defined(CHACHA20_ASM_NOHW)
+ if (in_len > 0) {  // |ChaCha20_ctr32_nohw| requires a nonzero |in_len|.
+ CHECK_ABI(ChaCha20_ctr32_nohw, out, in, in_len, key, counter);
+ }
+#endif  // CHACHA20_ASM_NOHW
+}
+
TEST(ChaChaTest, ABI) {
uint32_t key[8];
OPENSSL_memcpy(key, kKey, sizeof(key));
@@ -357,14 +375,15 @@
auto buf = std::make_unique<uint8_t[]>(sizeof(kInput));
for (size_t len = 0; len <= 32; len++) {
SCOPED_TRACE(len);
- CHECK_ABI(ChaCha20_ctr32, buf.get(), kInput, len, key, kCounterNonce);
+ check_abi(buf.get(), kInput, len, key, kCounterNonce);
}
for (size_t len : {32 * 2, 32 * 4, 32 * 8, 32 * 16, 32 * 24}) {
SCOPED_TRACE(len);
- CHECK_ABI(ChaCha20_ctr32, buf.get(), kInput, len, key, kCounterNonce);
+ check_abi(buf.get(), kInput, len, key, kCounterNonce);
// Cover the partial block paths.
- CHECK_ABI(ChaCha20_ctr32, buf.get(), kInput, len + 15, key, kCounterNonce);
+ check_abi(buf.get(), kInput, len + 15, key, kCounterNonce);
}
}
-#endif // CHACHA20_ASM && SUPPORTS_ABI_TEST
+
+#endif // SUPPORTS_ABI_TEST
diff --git a/crypto/chacha/internal.h b/crypto/chacha/internal.h
index 5f442ec..a8ae3cb 100644
--- a/crypto/chacha/internal.h
+++ b/crypto/chacha/internal.h
@@ -17,6 +17,8 @@
#include <openssl/base.h>
+#include "../internal.h"
+
#if defined(__cplusplus)
extern "C" {
#endif
@@ -27,11 +29,24 @@
void CRYPTO_hchacha20(uint8_t out[32], const uint8_t key[32],
const uint8_t nonce[16]);
-#if !defined(OPENSSL_NO_ASM) && \
- (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || \
- defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64))
+#if !defined(OPENSSL_NO_ASM) && \
+ (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || defined(OPENSSL_ARM))
+
#define CHACHA20_ASM
+#elif !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64)
+
+#define CHACHA20_ASM_NOHW
+
+#define CHACHA20_ASM_NEON
+OPENSSL_INLINE int ChaCha20_ctr32_neon_capable(size_t len) {  // Nonzero iff |len| >= 192 and the CPU reports NEON.
+ return (len >= 192) && CRYPTO_is_NEON_capable();
+}
+void ChaCha20_ctr32_neon(uint8_t *out, const uint8_t *in, size_t in_len,
+ const uint32_t key[8], const uint32_t counter[4]);  // NOTE(review): visible callers check |ChaCha20_ctr32_neon_capable| first.
+#endif
+
+#if defined(CHACHA20_ASM)
// ChaCha20_ctr32 encrypts |in_len| bytes from |in| and writes the result to
// |out|. If |in| and |out| alias, they must be equal.
//
@@ -44,6 +59,12 @@
const uint32_t key[8], const uint32_t counter[4]);
#endif
+#if defined(CHACHA20_ASM_NOHW)
+// ChaCha20_ctr32_nohw is like |ChaCha20_ctr32| except |in_len| must be nonzero.
+void ChaCha20_ctr32_nohw(uint8_t *out, const uint8_t *in, size_t in_len,
+ const uint32_t key[8], const uint32_t counter[4]);
+#endif
+
#if defined(__cplusplus)
} // extern C