chacha: Move ARMv8 OPENSSL_armcap_P dispatching from assembly to C.

Take a step towards removing all uses of OPENSSL_armcap_P from the
ARMv8 assembly code.

Change-Id: Ic1a75e107017b33f3e88b8eae503b788e37ca70a
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/64207
Reviewed-by: Bob Beck <bbe@google.com>
Reviewed-by: David Benjamin <davidben@google.com>
Commit-Queue: David Benjamin <davidben@google.com>
diff --git a/crypto/chacha/asm/chacha-armv8.pl b/crypto/chacha/asm/chacha-armv8.pl
index a519b5f..6818da2 100755
--- a/crypto/chacha/asm/chacha-armv8.pl
+++ b/crypto/chacha/asm/chacha-armv8.pl
@@ -122,9 +122,6 @@
 $code.=<<___;
 #include <openssl/arm_arch.h>
 
-.extern	OPENSSL_armcap_P
-.hidden	OPENSSL_armcap_P
-
 .section .rodata
 
 .align	5
@@ -136,24 +133,10 @@
 
 .text
 
-.globl	ChaCha20_ctr32
-.type	ChaCha20_ctr32,%function
+.globl	ChaCha20_ctr32_nohw
+.type	ChaCha20_ctr32_nohw,%function
 .align	5
-ChaCha20_ctr32:
-	AARCH64_VALID_CALL_TARGET
-	cbz	$len,.Labort
-#if defined(OPENSSL_HWASAN) && __clang_major__ >= 10
-	adrp	@x[0],:pg_hi21_nc:OPENSSL_armcap_P
-#else
-	adrp	@x[0],:pg_hi21:OPENSSL_armcap_P
-#endif
-	cmp	$len,#192
-	b.lo	.Lshort
-	ldr	w17,[@x[0],:lo12:OPENSSL_armcap_P]
-	tst	w17,#ARMV7_NEON
-	b.ne	ChaCha20_neon
-
-.Lshort:
+ChaCha20_ctr32_nohw:
 	AARCH64_SIGN_LINK_REGISTER
 	stp	x29,x30,[sp,#-96]!
 	add	x29,sp,#0
@@ -276,7 +259,6 @@
 	ldp	x27,x28,[x29,#80]
 	ldp	x29,x30,[sp],#96
 	AARCH64_VALIDATE_LINK_REGISTER
-.Labort:
 	ret
 
 .align	4
@@ -334,7 +316,7 @@
 	ldp	x29,x30,[sp],#96
 	AARCH64_VALIDATE_LINK_REGISTER
 	ret
-.size	ChaCha20_ctr32,.-ChaCha20_ctr32
+.size	ChaCha20_ctr32_nohw,.-ChaCha20_ctr32_nohw
 ___
 
 {{{
@@ -375,9 +357,10 @@
 
 $code.=<<___;
 
-.type	ChaCha20_neon,%function
+.globl	ChaCha20_ctr32_neon
+.type	ChaCha20_ctr32_neon,%function
 .align	5
-ChaCha20_neon:
+ChaCha20_ctr32_neon:
 	AARCH64_SIGN_LINK_REGISTER
 	stp	x29,x30,[sp,#-96]!
 	add	x29,sp,#0
@@ -690,7 +673,7 @@
 	ldp	x29,x30,[sp],#96
 	AARCH64_VALIDATE_LINK_REGISTER
 	ret
-.size	ChaCha20_neon,.-ChaCha20_neon
+.size	ChaCha20_ctr32_neon,.-ChaCha20_ctr32_neon
 ___
 {
 my ($T0,$T1,$T2,$T3,$T4,$T5)=@K;
diff --git a/crypto/chacha/chacha.c b/crypto/chacha/chacha.c
index a4d88c0..52102ab 100644
--- a/crypto/chacha/chacha.c
+++ b/crypto/chacha/chacha.c
@@ -60,7 +60,22 @@
   OPENSSL_memcpy(&out[16], &x[12], sizeof(uint32_t) * 4);
 }
 
-#if defined(CHACHA20_ASM)
+#if defined(CHACHA20_ASM_NOHW)
+static void ChaCha20_ctr32(uint8_t *out, const uint8_t *in, size_t in_len,
+                           const uint32_t key[8], const uint32_t counter[4]) {
+#if defined(CHACHA20_ASM_NEON)  // Dispatch moved here from assembly.
+  if (ChaCha20_ctr32_neon_capable(in_len)) {  // Prefer NEON when eligible.
+    ChaCha20_ctr32_neon(out, in, in_len, key, counter);
+    return;
+  }
+#endif
+  if (in_len > 0) {  // |ChaCha20_ctr32_nohw| requires a nonzero length.
+    ChaCha20_ctr32_nohw(out, in, in_len, key, counter);
+  }
+}
+#endif
+
+#if defined(CHACHA20_ASM) || defined(CHACHA20_ASM_NOHW)
 
 void CRYPTO_chacha_20(uint8_t *out, const uint8_t *in, size_t in_len,
                       const uint8_t key[32], const uint8_t nonce[12],
diff --git a/crypto/chacha/chacha_test.cc b/crypto/chacha/chacha_test.cc
index d4e5332..6c03502 100644
--- a/crypto/chacha/chacha_test.cc
+++ b/crypto/chacha/chacha_test.cc
@@ -347,7 +347,25 @@
   }
 }
 
-#if defined(CHACHA20_ASM) && defined(SUPPORTS_ABI_TEST)
+#if defined(SUPPORTS_ABI_TEST)
+
+static void check_abi(uint8_t *out, const uint8_t *in, size_t in_len,
+                      const uint32_t key[8], const uint32_t counter[4]) {
+#if defined(CHACHA20_ASM)  // Targets declaring a single |ChaCha20_ctr32|.
+  CHECK_ABI(ChaCha20_ctr32, out, in, in_len, key, counter);
+#endif
+#if defined(CHACHA20_ASM_NEON)
+  if (ChaCha20_ctr32_neon_capable(in_len)) {  // Same gate as chacha.c.
+    CHECK_ABI(ChaCha20_ctr32_neon, out, in, in_len, key, counter);
+  }
+#endif
+#if defined(CHACHA20_ASM_NOHW)
+  if (in_len > 0) {  // nohw must not be called with a zero length.
+    CHECK_ABI(ChaCha20_ctr32_nohw, out, in, in_len, key, counter);
+  }
+#endif
+}
+
 TEST(ChaChaTest, ABI) {
   uint32_t key[8];
   OPENSSL_memcpy(key, kKey, sizeof(key));
@@ -357,14 +375,15 @@
   auto buf = std::make_unique<uint8_t[]>(sizeof(kInput));
   for (size_t len = 0; len <= 32; len++) {
     SCOPED_TRACE(len);
-    CHECK_ABI(ChaCha20_ctr32, buf.get(), kInput, len, key, kCounterNonce);
+    check_abi(buf.get(), kInput, len, key, kCounterNonce);
   }
 
   for (size_t len : {32 * 2, 32 * 4, 32 * 8, 32 * 16, 32 * 24}) {
     SCOPED_TRACE(len);
-    CHECK_ABI(ChaCha20_ctr32, buf.get(), kInput, len, key, kCounterNonce);
+    check_abi(buf.get(), kInput, len, key, kCounterNonce);
     // Cover the partial block paths.
-    CHECK_ABI(ChaCha20_ctr32, buf.get(), kInput, len + 15, key, kCounterNonce);
+    check_abi(buf.get(), kInput, len + 15, key, kCounterNonce);
   }
 }
-#endif  // CHACHA20_ASM && SUPPORTS_ABI_TEST
+
+#endif  // SUPPORTS_ABI_TEST
diff --git a/crypto/chacha/internal.h b/crypto/chacha/internal.h
index 5f442ec..a8ae3cb 100644
--- a/crypto/chacha/internal.h
+++ b/crypto/chacha/internal.h
@@ -17,6 +17,8 @@
 
 #include <openssl/base.h>
 
+#include "../internal.h"
+
 #if defined(__cplusplus)
 extern "C" {
 #endif
@@ -27,11 +29,24 @@
 void CRYPTO_hchacha20(uint8_t out[32], const uint8_t key[32],
                       const uint8_t nonce[16]);
 
-#if !defined(OPENSSL_NO_ASM) &&                         \
-    (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || \
-     defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64))
+#if !defined(OPENSSL_NO_ASM) && \
+    (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || defined(OPENSSL_ARM))
+
 #define CHACHA20_ASM
 
+#elif !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64)
+
+#define CHACHA20_ASM_NOHW
+
+#define CHACHA20_ASM_NEON
+OPENSSL_INLINE int ChaCha20_ctr32_neon_capable(size_t len) {
+  return (len >= 192) && CRYPTO_is_NEON_capable();  // Same cutoff as old asm.
+}
+void ChaCha20_ctr32_neon(uint8_t *out, const uint8_t *in, size_t in_len,
+                         const uint32_t key[8], const uint32_t counter[4]);
+#endif
+
+#if defined(CHACHA20_ASM)
 // ChaCha20_ctr32 encrypts |in_len| bytes from |in| and writes the result to
 // |out|. If |in| and |out| alias, they must be equal.
 //
@@ -44,6 +59,12 @@
                     const uint32_t key[8], const uint32_t counter[4]);
 #endif
 
+#if defined(CHACHA20_ASM_NOHW)
+// ChaCha20_ctr32_nohw is like |ChaCha20_ctr32| except |in_len| must be nonzero.
+void ChaCha20_ctr32_nohw(uint8_t *out, const uint8_t *in, size_t in_len,
+                         const uint32_t key[8], const uint32_t counter[4]);
+#endif
+
 
 #if defined(__cplusplus)
 }  // extern C