chacha: Move 32-bit Arm CPU dispatch from assembly to C
This also removes the assembly's handling of empty input (len == 0), to
match what was done for aarch64. (The C code ensures the function is
never called with an empty input.)
Bug: 673
Change-Id: I7e868a9eb0b022c22c3f4ba2c8782ae1464c5a52
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/64967
Auto-Submit: David Benjamin <davidben@google.com>
Reviewed-by: Bob Beck <bbe@google.com>
Commit-Queue: Bob Beck <bbe@google.com>
diff --git a/crypto/chacha/asm/chacha-armv4.pl b/crypto/chacha/asm/chacha-armv4.pl
index 24fbb84..fd92fdb 100755
--- a/crypto/chacha/asm/chacha-armv4.pl
+++ b/crypto/chacha/asm/chacha-armv4.pl
@@ -196,39 +196,14 @@
.long 0x61707865,0x3320646e,0x79622d32,0x6b206574 @ endian-neutral
.Lone:
.long 1,0,0,0
-#if __ARM_MAX_ARCH__>=7
-.LOPENSSL_armcap:
-.word OPENSSL_armcap_P-.Lsigma
-#else
-.word -1
-#endif
-.globl ChaCha20_ctr32
-.type ChaCha20_ctr32,%function
+.globl ChaCha20_ctr32_nohw
+.type ChaCha20_ctr32_nohw,%function
.align 5
-ChaCha20_ctr32:
-.LChaCha20_ctr32:
+ChaCha20_ctr32_nohw:
ldr r12,[sp,#0] @ pull pointer to counter and nonce
stmdb sp!,{r0-r2,r4-r11,lr}
adr r14,.Lsigma
- cmp r2,#0 @ len==0?
-#ifdef __thumb2__
- itt eq
-#endif
- addeq sp,sp,#4*3
- beq .Lno_data
-#if __ARM_MAX_ARCH__>=7
- cmp r2,#192 @ test len
- bls .Lshort
- ldr r4,[r14,#32]
- ldr r4,[r14,r4]
-# ifdef __APPLE__
- ldr r4,[r4]
-# endif
- tst r4,#ARMV7_NEON
- bne .LChaCha20_neon
-.Lshort:
-#endif
ldmia r12,{r4-r7} @ load counter and nonce
sub sp,sp,#4*(16) @ off-load area
stmdb sp!,{r4-r7} @ copy counter and nonce
@@ -621,9 +596,8 @@
.Ldone:
add sp,sp,#4*(32+3)
-.Lno_data:
ldmia sp!,{r4-r11,pc}
-.size ChaCha20_ctr32,.-ChaCha20_ctr32
+.size ChaCha20_ctr32_nohw,.-ChaCha20_ctr32_nohw
___
{{{
@@ -665,12 +639,12 @@
.arch armv7-a
.fpu neon
-.type ChaCha20_neon,%function
+.globl ChaCha20_ctr32_neon
+.type ChaCha20_ctr32_neon,%function
.align 5
-ChaCha20_neon:
+ChaCha20_ctr32_neon:
ldr r12,[sp,#0] @ pull pointer to counter and nonce
stmdb sp!,{r0-r2,r4-r11,lr}
-.LChaCha20_neon:
adr r14,.Lsigma
vstmdb sp!,{d8-d15} @ ABI spec says so
stmdb sp!,{r0-r3}
@@ -1145,8 +1119,7 @@
vldmia sp,{d8-d15}
add sp,sp,#4*(16+3)
ldmia sp!,{r4-r11,pc}
-.size ChaCha20_neon,.-ChaCha20_neon
-.comm OPENSSL_armcap_P,4,4
+.size ChaCha20_ctr32_neon,.-ChaCha20_ctr32_neon
#endif
___
}}}
diff --git a/crypto/chacha/internal.h b/crypto/chacha/internal.h
index a8ae3cb..d31a044 100644
--- a/crypto/chacha/internal.h
+++ b/crypto/chacha/internal.h
@@ -30,11 +30,12 @@
const uint8_t nonce[16]);
#if !defined(OPENSSL_NO_ASM) && \
- (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || defined(OPENSSL_ARM))
+ (defined(OPENSSL_X86) || defined(OPENSSL_X86_64))
#define CHACHA20_ASM
-#elif !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64)
+#elif !defined(OPENSSL_NO_ASM) && \
+ (defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64))
#define CHACHA20_ASM_NOHW