chacha: Move 32-bit Arm CPU dispatch from assembly to C

This also removes handling of the empty input, to match what was done
for aarch64. (The C code ensures the function is never called in this
case.)

Bug: 673
Change-Id: I7e868a9eb0b022c22c3f4ba2c8782ae1464c5a52
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/64967
Auto-Submit: David Benjamin <davidben@google.com>
Reviewed-by: Bob Beck <bbe@google.com>
Commit-Queue: Bob Beck <bbe@google.com>
diff --git a/crypto/chacha/asm/chacha-armv4.pl b/crypto/chacha/asm/chacha-armv4.pl index 24fbb84..fd92fdb 100755 --- a/crypto/chacha/asm/chacha-armv4.pl +++ b/crypto/chacha/asm/chacha-armv4.pl
@@ -196,39 +196,14 @@ .long 0x61707865,0x3320646e,0x79622d32,0x6b206574 @ endian-neutral .Lone: .long 1,0,0,0 -#if __ARM_MAX_ARCH__>=7 -.LOPENSSL_armcap: -.word OPENSSL_armcap_P-.Lsigma -#else -.word -1 -#endif -.globl ChaCha20_ctr32 -.type ChaCha20_ctr32,%function +.globl ChaCha20_ctr32_nohw +.type ChaCha20_ctr32_nohw,%function .align 5 -ChaCha20_ctr32: -.LChaCha20_ctr32: +ChaCha20_ctr32_nohw: ldr r12,[sp,#0] @ pull pointer to counter and nonce stmdb sp!,{r0-r2,r4-r11,lr} adr r14,.Lsigma - cmp r2,#0 @ len==0? -#ifdef __thumb2__ - itt eq -#endif - addeq sp,sp,#4*3 - beq .Lno_data -#if __ARM_MAX_ARCH__>=7 - cmp r2,#192 @ test len - bls .Lshort - ldr r4,[r14,#32] - ldr r4,[r14,r4] -# ifdef __APPLE__ - ldr r4,[r4] -# endif - tst r4,#ARMV7_NEON - bne .LChaCha20_neon -.Lshort: -#endif ldmia r12,{r4-r7} @ load counter and nonce sub sp,sp,#4*(16) @ off-load area stmdb sp!,{r4-r7} @ copy counter and nonce @@ -621,9 +596,8 @@ .Ldone: add sp,sp,#4*(32+3) -.Lno_data: ldmia sp!,{r4-r11,pc} -.size ChaCha20_ctr32,.-ChaCha20_ctr32 +.size ChaCha20_ctr32_nohw,.-ChaCha20_ctr32_nohw ___ {{{ @@ -665,12 +639,12 @@ .arch armv7-a .fpu neon -.type ChaCha20_neon,%function +.globl ChaCha20_ctr32_neon +.type ChaCha20_ctr32_neon,%function .align 5 -ChaCha20_neon: +ChaCha20_ctr32_neon: ldr r12,[sp,#0] @ pull pointer to counter and nonce stmdb sp!,{r0-r2,r4-r11,lr} -.LChaCha20_neon: adr r14,.Lsigma vstmdb sp!,{d8-d15} @ ABI spec says so stmdb sp!,{r0-r3} @@ -1145,8 +1119,7 @@ vldmia sp,{d8-d15} add sp,sp,#4*(16+3) ldmia sp!,{r4-r11,pc} -.size ChaCha20_neon,.-ChaCha20_neon -.comm OPENSSL_armcap_P,4,4 +.size ChaCha20_ctr32_neon,.-ChaCha20_ctr32_neon #endif ___ }}}
diff --git a/crypto/chacha/internal.h b/crypto/chacha/internal.h index a8ae3cb..d31a044 100644 --- a/crypto/chacha/internal.h +++ b/crypto/chacha/internal.h
@@ -30,11 +30,12 @@ const uint8_t nonce[16]); #if !defined(OPENSSL_NO_ASM) && \ - (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || defined(OPENSSL_ARM)) + (defined(OPENSSL_X86) || defined(OPENSSL_X86_64)) #define CHACHA20_ASM -#elif !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) +#elif !defined(OPENSSL_NO_ASM) && \ + (defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)) #define CHACHA20_ASM_NOHW