Assume the Arm assembler can handle ADR

It's 2023. We shouldn't need to be counting offsets from PC anymore. Instead, let the assembler figure this out with an ADR instruction.

Additionally, since it's easy, in chacha-armv4.pl, avoid depending on the exact offset between code and data. We still depend on the code and data being close enough to fit within ADR's (very tight) bounds, however. (E.g. an ADR of K256 inside sha256_block_data_order_armv8 would not work because K256 is too far away.)

I have not removed the offset dependency in the SHA-2 files yet as they're a bit thorny and .Lsha256_block_data_order-K256 does not seem to work on Apple's 32-bit Arm assembler. (We probably should drop 32-bit Arm assembly on Apple platforms. It doesn't really exist anymore.) Once the armcap references are gone, that will be more straightforward.

Update-Note: If 32-bit Arm assembly no longer builds, let us know and tell us what your toolchain is.

Change-Id: Ie191781fed98d53c3b986b2f535132b970d79f98
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/64747
Auto-Submit: David Benjamin <davidben@google.com>
Reviewed-by: Bob Beck <bbe@google.com>
Commit-Queue: David Benjamin <davidben@google.com>
diff --git a/crypto/chacha/asm/chacha-armv4.pl b/crypto/chacha/asm/chacha-armv4.pl index 1f5ceff..24fbb84 100755 --- a/crypto/chacha/asm/chacha-armv4.pl +++ b/crypto/chacha/asm/chacha-armv4.pl
@@ -198,7 +198,7 @@ .long 1,0,0,0 #if __ARM_MAX_ARCH__>=7 .LOPENSSL_armcap: -.word OPENSSL_armcap_P-.LChaCha20_ctr32 +.word OPENSSL_armcap_P-.Lsigma #else .word -1 #endif @@ -210,11 +210,7 @@ .LChaCha20_ctr32: ldr r12,[sp,#0] @ pull pointer to counter and nonce stmdb sp!,{r0-r2,r4-r11,lr} -#if __ARM_ARCH<7 && !defined(__thumb2__) - sub r14,pc,#16 @ ChaCha20_ctr32 -#else - adr r14,.LChaCha20_ctr32 -#endif + adr r14,.Lsigma cmp r2,#0 @ len==0? #ifdef __thumb2__ itt eq @@ -224,7 +220,7 @@ #if __ARM_MAX_ARCH__>=7 cmp r2,#192 @ test len bls .Lshort - ldr r4,[r14,#-32] + ldr r4,[r14,#32] ldr r4,[r14,r4] # ifdef __APPLE__ ldr r4,[r4] @@ -235,7 +231,6 @@ #endif ldmia r12,{r4-r7} @ load counter and nonce sub sp,sp,#4*(16) @ off-load area - sub r14,r14,#64 @ .Lsigma stmdb sp!,{r4-r7} @ copy counter and nonce ldmia r3,{r4-r11} @ load key ldmia r14,{r0-r3} @ load sigma
diff --git a/crypto/fipsmodule/sha/asm/sha256-armv4.pl b/crypto/fipsmodule/sha/asm/sha256-armv4.pl index 6812b27..fa82f3c 100644 --- a/crypto/fipsmodule/sha/asm/sha256-armv4.pl +++ b/crypto/fipsmodule/sha/asm/sha256-armv4.pl
@@ -227,11 +227,7 @@ .type sha256_block_data_order,%function sha256_block_data_order: .Lsha256_block_data_order: -#if __ARM_ARCH<7 && !defined(__thumb2__) - sub r3,pc,#8 @ sha256_block_data_order -#else adr r3,.Lsha256_block_data_order -#endif #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) ldr r12,.LOPENSSL_armcap ldr r12,[r3,r12] @ OPENSSL_armcap_P @@ -246,6 +242,8 @@ add $len,$inp,$len,lsl#6 @ len to point at the end of inp stmdb sp!,{$ctx,$inp,$len,r4-r11,lr} ldmia $ctx,{$A,$B,$C,$D,$E,$F,$G,$H} + @ TODO(davidben): When the OPENSSL_armcap logic above is removed, + @ replace this with a simple ADR. sub $Ktbl,r3,#256+32 @ K256 sub sp,sp,#16*4 @ alloca(X[16]) .Loop:
diff --git a/crypto/fipsmodule/sha/asm/sha512-armv4.pl b/crypto/fipsmodule/sha/asm/sha512-armv4.pl index d470daf..f52b5b0 100644 --- a/crypto/fipsmodule/sha/asm/sha512-armv4.pl +++ b/crypto/fipsmodule/sha/asm/sha512-armv4.pl
@@ -288,11 +288,7 @@ .type sha512_block_data_order,%function sha512_block_data_order: .Lsha512_block_data_order: -#if __ARM_ARCH<7 && !defined(__thumb2__) - sub r3,pc,#8 @ sha512_block_data_order -#else adr r3,.Lsha512_block_data_order -#endif #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) ldr r12,.LOPENSSL_armcap ldr r12,[r3,r12] @ OPENSSL_armcap_P @@ -304,6 +300,8 @@ #endif add $len,$inp,$len,lsl#7 @ len to point at the end of inp stmdb sp!,{r4-r12,lr} + @ TODO(davidben): When the OPENSSL_armcap logic above is removed, + @ replace this with a simple ADR. sub $Ktbl,r3,#672 @ K512 sub sp,sp,#9*8