Assume the Arm assembler can handle ADR
It's 2023. We shouldn't need to be counting offsets from PC anymore.
Instead, let the assembler figure this out with an ADR instruction.
Additionally, since it's easy, in chacha-armv4.pl, avoid depending on
the exact offset between code and data. We still depend on the code and
data being close enough to fit within ADR's (very tight) bounds, however.
(E.g. an ADR of K256 inside sha256_block_data_order_armv8 would not work
because K256 is too far away.)
I have not removed the offset dependency in the SHA-2 files yet as
they're a bit thorny and .Lsha256_block_data_order-K256 does not seem to
work on Apple's 32-bit Arm assembler. (We probably should drop 32-bit
Arm assembly on Apple platforms. It doesn't really exist anymore.) Once
the armcap references are gone, that will be more straightforward.
Update-Note: If 32-bit Arm assembly no longer builds, let us know and
tell us what your toolchain is.
Change-Id: Ie191781fed98d53c3b986b2f535132b970d79f98
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/64747
Auto-Submit: David Benjamin <davidben@google.com>
Reviewed-by: Bob Beck <bbe@google.com>
Commit-Queue: David Benjamin <davidben@google.com>
diff --git a/crypto/chacha/asm/chacha-armv4.pl b/crypto/chacha/asm/chacha-armv4.pl
index 1f5ceff..24fbb84 100755
--- a/crypto/chacha/asm/chacha-armv4.pl
+++ b/crypto/chacha/asm/chacha-armv4.pl
@@ -198,7 +198,7 @@
.long 1,0,0,0
#if __ARM_MAX_ARCH__>=7
.LOPENSSL_armcap:
-.word OPENSSL_armcap_P-.LChaCha20_ctr32
+.word OPENSSL_armcap_P-.Lsigma
#else
.word -1
#endif
@@ -210,11 +210,7 @@
.LChaCha20_ctr32:
ldr r12,[sp,#0] @ pull pointer to counter and nonce
stmdb sp!,{r0-r2,r4-r11,lr}
-#if __ARM_ARCH<7 && !defined(__thumb2__)
- sub r14,pc,#16 @ ChaCha20_ctr32
-#else
- adr r14,.LChaCha20_ctr32
-#endif
+ adr r14,.Lsigma
cmp r2,#0 @ len==0?
#ifdef __thumb2__
itt eq
@@ -224,7 +220,7 @@
#if __ARM_MAX_ARCH__>=7
cmp r2,#192 @ test len
bls .Lshort
- ldr r4,[r14,#-32]
+ ldr r4,[r14,#32]
ldr r4,[r14,r4]
# ifdef __APPLE__
ldr r4,[r4]
@@ -235,7 +231,6 @@
#endif
ldmia r12,{r4-r7} @ load counter and nonce
sub sp,sp,#4*(16) @ off-load area
- sub r14,r14,#64 @ .Lsigma
stmdb sp!,{r4-r7} @ copy counter and nonce
ldmia r3,{r4-r11} @ load key
ldmia r14,{r0-r3} @ load sigma
diff --git a/crypto/fipsmodule/sha/asm/sha256-armv4.pl b/crypto/fipsmodule/sha/asm/sha256-armv4.pl
index 6812b27..fa82f3c 100644
--- a/crypto/fipsmodule/sha/asm/sha256-armv4.pl
+++ b/crypto/fipsmodule/sha/asm/sha256-armv4.pl
@@ -227,11 +227,7 @@
.type sha256_block_data_order,%function
sha256_block_data_order:
.Lsha256_block_data_order:
-#if __ARM_ARCH<7 && !defined(__thumb2__)
- sub r3,pc,#8 @ sha256_block_data_order
-#else
adr r3,.Lsha256_block_data_order
-#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
ldr r12,.LOPENSSL_armcap
ldr r12,[r3,r12] @ OPENSSL_armcap_P
@@ -246,6 +242,8 @@
add $len,$inp,$len,lsl#6 @ len to point at the end of inp
stmdb sp!,{$ctx,$inp,$len,r4-r11,lr}
ldmia $ctx,{$A,$B,$C,$D,$E,$F,$G,$H}
+ @ TODO(davidben): When the OPENSSL_armcap logic above is removed,
+ @ replace this with a simple ADR.
sub $Ktbl,r3,#256+32 @ K256
sub sp,sp,#16*4 @ alloca(X[16])
.Loop:
diff --git a/crypto/fipsmodule/sha/asm/sha512-armv4.pl b/crypto/fipsmodule/sha/asm/sha512-armv4.pl
index d470daf..f52b5b0 100644
--- a/crypto/fipsmodule/sha/asm/sha512-armv4.pl
+++ b/crypto/fipsmodule/sha/asm/sha512-armv4.pl
@@ -288,11 +288,7 @@
.type sha512_block_data_order,%function
sha512_block_data_order:
.Lsha512_block_data_order:
-#if __ARM_ARCH<7 && !defined(__thumb2__)
- sub r3,pc,#8 @ sha512_block_data_order
-#else
adr r3,.Lsha512_block_data_order
-#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
ldr r12,.LOPENSSL_armcap
ldr r12,[r3,r12] @ OPENSSL_armcap_P
@@ -304,6 +300,8 @@
#endif
add $len,$inp,$len,lsl#7 @ len to point at the end of inp
stmdb sp!,{r4-r12,lr}
+ @ TODO(davidben): When the OPENSSL_armcap logic above is removed,
+ @ replace this with a simple ADR.
sub $Ktbl,r3,#672 @ K512
sub sp,sp,#9*8