Assume the Arm assembler can handle ADR

It's 2023. We shouldn't need to be counting offsets from PC anymore.
Instead, let the assembler figure this out with an ADR instruction.

Additionally, since it is easy to do, chacha-armv4.pl now avoids depending
on the exact offset between code and data. We still depend on the code and
data being close enough to fit within ADR's (very tight) bounds, however.
(E.g. an ADR of K256 inside sha256_block_data_order_armv8 would not work
because K256 is too far away.)

I have not removed the offset dependency in the SHA-2 files yet as
they're a bit thorny and .Lsha256_block_data_order-K256 does not seem to
work on Apple's 32-bit Arm assembler. (We probably should drop 32-bit
Arm assembly on Apple platforms. It doesn't really exist anymore.) Once
the armcap references are gone, that will be more straightforward.

Update-Note: If 32-bit Arm assembly no longer builds, let us know and
tell us what your toolchain is.

Change-Id: Ie191781fed98d53c3b986b2f535132b970d79f98
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/64747
Auto-Submit: David Benjamin <davidben@google.com>
Reviewed-by: Bob Beck <bbe@google.com>
Commit-Queue: David Benjamin <davidben@google.com>
diff --git a/crypto/chacha/asm/chacha-armv4.pl b/crypto/chacha/asm/chacha-armv4.pl
index 1f5ceff..24fbb84 100755
--- a/crypto/chacha/asm/chacha-armv4.pl
+++ b/crypto/chacha/asm/chacha-armv4.pl
@@ -198,7 +198,7 @@
 .long	1,0,0,0
 #if __ARM_MAX_ARCH__>=7
 .LOPENSSL_armcap:
-.word   OPENSSL_armcap_P-.LChaCha20_ctr32
+.word   OPENSSL_armcap_P-.Lsigma
 #else
 .word	-1
 #endif
@@ -210,11 +210,7 @@
 .LChaCha20_ctr32:
 	ldr	r12,[sp,#0]		@ pull pointer to counter and nonce
 	stmdb	sp!,{r0-r2,r4-r11,lr}
-#if __ARM_ARCH<7 && !defined(__thumb2__)
-	sub	r14,pc,#16		@ ChaCha20_ctr32
-#else
-	adr	r14,.LChaCha20_ctr32
-#endif
+	adr	r14,.Lsigma
 	cmp	r2,#0			@ len==0?
 #ifdef	__thumb2__
 	itt	eq
@@ -224,7 +220,7 @@
 #if __ARM_MAX_ARCH__>=7
 	cmp	r2,#192			@ test len
 	bls	.Lshort
-	ldr	r4,[r14,#-32]
+	ldr	r4,[r14,#32]
 	ldr	r4,[r14,r4]
 # ifdef	__APPLE__
 	ldr	r4,[r4]
@@ -235,7 +231,6 @@
 #endif
 	ldmia	r12,{r4-r7}		@ load counter and nonce
 	sub	sp,sp,#4*(16)		@ off-load area
-	sub	r14,r14,#64		@ .Lsigma
 	stmdb	sp!,{r4-r7}		@ copy counter and nonce
 	ldmia	r3,{r4-r11}		@ load key
 	ldmia	r14,{r0-r3}		@ load sigma
diff --git a/crypto/fipsmodule/sha/asm/sha256-armv4.pl b/crypto/fipsmodule/sha/asm/sha256-armv4.pl
index 6812b27..fa82f3c 100644
--- a/crypto/fipsmodule/sha/asm/sha256-armv4.pl
+++ b/crypto/fipsmodule/sha/asm/sha256-armv4.pl
@@ -227,11 +227,7 @@
 .type	sha256_block_data_order,%function
 sha256_block_data_order:
 .Lsha256_block_data_order:
-#if __ARM_ARCH<7 && !defined(__thumb2__)
-	sub	r3,pc,#8		@ sha256_block_data_order
-#else
 	adr	r3,.Lsha256_block_data_order
-#endif
 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
 	ldr	r12,.LOPENSSL_armcap
 	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
@@ -246,6 +242,8 @@
 	add	$len,$inp,$len,lsl#6	@ len to point at the end of inp
 	stmdb	sp!,{$ctx,$inp,$len,r4-r11,lr}
 	ldmia	$ctx,{$A,$B,$C,$D,$E,$F,$G,$H}
+	@ TODO(davidben): When the OPENSSL_armcap logic above is removed,
+	@ replace this with a simple ADR.
 	sub	$Ktbl,r3,#256+32	@ K256
 	sub	sp,sp,#16*4		@ alloca(X[16])
 .Loop:
diff --git a/crypto/fipsmodule/sha/asm/sha512-armv4.pl b/crypto/fipsmodule/sha/asm/sha512-armv4.pl
index d470daf..f52b5b0 100644
--- a/crypto/fipsmodule/sha/asm/sha512-armv4.pl
+++ b/crypto/fipsmodule/sha/asm/sha512-armv4.pl
@@ -288,11 +288,7 @@
 .type	sha512_block_data_order,%function
 sha512_block_data_order:
 .Lsha512_block_data_order:
-#if __ARM_ARCH<7 && !defined(__thumb2__)
-	sub	r3,pc,#8		@ sha512_block_data_order
-#else
 	adr	r3,.Lsha512_block_data_order
-#endif
 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
 	ldr	r12,.LOPENSSL_armcap
 	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
@@ -304,6 +300,8 @@
 #endif
 	add	$len,$inp,$len,lsl#7	@ len to point at the end of inp
 	stmdb	sp!,{r4-r12,lr}
+	@ TODO(davidben): When the OPENSSL_armcap logic above is removed,
+	@ replace this with a simple ADR.
 	sub	$Ktbl,r3,#672		@ K512
 	sub	sp,sp,#9*8