Support execute-only memory for AArch64 assembly.

Put data in .rodata and, rather than adr, use the combination of adrp :pg_hi21:
and add :lo12:. Unfortunately, iOS uses different syntax, so we must add more
transforms to arm-xlate.pl.

Tested manually by:

1. Use Android NDK r19-beta1

2. Follow usual instructions to configure CMake for aarch64, but pass
   -DCMAKE_EXE_LINKER_FLAGS="-fuse-ld=lld -Wl,-execute-only".

3. Build. Confirm with readelf -l tool/bssl that .text is not marked
   readable.

4. Push the test binaries onto a Pixel 3. Test normally and with
   --cpu={none,neon,crypto}. I had to pass --gtest_filter=-*Thread* to
   crypto_test. There appears to be an issue with some runtime function
   that's unrelated to our assembly.

No measurable performance difference.

Going forward, to support this, we will need to apply similar changes to
all other AArch64 assembly. This is relatively straightforward, but may
be a little finicky for dual-AArch32/AArch64 files (aesv8-armx.pl).

Update-Note: Assembly syntax is a mess. There's a decent chance some
assembler will get offend.

Change-Id: Ib59b921d4cce76584320fefd23e6bb7ebd4847eb
Reviewed-on: https://boringssl-review.googlesource.com/c/33245
Reviewed-by: Adam Langley <agl@google.com>
Commit-Queue: David Benjamin <davidben@google.com>
diff --git a/crypto/chacha/asm/chacha-armv8.pl b/crypto/chacha/asm/chacha-armv8.pl
index 0a1c415..7795f2c 100755
--- a/crypto/chacha/asm/chacha-armv8.pl
+++ b/crypto/chacha/asm/chacha-armv8.pl
@@ -122,10 +122,10 @@
 $code.=<<___;
 #include <openssl/arm_arch.h>
 
-.text
-
 .extern	OPENSSL_armcap_P
 
+.section .rodata
+
 .align	5
 .Lsigma:
 .quad	0x3320646e61707865,0x6b20657479622d32		// endian-neutral
@@ -139,20 +139,18 @@
 #endif
 .asciz	"ChaCha20 for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
 
+.text
+
 .globl	ChaCha20_ctr32
 .type	ChaCha20_ctr32,%function
 .align	5
 ChaCha20_ctr32:
 	cbz	$len,.Labort
-	adr	@x[0],.LOPENSSL_armcap_P
+	adrp	@x[0],:pg_hi21:OPENSSL_armcap_P
 	cmp	$len,#192
 	b.lo	.Lshort
-#ifdef	__ILP32__
-	ldrsw	@x[1],[@x[0]]
-#else
-	ldr	@x[1],[@x[0]]
-#endif
-	ldr	w17,[@x[1],@x[0]]
+	add	@x[0],@x[0],:lo12:OPENSSL_armcap_P
+	ldr	w17,[@x[0]]
 	tst	w17,#ARMV7_NEON
 	b.ne	ChaCha20_neon
 
@@ -160,7 +158,8 @@
 	stp	x29,x30,[sp,#-96]!
 	add	x29,sp,#0
 
-	adr	@x[0],.Lsigma
+	adrp	@x[0],:pg_hi21:.Lsigma
+	add	@x[0],@x[0],:lo12:.Lsigma
 	stp	x19,x20,[sp,#16]
 	stp	x21,x22,[sp,#32]
 	stp	x23,x24,[sp,#48]
@@ -380,7 +379,8 @@
 	stp	x29,x30,[sp,#-96]!
 	add	x29,sp,#0
 
-	adr	@x[0],.Lsigma
+	adrp	@x[0],:pg_hi21:.Lsigma
+	add	@x[0],@x[0],:lo12:.Lsigma
 	stp	x19,x20,[sp,#16]
 	stp	x21,x22,[sp,#32]
 	stp	x23,x24,[sp,#48]
@@ -699,7 +699,8 @@
 	stp	x29,x30,[sp,#-96]!
 	add	x29,sp,#0
 
-	adr	@x[0],.Lsigma
+	adrp	@x[0],:pg_hi21:.Lsigma
+	add	@x[0],@x[0],:lo12:.Lsigma
 	stp	x19,x20,[sp,#16]
 	stp	x21,x22,[sp,#32]
 	stp	x23,x24,[sp,#48]
diff --git a/crypto/fipsmodule/aes/asm/aesv8-armx.pl b/crypto/fipsmodule/aes/asm/aesv8-armx.pl
index 2fc616e..13f86a0 100644
--- a/crypto/fipsmodule/aes/asm/aesv8-armx.pl
+++ b/crypto/fipsmodule/aes/asm/aesv8-armx.pl
@@ -77,6 +77,9 @@
 	$flavour=~/64/? map("q$_",(0..6)) : map("q$_",(0..3,8..10));
 
 
+# On AArch64, put the data .rodata and use adrp + add for compatibility with
+# execute-only memory. On AArch32, put it in .text and use adr.
+$code.= ".section .rodata\n" if ($flavour =~ /64/);
 $code.=<<___;
 .align	5
 .Lrcon:
@@ -84,6 +87,8 @@
 .long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	// rotate-n-splat
 .long	0x1b,0x1b,0x1b,0x1b
 
+.text
+
 .globl	${prefix}_set_encrypt_key
 .type	${prefix}_set_encrypt_key,%function
 .align	5
@@ -108,7 +113,15 @@
 	tst	$bits,#0x3f
 	b.ne	.Lenc_key_abort
 
+___
+$code.=<<___	if ($flavour =~ /64/);
+	adrp	$ptr,:pg_hi21:.Lrcon
+	add	$ptr,$ptr,:lo12:.Lrcon
+___
+$code.=<<___	if ($flavour !~ /64/);
 	adr	$ptr,.Lrcon
+___
+$code.=<<___;
 	cmp	$bits,#192
 
 	veor	$zero,$zero,$zero
diff --git a/crypto/fipsmodule/sha/asm/sha1-armv8.pl b/crypto/fipsmodule/sha/asm/sha1-armv8.pl
index 80567d9..7c8880f 100644
--- a/crypto/fipsmodule/sha/asm/sha1-armv8.pl
+++ b/crypto/fipsmodule/sha/asm/sha1-armv8.pl
@@ -180,13 +180,8 @@
 .type	sha1_block_data_order,%function
 .align	6
 sha1_block_data_order:
-#ifdef	__ILP32__
-	ldrsw	x16,.LOPENSSL_armcap_P
-#else
-	ldr	x16,.LOPENSSL_armcap_P
-#endif
-	adr	x17,.LOPENSSL_armcap_P
-	add	x16,x16,x17
+	adrp	x16,:pg_hi21:OPENSSL_armcap_P
+	add	x16,x16,:lo12:OPENSSL_armcap_P
 	ldr	w16,[x16]
 	tst	w16,#ARMV8_SHA1
 	b.ne	.Lv8_entry
@@ -255,7 +250,8 @@
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 
-	adr	x4,.Lconst
+	adrp	x4,:pg_hi21:.Lconst
+	add	x4,x4,:lo12:.Lconst
 	eor	$E,$E,$E
 	ld1.32	{$ABCD},[$ctx],#16
 	ld1.32	{$E}[0],[$ctx]
@@ -315,18 +311,13 @@
 	ldr	x29,[sp],#16
 	ret
 .size	sha1_block_armv8,.-sha1_block_armv8
+.section .rodata
 .align	6
 .Lconst:
 .long	0x5a827999,0x5a827999,0x5a827999,0x5a827999	//K_00_19
 .long	0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1	//K_20_39
 .long	0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc	//K_40_59
 .long	0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6	//K_60_79
-.LOPENSSL_armcap_P:
-#ifdef	__ILP32__
-.long	OPENSSL_armcap_P-.
-#else
-.quad	OPENSSL_armcap_P-.
-#endif
 .asciz	"SHA1 block transform for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
 .align	2
 .comm	OPENSSL_armcap_P,4,4
diff --git a/crypto/fipsmodule/sha/asm/sha512-armv8.pl b/crypto/fipsmodule/sha/asm/sha512-armv8.pl
index 22c47d7..3f69071 100644
--- a/crypto/fipsmodule/sha/asm/sha512-armv8.pl
+++ b/crypto/fipsmodule/sha/asm/sha512-armv8.pl
@@ -185,13 +185,8 @@
 ___
 $code.=<<___	if ($SZ==4);
 #ifndef	__KERNEL__
-# ifdef	__ILP32__
-	ldrsw	x16,.LOPENSSL_armcap_P
-# else
-	ldr	x16,.LOPENSSL_armcap_P
-# endif
-	adr	x17,.LOPENSSL_armcap_P
-	add	x16,x16,x17
+	adrp	x16,:pg_hi21:OPENSSL_armcap_P
+	add	x16,x16,:lo12:OPENSSL_armcap_P
 	ldr	w16,[x16]
 	tst	w16,#ARMV8_SHA256
 	b.ne	.Lv8_entry
@@ -213,7 +208,8 @@
 	ldp	$E,$F,[$ctx,#4*$SZ]
 	add	$num,$inp,$num,lsl#`log(16*$SZ)/log(2)`	// end of input
 	ldp	$G,$H,[$ctx,#6*$SZ]
-	adr	$Ktbl,.LK$BITS
+	adrp	$Ktbl,:pg_hi21:.LK$BITS
+	add	$Ktbl,$Ktbl,:lo12:.LK$BITS
 	stp	$ctx,$num,[x29,#96]
 
 .Loop:
@@ -262,6 +258,7 @@
 	ret
 .size	$func,.-$func
 
+.section .rodata
 .align	6
 .type	.LK$BITS,%object
 .LK$BITS:
@@ -330,15 +327,6 @@
 ___
 $code.=<<___;
 .size	.LK$BITS,.-.LK$BITS
-#ifndef	__KERNEL__
-.align	3
-.LOPENSSL_armcap_P:
-# ifdef	__ILP32__
-	.long	OPENSSL_armcap_P-.
-# else
-	.quad	OPENSSL_armcap_P-.
-# endif
-#endif
 .asciz	"SHA$BITS block transform for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
 .align	2
 ___
@@ -352,6 +340,7 @@
 my ($ABCD_SAVE,$EFGH_SAVE)=("v18.16b","v19.16b");
 
 $code.=<<___;
+.text
 #ifndef	__KERNEL__
 .type	sha256_block_armv8,%function
 .align	6
@@ -361,7 +350,8 @@
 	add		x29,sp,#0
 
 	ld1.32		{$ABCD,$EFGH},[$ctx]
-	adr		$Ktbl,.LK256
+	adrp		$Ktbl,:pg_hi21:.LK256
+	add		$Ktbl,$Ktbl,:lo12:.LK256
 
 .Loop_hw:
 	ld1		{@MSG[0]-@MSG[3]},[$inp],#64
diff --git a/crypto/perlasm/arm-xlate.pl b/crypto/perlasm/arm-xlate.pl
index 29e086d..abb97e3 100755
--- a/crypto/perlasm/arm-xlate.pl
+++ b/crypto/perlasm/arm-xlate.pl
@@ -97,6 +97,16 @@
     else
     {	"";	}
 };
+my $section = sub {
+    if ($flavour =~ /ios/) {
+        if ($_[0] eq ".rodata") {
+            return ".section\t__TEXT,__const";
+        }
+        die "Unknown section name $_[0]";
+    } else {
+        return ".section\t" . join(",", @_);
+    }
+};
 
 sub range {
   my ($r,$sfx,$start,$end) = @_;
@@ -179,6 +189,18 @@
 	    $opcode = eval("\$$mnemonic");
 	}
 
+	if ($flavour =~ /ios/) {
+	    # Mach-O and ELF use different syntax for these relocations. Note
+	    # that we require :pg_hi21: to be explicitly listed. It is normally
+	    # optional with adrp instructions.
+	    $line =~ s|:pg_hi21:(\w+)|\1\@PAGE|;
+	    $line =~ s|:lo12:(\w+)|\1\@PAGEOFF|;
+	} else {
+	    # Clang's integrated assembly does not support the optional
+	    # :pg_hi21: markers, so erase them.
+	    $line =~ s|:pg_hi21:||;
+	}
+
 	my $arg=expand_line($line);
 
 	if (ref($opcode) eq 'CODE') {