Add assembly support for -fsanitize=hwaddress tagged globals.

As of LLVM r368102, Clang will set a pointer tag in bits 56-63 of the
address of a global when compiling with -fsanitize=hwaddress. This requires
an adjustment to assembly code that takes the address of such globals: the
code cannot use the regular R_AARCH64_ADR_PREL_PG_HI21 relocation to refer
to the global, since the tag would take the address out of range. Instead,
the code must use the non-checking (_NC) variant of the relocation (the
link-time check is substituted by a runtime check).

This change makes the necessary adjustment in all of the places where it
is needed when compiling with -fsanitize=hwaddress. While here, shrink the
code by an instruction in each of those places by folding the addend into
the load, and remove some dead code that seems to have been left over from
commit 293d9ee4e837d122a28cd992e37779a5de48dc7f.

We check for a sufficiently new clang before using the :pg_hi21_nc: relocation
variant because support for this variant was only added recently.

Change-Id: Ic9da8386e19c03c1e90c103a81232a254277e9a5
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/36924
Reviewed-by: Adam Langley <agl@google.com>
Commit-Queue: Adam Langley <agl@google.com>
diff --git a/crypto/chacha/asm/chacha-armv8.pl b/crypto/chacha/asm/chacha-armv8.pl
index 02325e7..9cae28c 100755
--- a/crypto/chacha/asm/chacha-armv8.pl
+++ b/crypto/chacha/asm/chacha-armv8.pl
@@ -131,12 +131,6 @@
 .quad	0x3320646e61707865,0x6b20657479622d32		// endian-neutral
 .Lone:
 .long	1,0,0,0
-.LOPENSSL_armcap_P:
-#ifdef	__ILP32__
-.long	OPENSSL_armcap_P-.
-#else
-.quad	OPENSSL_armcap_P-.
-#endif
 .asciz	"ChaCha20 for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
 
 .text
@@ -146,11 +140,14 @@
 .align	5
 ChaCha20_ctr32:
 	cbz	$len,.Labort
+#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
+	adrp	@x[0],:pg_hi21_nc:OPENSSL_armcap_P
+#else
 	adrp	@x[0],:pg_hi21:OPENSSL_armcap_P
+#endif
 	cmp	$len,#192
 	b.lo	.Lshort
-	add	@x[0],@x[0],:lo12:OPENSSL_armcap_P
-	ldr	w17,[@x[0]]
+	ldr	w17,[@x[0],:lo12:OPENSSL_armcap_P]
 	tst	w17,#ARMV7_NEON
 	b.ne	ChaCha20_neon
 
diff --git a/crypto/fipsmodule/sha/asm/sha1-armv8.pl b/crypto/fipsmodule/sha/asm/sha1-armv8.pl
index c147462..59d55b3 100644
--- a/crypto/fipsmodule/sha/asm/sha1-armv8.pl
+++ b/crypto/fipsmodule/sha/asm/sha1-armv8.pl
@@ -180,9 +180,12 @@
 .type	sha1_block_data_order,%function
 .align	6
 sha1_block_data_order:
+#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
+	adrp	x16,:pg_hi21_nc:OPENSSL_armcap_P
+#else
 	adrp	x16,:pg_hi21:OPENSSL_armcap_P
-	add	x16,x16,:lo12:OPENSSL_armcap_P
-	ldr	w16,[x16]
+#endif
+	ldr	w16,[x16,:lo12:OPENSSL_armcap_P]
 	tst	w16,#ARMV8_SHA1
 	b.ne	.Lv8_entry
 
diff --git a/crypto/fipsmodule/sha/asm/sha512-armv8.pl b/crypto/fipsmodule/sha/asm/sha512-armv8.pl
index 64306be..205b06d 100644
--- a/crypto/fipsmodule/sha/asm/sha512-armv8.pl
+++ b/crypto/fipsmodule/sha/asm/sha512-armv8.pl
@@ -185,9 +185,12 @@
 ___
 $code.=<<___	if ($SZ==4);
 #ifndef	__KERNEL__
+#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
+	adrp	x16,:pg_hi21_nc:OPENSSL_armcap_P
+#else
 	adrp	x16,:pg_hi21:OPENSSL_armcap_P
-	add	x16,x16,:lo12:OPENSSL_armcap_P
-	ldr	w16,[x16]
+#endif
+	ldr	w16,[x16,:lo12:OPENSSL_armcap_P]
 	tst	w16,#ARMV8_SHA256
 	b.ne	.Lv8_entry
 #endif
diff --git a/crypto/perlasm/arm-xlate.pl b/crypto/perlasm/arm-xlate.pl
index 655be61..4dec276 100755
--- a/crypto/perlasm/arm-xlate.pl
+++ b/crypto/perlasm/arm-xlate.pl
@@ -144,11 +144,12 @@
 // This file is generated from a similarly-named Perl script in the BoringSSL
 // source tree. Do not edit by hand.
 
-#if defined(__has_feature)
+#if !defined(__has_feature)
+#define __has_feature(x) 0
+#endif
 #if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
 #define OPENSSL_NO_ASM
 #endif
-#endif
 
 #if !defined(OPENSSL_NO_ASM)
 ___