Don't use __ARMEL__/__ARMEB__ in aarch64 assembly

GCC's __ARMEL__ and __ARMEB__ defines denote little- and big-endian arm,
respectively. They are not defined on aarch64, which instead use
__AARCH64EL__ and __AARCH64EB__.

However, OpenSSL's assembly originally used the 32-bit defines on both
platforms and even define __ARMEL__ and __ARMEB__ in arm_arch.h. This is
less portable and can even interfere with other headers, which use
__ARMEL__ to detect little-endian arm. (Our own base.h believes
__ARMEL__ implies 32-bit arm. We just happen to check __AARCH64EL__
first. base.h is probably also always included before arm_arch.h.)

Over time, the aarch64 assembly has switched to the correct defines,
such as in 32bbb62ea634239e7cb91d6450ba23517082bab6. This commit
finishes the job.

(There is an even more official endianness detector, __ARM_BIG_ENDIAN in
the Arm C Language Extensions. But I've stuck with the GCC ones here as
that would be a larger change.)

See also https://github.com/openssl/openssl/pull/17373

Change-Id: Ic04ff85782e6599cdeaeb33d12c2fa8edc882224
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/50848
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/chacha/asm/chacha-armv8.pl b/crypto/chacha/asm/chacha-armv8.pl
index 608db66..c3a3653 100755
--- a/crypto/chacha/asm/chacha-armv8.pl
+++ b/crypto/chacha/asm/chacha-armv8.pl
@@ -171,7 +171,7 @@
 	ldp	@d[2],@d[3],[$key]		// load key
 	ldp	@d[4],@d[5],[$key,#16]
 	ldp	@d[6],@d[7],[$ctr]		// load counter
-#ifdef	__ARMEB__
+#ifdef	__AARCH64EB__
 	ror	@d[2],@d[2],#32
 	ror	@d[3],@d[3],#32
 	ror	@d[4],@d[4],#32
@@ -240,7 +240,7 @@
 	add	@x[14],@x[14],@x[15],lsl#32
 	ldp	@x[13],@x[15],[$inp,#48]
 	add	$inp,$inp,#64
-#ifdef	__ARMEB__
+#ifdef	__AARCH64EB__
 	rev	@x[0],@x[0]
 	rev	@x[2],@x[2]
 	rev	@x[4],@x[4]
@@ -297,7 +297,7 @@
 	add	@x[10],@x[10],@x[11],lsl#32
 	add	@x[12],@x[12],@x[13],lsl#32
 	add	@x[14],@x[14],@x[15],lsl#32
-#ifdef	__ARMEB__
+#ifdef	__AARCH64EB__
 	rev	@x[0],@x[0]
 	rev	@x[2],@x[2]
 	rev	@x[4],@x[4]
@@ -402,7 +402,7 @@
 	ldp	@d[6],@d[7],[$ctr]		// load counter
 	ld1	{@K[3]},[$ctr]
 	ld1	{$ONE},[@x[0]]
-#ifdef	__ARMEB__
+#ifdef	__AARCH64EB__
 	rev64	@K[0],@K[0]
 	ror	@d[2],@d[2],#32
 	ror	@d[3],@d[3],#32
@@ -519,7 +519,7 @@
 	add	@x[14],@x[14],@x[15],lsl#32
 	ldp	@x[13],@x[15],[$inp,#48]
 	add	$inp,$inp,#64
-#ifdef	__ARMEB__
+#ifdef	__AARCH64EB__
 	rev	@x[0],@x[0]
 	rev	@x[2],@x[2]
 	rev	@x[4],@x[4]
@@ -599,7 +599,7 @@
 	add	@x[14],@x[14],@x[15],lsl#32
 	ldp	@x[13],@x[15],[$inp,#48]
 	add	$inp,$inp,#64
-#ifdef	__ARMEB__
+#ifdef	__AARCH64EB__
 	rev	@x[0],@x[0]
 	rev	@x[2],@x[2]
 	rev	@x[4],@x[4]
@@ -724,7 +724,7 @@
 	ldp	@d[6],@d[7],[$ctr]		// load counter
 	ld1	{@K[3]},[$ctr]
 	ld1	{$ONE},[@x[0]]
-#ifdef	__ARMEB__
+#ifdef	__AARCH64EB__
 	rev64	@K[0],@K[0]
 	ror	@d[2],@d[2],#32
 	ror	@d[3],@d[3],#32
@@ -866,7 +866,7 @@
 	add	@x[14],@x[14],@x[15],lsl#32
 	ldp	@x[13],@x[15],[$inp,#48]
 	add	$inp,$inp,#64
-#ifdef	__ARMEB__
+#ifdef	__AARCH64EB__
 	rev	@x[0],@x[0]
 	rev	@x[2],@x[2]
 	rev	@x[4],@x[4]
@@ -1007,7 +1007,7 @@
 	add	$inp,$inp,#64
 	 add	$B5,$B5,@K[1]
 
-#ifdef	__ARMEB__
+#ifdef	__AARCH64EB__
 	rev	@x[0],@x[0]
 	rev	@x[2],@x[2]
 	rev	@x[4],@x[4]
diff --git a/crypto/fipsmodule/aes/asm/aesv8-armx.pl b/crypto/fipsmodule/aes/asm/aesv8-armx.pl
index 82022c7..9f62232 100644
--- a/crypto/fipsmodule/aes/asm/aesv8-armx.pl
+++ b/crypto/fipsmodule/aes/asm/aesv8-armx.pl
@@ -975,6 +975,9 @@
 	s/\.[ui]?64//o and s/\.16b/\.2d/go;
 	s/\.[42]([sd])\[([0-3])\]/\.$1\[$2\]/o;
 
+	# Switch preprocessor checks to aarch64 versions.
+	s/__ARME([BL])__/__AARCH64E$1__/go;
+
 	print $_,"\n";
     }
 } else {				######## 32-bit code
diff --git a/crypto/fipsmodule/modes/asm/ghashv8-armx.pl b/crypto/fipsmodule/modes/asm/ghashv8-armx.pl
index 24eb773..74f4b9b 100644
--- a/crypto/fipsmodule/modes/asm/ghashv8-armx.pl
+++ b/crypto/fipsmodule/modes/asm/ghashv8-armx.pl
@@ -735,6 +735,9 @@
 	s/\.[uisp]?64//o and s/\.16b/\.2d/go;
 	s/\.[42]([sd])\[([0-3])\]/\.$1\[$2\]/o;
 
+	# Switch preprocessor checks to aarch64 versions.
+	s/__ARME([BL])__/__AARCH64E$1__/go;
+
 	print $_,"\n";
     }
 } else {				######## 32-bit code
diff --git a/crypto/fipsmodule/sha/asm/sha1-armv8.pl b/crypto/fipsmodule/sha/asm/sha1-armv8.pl
index 25e5234..856b819 100644
--- a/crypto/fipsmodule/sha/asm/sha1-armv8.pl
+++ b/crypto/fipsmodule/sha/asm/sha1-armv8.pl
@@ -61,7 +61,7 @@
 	ldr	@Xx[$i+2],[$inp,#`($i+2)*4-64`]
 ___
 $code.=<<___ if ($i<14 && ($i&1));
-#ifdef	__ARMEB__
+#ifdef	__AARCH64EB__
 	ror	@Xx[$i+1],@Xx[$i+1],#32
 #else
 	rev32	@Xx[$i+1],@Xx[$i+1]
@@ -209,7 +209,7 @@
 	movz	$K,#0x7999
 	sub	$num,$num,#1
 	movk	$K,#0x5a82,lsl#16
-#ifdef	__ARMEB__
+#ifdef	__AARCH64EB__
 	ror	$Xx[0],@Xx[0],#32
 #else
 	rev32	@Xx[0],@Xx[0]
diff --git a/include/openssl/arm_arch.h b/include/openssl/arm_arch.h
index 13f5b4a..3a34829 100644
--- a/include/openssl/arm_arch.h
+++ b/include/openssl/arm_arch.h
@@ -64,11 +64,6 @@
 # elif defined(__GNUC__)
 #  if defined(__aarch64__)
 #    define __ARM_ARCH__ 8
-#    if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
-#      define __ARMEB__
-#    else
-#      define __ARMEL__
-#    endif
   // Why doesn't gcc define __ARM_ARCH__? Instead it defines
   // bunch of below macros. See all_architectires[] table in
   // gcc/config/arm/arm.c. On a side note it defines