Switch __ARM_FEATURE_CRYPTO to __ARM_FEATURE_{AES,SHA2}.

The latest version of ACLE splits __ARM_FEATURE_CRYPTO into two defines
to reflect that, starting with ARMv8.2, the cryptography extension can
include {AES,PMULL} and {SHA1,SHA256} separately.

Also standardize on __ARM_NEON, which is the recommended symbol from
ACLE, and the only one defined on non-Apple aarch64 targets. Digging
through GCC history, __ARM_NEON__ is a bit older.  __ARM_NEON was added
in GCC's 9e94a7fc5ab770928b9e6a2b74e292d35b4c94da from 2012, part of GCC
4.8.0.

I suspect we can stop paying attention to __ARM_NEON__ at this point,
but I've left both working for now. __ARM_FEATURE_{AES,SHA2} is
definitely too new to fully replace __ARM_FEATURE_CRYPTO.

Tested on Linux that -march=armv8-a+aes now also drops the fallback AES
code. Previously, we would pick up -march=armv8-a+crypto, but not
-march=armv8-a+aes. Also tested that, on an OPENSSL_STATIC_ARMCAP build,
-march=armv8-a+sha2 sets the SHA-1 and SHA-256 features.

Change-Id: I749bdbc501ba2da23177ddb823547efcd77e5c98
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/50847
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/BUILDING.md b/BUILDING.md
index 64b1520..10645be 100644
--- a/BUILDING.md
+++ b/BUILDING.md
@@ -170,10 +170,12 @@
 capability information has to be provided by a combination of compile-time
 information and the operating system.
 
-BoringSSL determines capabilities at compile-time based on `__ARM_NEON__`,
-`__ARM_FEATURE_CRYPTO`, and other preprocessor symbols reported by the compiler.
+BoringSSL determines capabilities at compile-time based on `__ARM_NEON`,
+`__ARM_FEATURE_AES`, and other preprocessor symbols defined in
+[Arm C Language Extensions (ACLE)](https://developer.arm.com/architectures/system-architectures/software-standards/acle).
 These values are usually controlled by the `-march` flag. You can also define
-any of the following to enable the corresponding ARM feature.
+any of the following to enable the corresponding ARM feature, but using the ACLE
+symbols via `-march` is recommended.
 
   * `OPENSSL_STATIC_ARMCAP_NEON`
   * `OPENSSL_STATIC_ARMCAP_AES`
diff --git a/crypto/cpu_aarch64_apple.c b/crypto/cpu_aarch64_apple.c
index 23c8d18..55c20f8 100644
--- a/crypto/cpu_aarch64_apple.c
+++ b/crypto/cpu_aarch64_apple.c
@@ -53,7 +53,8 @@
   // Apple ARM64 platforms have NEON and cryptography extensions available
   // statically, so we do not need to query them. In particular, there sometimes
   // are no sysctls corresponding to such features. See below.
-#if !defined(__ARM_NEON) || !defined(__ARM_FEATURE_CRYPTO)
+#if !defined(__ARM_NEON) || !defined(__ARM_FEATURE_AES) || \
+    !defined(__ARM_FEATURE_SHA2)
 #error "NEON and crypto extensions should be statically available."
 #endif
   OPENSSL_armcap_P =
diff --git a/crypto/crypto.c b/crypto/crypto.c
index 4be91dc..af7e560 100644
--- a/crypto/crypto.c
+++ b/crypto/crypto.c
@@ -86,23 +86,29 @@
 
 #if defined(OPENSSL_STATIC_ARMCAP)
 
+// See ARM ACLE for the definitions of these macros. Note |__ARM_FEATURE_AES|
+// covers both AES and PMULL and |__ARM_FEATURE_SHA2| covers SHA-1 and SHA-256.
+// https://developer.arm.com/architectures/system-architectures/software-standards/acle
+// https://github.com/ARM-software/acle/issues/152
+//
+// TODO(davidben): Do we still need |OPENSSL_STATIC_ARMCAP_*| or are the
+// standard flags and -march sufficient?
 HIDDEN uint32_t OPENSSL_armcap_P =
-#if defined(OPENSSL_STATIC_ARMCAP_NEON) || \
-    (defined(__ARM_NEON__) || defined(__ARM_NEON))
+#if defined(OPENSSL_STATIC_ARMCAP_NEON) || defined(__ARM_NEON)
     ARMV7_NEON |
 #endif
-#if defined(OPENSSL_STATIC_ARMCAP_AES) || defined(__ARM_FEATURE_CRYPTO)
+#if defined(OPENSSL_STATIC_ARMCAP_AES) || defined(__ARM_FEATURE_AES)
     ARMV8_AES |
 #endif
-#if defined(OPENSSL_STATIC_ARMCAP_SHA1) || defined(__ARM_FEATURE_CRYPTO)
+#if defined(OPENSSL_STATIC_ARMCAP_PMULL) || defined(__ARM_FEATURE_AES)
+    ARMV8_PMULL |
+#endif
+#if defined(OPENSSL_STATIC_ARMCAP_SHA1) || defined(__ARM_FEATURE_SHA2)
     ARMV8_SHA1 |
 #endif
-#if defined(OPENSSL_STATIC_ARMCAP_SHA256) || defined(__ARM_FEATURE_CRYPTO)
+#if defined(OPENSSL_STATIC_ARMCAP_SHA256) || defined(__ARM_FEATURE_SHA2)
     ARMV8_SHA256 |
 #endif
-#if defined(OPENSSL_STATIC_ARMCAP_PMULL) || defined(__ARM_FEATURE_CRYPTO)
-    ARMV8_PMULL |
-#endif
 #if defined(__ARM_FEATURE_SHA512)
     ARMV8_SHA512 |
 #endif
diff --git a/crypto/hrss/hrss.c b/crypto/hrss/hrss.c
index 558c456..8e21068 100644
--- a/crypto/hrss/hrss.c
+++ b/crypto/hrss/hrss.c
@@ -37,8 +37,7 @@
 #include <emmintrin.h>
 #endif
 
-#if (defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)) && \
-    (defined(__ARM_NEON__) || defined(__ARM_NEON))
+#if (defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)) && defined(__ARM_NEON)
 #include <arm_neon.h>
 #endif
 
@@ -188,8 +187,7 @@
 // compiler requires that |i| be a compile-time constant.)
 #define vec_get_word(v, i) _mm_extract_epi16(v, i)
 
-#elif (defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)) && \
-    (defined(__ARM_NEON__) || defined(__ARM_NEON))
+#elif (defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)) && defined(__ARM_NEON)
 
 #define HRSS_HAVE_VECTOR_UNIT
 typedef uint16x8_t vec_t;
diff --git a/crypto/internal.h b/crypto/internal.h
index 5937250..42f94d5 100644
--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -995,6 +995,20 @@
 #define OPENSSL_STATIC_ARMCAP
 #endif
 
+// Normalize some older feature flags to their modern ACLE values.
+// https://developer.arm.com/architectures/system-architectures/software-standards/acle
+#if defined(__ARM_NEON__) && !defined(__ARM_NEON)
+#define __ARM_NEON 1
+#endif
+#if defined(__ARM_FEATURE_CRYPTO)
+#if !defined(__ARM_FEATURE_AES)
+#define __ARM_FEATURE_AES 1
+#endif
+#if !defined(__ARM_FEATURE_SHA2)
+#define __ARM_FEATURE_SHA2 1
+#endif
+#endif
+
 #if !defined(OPENSSL_STATIC_ARMCAP)
 // CRYPTO_is_NEON_capable_at_runtime returns true if the current CPU has a NEON
 // unit. Note that |OPENSSL_armcap_P| also exists and contains the same
@@ -1013,8 +1027,7 @@
 // CRYPTO_is_NEON_capable returns true if the current CPU has a NEON unit. If
 // this is known statically, it is a constant inline function.
 OPENSSL_INLINE int CRYPTO_is_NEON_capable(void) {
-#if defined(__ARM_NEON__) || defined(__ARM_NEON) || \
-    defined(OPENSSL_STATIC_ARMCAP_NEON)
+#if defined(OPENSSL_STATIC_ARMCAP_NEON) || defined(__ARM_NEON)
   return 1;
 #elif defined(OPENSSL_STATIC_ARMCAP)
   return 0;
@@ -1024,7 +1037,7 @@
 }
 
 OPENSSL_INLINE int CRYPTO_is_ARMv8_AES_capable(void) {
-#if defined(OPENSSL_STATIC_ARMCAP_AES) || defined(__ARM_FEATURE_CRYPTO)
+#if defined(OPENSSL_STATIC_ARMCAP_AES) || defined(__ARM_FEATURE_AES)
   return 1;
 #elif defined(OPENSSL_STATIC_ARMCAP)
   return 0;
@@ -1034,7 +1047,7 @@
 }
 
 OPENSSL_INLINE int CRYPTO_is_ARMv8_PMULL_capable(void) {
-#if defined(OPENSSL_STATIC_ARMCAP_PMULL) || defined(__ARM_FEATURE_CRYPTO)
+#if defined(OPENSSL_STATIC_ARMCAP_PMULL) || defined(__ARM_FEATURE_AES)
   return 1;
 #elif defined(OPENSSL_STATIC_ARMCAP)
   return 0;