x86_64 assembly pack: "optimize" for Knights Landing, add AVX-512 results.
The changes to the assembly files are synced from upstream OpenSSL commit
64d92d74985ebb3d0be58a9718f9e080a14a8e7f. cpu-intel.c is translated to C
from that commit and d84df594404ebbd71d21fec5526178d935e4d88d.
Change-Id: I02c8f83aa4780df301c21f011ef2d8d8300e2f2a
Reviewed-on: https://boringssl-review.googlesource.com/18411
Commit-Queue: Adam Langley <agl@google.com>
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/cpu-intel.c b/crypto/cpu-intel.c
index 92a8eff..ef327df 100644
--- a/crypto/cpu-intel.c
+++ b/crypto/cpu-intel.c
@@ -207,6 +207,14 @@
/* Reserved bit #30 is repurposed to signal an Intel CPU. */
if (is_intel) {
edx |= (1 << 30);
+
+ /* Clear the XSAVE bit on Knights Landing to mimic Silvermont. This enables
+ * some Silvermont-specific codepaths which perform better. See OpenSSL
+ * commit 64d92d74985ebb3d0be58a9718f9e080a14a8e7f. */
+ if ((eax & 0x0fff0ff0) == 0x00050670 /* Knights Landing */ ||
+ (eax & 0x0fff0ff0) == 0x00080650 /* Knights Mill (per SDE) */) {
+ ecx &= ~(1 << 26);
+ }
} else {
edx &= ~(1 << 30);
}
@@ -243,6 +251,12 @@
extended_features &= ~(1 << 16);
}
+ /* Disable ADX instructions on Knights Landing. See OpenSSL commit
+ * 64d92d74985ebb3d0be58a9718f9e080a14a8e7f. */
+ if ((ecx & (1 << 26)) == 0) {
+ extended_features &= ~(1 << 19);
+ }
+
OPENSSL_ia32cap_P[0] = edx;
OPENSSL_ia32cap_P[1] = ecx;
OPENSSL_ia32cap_P[2] = extended_features;