Detect if the kernel preserves %zmm registers.
Also clear AVX512 bits if %xmm and %ymm registers are not preserved. See
also upstream OpenSSL commit 66bee01c822c5dd26679cad076c52b3d81199668.
Change-Id: I1bcaf4cf355e3ca0adb5d207ae6185f9b49c0245
Reviewed-on: https://boringssl-review.googlesource.com/18410
Reviewed-by: Steven Valdez <svaldez@google.com>
Reviewed-by: David Benjamin <davidben@google.com>
Commit-Queue: David Benjamin <davidben@google.com>
CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
diff --git a/crypto/cpu-intel.c b/crypto/cpu-intel.c
index f2e0c4c..92a8eff 100644
--- a/crypto/cpu-intel.c
+++ b/crypto/cpu-intel.c
@@ -223,13 +223,24 @@
/* XCR0 may only be queried if the OSXSAVE bit is set. */
xcr0 = OPENSSL_xgetbv(0);
}
- /* See Intel manual, section 14.3. */
+ /* See Intel manual, volume 1, section 14.3. */
if ((xcr0 & 6) != 6) {
/* YMM registers cannot be used. */
ecx &= ~(1 << 28); /* AVX */
ecx &= ~(1 << 12); /* FMA */
ecx &= ~(1 << 11); /* AMD XOP */
- extended_features &= ~(1 << 5); /* AVX2 */
+ /* Clear AVX2 and AVX512* bits.
+ *
+ * TODO(davidben): Should bits 17 and 26-28 also be cleared? Upstream
+ * doesn't clear those. */
+ extended_features &=
+ ~((1 << 5) | (1 << 16) | (1 << 21) | (1 << 30) | (1 << 31));
+ }
+ /* See Intel manual, volume 1, section 15.2. */
+ if ((xcr0 & 0xe6) != 0xe6) {
+ /* Clear AVX512F. Note we don't touch other AVX512 extensions because they
+ * can be used with YMM. */
+ extended_features &= ~(1 << 16);
}
OPENSSL_ia32cap_P[0] = edx;