bn: Move ia32cap_P references from x86_64-mont.pl to C.

Replace |bn_sqr8x_mont|'s unused |bp| parameter with a flag that
indicates whether MULX and ADX are enabled.

Bug: 673
Change-Id: I56632ad51bdc2f7f5ddd4278419d67e467b84d8f
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/65587
Reviewed-by: Bob Beck <bbe@google.com>
Reviewed-by: David Benjamin <davidben@google.com>
Commit-Queue: David Benjamin <davidben@google.com>
diff --git a/crypto/fipsmodule/bn/asm/x86_64-mont.pl b/crypto/fipsmodule/bn/asm/x86_64-mont.pl
index 875a5a5..d76b68b 100755
--- a/crypto/fipsmodule/bn/asm/x86_64-mont.pl
+++ b/crypto/fipsmodule/bn/asm/x86_64-mont.pl
@@ -85,8 +85,6 @@
 $code=<<___;
 .text
 
-.extern	OPENSSL_ia32cap_P
-
 .globl	bn_mul_mont_nohw
 .type	bn_mul_mont_nohw,\@function,6
 .align	16
@@ -785,7 +783,7 @@
 # int bn_sqr8x_mont(
 my $rptr="%rdi";	# const BN_ULONG *rptr,
 my $aptr="%rsi";	# const BN_ULONG *aptr,
-my $bptr="%rdx";	# not used
+my $mulx_adx_capable="%rdx"; # BN_ULONG flag, nonzero if MULX and ADX may be used. Differs from upstream, where this argument is an unused bptr!
 my $nptr="%rcx";	# const BN_ULONG *nptr,
 my $n0  ="%r8";		# const BN_ULONG *n0);
 my $num ="%r9";		# int num, has to be divisible by 8
@@ -886,11 +884,8 @@
 	movq	%r10, %xmm3		# -$num
 ___
 $code.=<<___ if ($addx);
-	leaq	OPENSSL_ia32cap_P(%rip),%rax
-	mov	8(%rax),%eax
-	and	\$0x80100,%eax
-	cmp	\$0x80100,%eax
-	jne	.Lsqr8x_nox
+	test	$mulx_adx_capable,$mulx_adx_capable
+	jz	.Lsqr8x_nox
 
 	call	bn_sqrx8x_internal	# see x86_64-mont5 module
 					# %rax	top-most carry
diff --git a/crypto/fipsmodule/bn/bn_test.cc b/crypto/fipsmodule/bn/bn_test.cc
index 13042ea..90e0117 100644
--- a/crypto/fipsmodule/bn/bn_test.cc
+++ b/crypto/fipsmodule/bn/bn_test.cc
@@ -2899,8 +2899,8 @@
     CHECK_ABI(bn_mul_mont_nohw, r.data(), a.data(), a.data(), mont->N.d,
               mont->n0, words);
     if (bn_sqr8x_mont_capable(words)) {
-      CHECK_ABI(bn_sqr8x_mont, r.data(), a.data(), a.data(), mont->N.d,
-                mont->n0, words);
+      CHECK_ABI(bn_sqr8x_mont, r.data(), a.data(), bn_mulx_adx_capable(),
+                mont->N.d, mont->n0, words);
     }
 #else
     CHECK_ABI(bn_mul_mont, r.data(), a.data(), b.data(), mont->N.d, mont->n0,
diff --git a/crypto/fipsmodule/bn/internal.h b/crypto/fipsmodule/bn/internal.h
index 4de201f..a0133ef 100644
--- a/crypto/fipsmodule/bn/internal.h
+++ b/crypto/fipsmodule/bn/internal.h
@@ -402,6 +402,10 @@
                 const BN_ULONG *np, const BN_ULONG *n0, size_t num);
 
 #if defined(OPENSSL_X86_64)
+OPENSSL_INLINE int bn_mulx_adx_capable(void) {
+  // MULX is in BMI2.
+  return CRYPTO_is_BMI2_capable() && CRYPTO_is_ADX_capable();
+}
 int bn_mul_mont_nohw(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                      const BN_ULONG *np, const BN_ULONG *n0, size_t num);
 OPENSSL_INLINE int bn_mul4x_mont_capable(size_t num) {
@@ -410,16 +414,14 @@
 int bn_mul4x_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                   const BN_ULONG *np, const BN_ULONG *n0, size_t num);
 OPENSSL_INLINE int bn_mulx4x_mont_capable(size_t num) {
-  // MULX is in BMI2.
-  return bn_mul4x_mont_capable(num) && CRYPTO_is_BMI2_capable() &&
-         CRYPTO_is_ADX_capable();
+  return bn_mul4x_mont_capable(num) && bn_mulx_adx_capable();
 }
 int bn_mulx4x_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                    const BN_ULONG *np, const BN_ULONG *n0, size_t num);
 OPENSSL_INLINE int bn_sqr8x_mont_capable(size_t num) {
   return (num >= 8) && ((num & 7) == 0);
 }
-int bn_sqr8x_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *unused_bp,
+int bn_sqr8x_mont(BN_ULONG *rp, const BN_ULONG *ap, BN_ULONG mulx_adx_capable,
                   const BN_ULONG *np, const BN_ULONG *n0, size_t num);
 #endif // defined(OPENSSL_X86_64)
 
diff --git a/crypto/fipsmodule/bn/montgomery.c b/crypto/fipsmodule/bn/montgomery.c
index 86b64c6..7a4ca2f 100644
--- a/crypto/fipsmodule/bn/montgomery.c
+++ b/crypto/fipsmodule/bn/montgomery.c
@@ -510,7 +510,7 @@
                 const BN_ULONG *np, const BN_ULONG *n0, size_t num)
 {
   if (ap == bp && bn_sqr8x_mont_capable(num)) {
-    return bn_sqr8x_mont(rp, ap, bp, np, n0, num);
+    return bn_sqr8x_mont(rp, ap, bn_mulx_adx_capable(), np, n0, num);
   }
   if (bn_mulx4x_mont_capable(num)) {
     return bn_mulx4x_mont(rp, ap, bp, np, n0, num);