bn: Move ia32cap_P references from x86_64-mont.pl to C.
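Replace |bn_sqr8x_mont|'s unused |bp| parameter with a flag that
indicates whether MULX and ADX are enabled. The C callers compute the
flag with |bn_mulx_adx_capable|, so x86_64-mont.pl no longer needs to
read |OPENSSL_ia32cap_P| itself.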
Bug: 673
Change-Id: I56632ad51bdc2f7f5ddd4278419d67e467b84d8f
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/65587
Reviewed-by: Bob Beck <bbe@google.com>
Reviewed-by: David Benjamin <davidben@google.com>
Commit-Queue: David Benjamin <davidben@google.com>
diff --git a/crypto/fipsmodule/bn/asm/x86_64-mont.pl b/crypto/fipsmodule/bn/asm/x86_64-mont.pl
index 875a5a5..d76b68b 100755
--- a/crypto/fipsmodule/bn/asm/x86_64-mont.pl
+++ b/crypto/fipsmodule/bn/asm/x86_64-mont.pl
@@ -85,8 +85,6 @@
$code=<<___;
.text
-.extern OPENSSL_ia32cap_P
-
.globl bn_mul_mont_nohw
.type bn_mul_mont_nohw,\@function,6
.align 16
@@ -785,7 +783,7 @@
# int bn_sqr8x_mont(
my $rptr="%rdi"; # const BN_ULONG *rptr,
my $aptr="%rsi"; # const BN_ULONG *aptr,
-my $bptr="%rdx"; # not used
+my $mulx_adx_capable="%rdx"; # BN_ULONG mulx_adx_capable, (differs from upstream, where this is the unused bptr)
my $nptr="%rcx"; # const BN_ULONG *nptr,
my $n0 ="%r8"; # const BN_ULONG *n0);
my $num ="%r9"; # int num, has to be divisible by 8
@@ -886,11 +884,8 @@
movq %r10, %xmm3 # -$num
___
$code.=<<___ if ($addx);
- leaq OPENSSL_ia32cap_P(%rip),%rax
- mov 8(%rax),%eax
- and \$0x80100,%eax
- cmp \$0x80100,%eax
- jne .Lsqr8x_nox
+ test $mulx_adx_capable,$mulx_adx_capable
+ jz .Lsqr8x_nox
call bn_sqrx8x_internal # see x86_64-mont5 module
# %rax top-most carry
diff --git a/crypto/fipsmodule/bn/bn_test.cc b/crypto/fipsmodule/bn/bn_test.cc
index 13042ea..90e0117 100644
--- a/crypto/fipsmodule/bn/bn_test.cc
+++ b/crypto/fipsmodule/bn/bn_test.cc
@@ -2899,8 +2899,8 @@
CHECK_ABI(bn_mul_mont_nohw, r.data(), a.data(), a.data(), mont->N.d,
mont->n0, words);
if (bn_sqr8x_mont_capable(words)) {
- CHECK_ABI(bn_sqr8x_mont, r.data(), a.data(), a.data(), mont->N.d,
- mont->n0, words);
+ CHECK_ABI(bn_sqr8x_mont, r.data(), a.data(), bn_mulx_adx_capable(),
+ mont->N.d, mont->n0, words);
}
#else
CHECK_ABI(bn_mul_mont, r.data(), a.data(), b.data(), mont->N.d, mont->n0,
diff --git a/crypto/fipsmodule/bn/internal.h b/crypto/fipsmodule/bn/internal.h
index 4de201f..a0133ef 100644
--- a/crypto/fipsmodule/bn/internal.h
+++ b/crypto/fipsmodule/bn/internal.h
@@ -402,6 +402,10 @@
const BN_ULONG *np, const BN_ULONG *n0, size_t num);
#if defined(OPENSSL_X86_64)
+OPENSSL_INLINE int bn_mulx_adx_capable(void) {
+ // MULX is in BMI2.
+ return CRYPTO_is_BMI2_capable() && CRYPTO_is_ADX_capable();
+}
int bn_mul_mont_nohw(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
const BN_ULONG *np, const BN_ULONG *n0, size_t num);
OPENSSL_INLINE int bn_mul4x_mont_capable(size_t num) {
@@ -410,16 +414,14 @@
int bn_mul4x_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
const BN_ULONG *np, const BN_ULONG *n0, size_t num);
OPENSSL_INLINE int bn_mulx4x_mont_capable(size_t num) {
- // MULX is in BMI2.
- return bn_mul4x_mont_capable(num) && CRYPTO_is_BMI2_capable() &&
- CRYPTO_is_ADX_capable();
+ return bn_mul4x_mont_capable(num) && bn_mulx_adx_capable();
}
int bn_mulx4x_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
const BN_ULONG *np, const BN_ULONG *n0, size_t num);
OPENSSL_INLINE int bn_sqr8x_mont_capable(size_t num) {
return (num >= 8) && ((num & 7) == 0);
}
-int bn_sqr8x_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *unused_bp,
+int bn_sqr8x_mont(BN_ULONG *rp, const BN_ULONG *ap, BN_ULONG mulx_adx_capable,
const BN_ULONG *np, const BN_ULONG *n0, size_t num);
#endif // defined(OPENSSL_X86_64)
diff --git a/crypto/fipsmodule/bn/montgomery.c b/crypto/fipsmodule/bn/montgomery.c
index 86b64c6..7a4ca2f 100644
--- a/crypto/fipsmodule/bn/montgomery.c
+++ b/crypto/fipsmodule/bn/montgomery.c
@@ -510,7 +510,7 @@
const BN_ULONG *np, const BN_ULONG *n0, size_t num)
{
if (ap == bp && bn_sqr8x_mont_capable(num)) {
- return bn_sqr8x_mont(rp, ap, bp, np, n0, num);
+ return bn_sqr8x_mont(rp, ap, bn_mulx_adx_capable(), np, n0, num);
}
if (bn_mulx4x_mont_capable(num)) {
return bn_mulx4x_mont(rp, ap, bp, np, n0, num);