Shave 8 bytes off BN_MONT_CTX on 64-bit platforms
We allocate two words in n0, but 64-bit platforms only need one word.
Change-Id: Ia2d53f88a9098d326dfbd79f9e59eb390afefad1
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/80207
Reviewed-by: Adam Langley <agl@google.com>
Auto-Submit: David Benjamin <davidben@google.com>
Commit-Queue: Adam Langley <agl@google.com>
diff --git a/crypto/fipsmodule/bn/bn_test.cc b/crypto/fipsmodule/bn/bn_test.cc
index 10fd3b4..7c1ad40 100644
--- a/crypto/fipsmodule/bn/bn_test.cc
+++ b/crypto/fipsmodule/bn/bn_test.cc
@@ -603,8 +603,9 @@
bssl::UniquePtr<BN_MONT_CTX> mont2(BN_MONT_CTX_new_consttime(m.get(), ctx));
ASSERT_TRUE(mont2);
EXPECT_BIGNUMS_EQUAL("RR (mod M) (constant-time)", &mont->RR, &mont2->RR);
- EXPECT_EQ(mont->n0[0], mont2->n0[0]);
- EXPECT_EQ(mont->n0[1], mont2->n0[1]);
+ for (size_t i = 0; i < std::size(mont->n0); i++) {
+ EXPECT_EQ(mont->n0[i], mont2->n0[i]);
+ }
bssl::UniquePtr<BIGNUM> a_tmp(BN_new()), b_tmp(BN_new());
ASSERT_TRUE(a_tmp);
diff --git a/crypto/fipsmodule/bn/internal.h b/crypto/fipsmodule/bn/internal.h
index 1ec2562..b3e2331 100644
--- a/crypto/fipsmodule/bn/internal.h
+++ b/crypto/fipsmodule/bn/internal.h
@@ -47,7 +47,6 @@
#define BN_MASK2l (0xffffffffUL)
#define BN_MASK2h (0xffffffff00000000UL)
#define BN_MASK2h1 (0xffffffff80000000UL)
-#define BN_MONT_CTX_N0_LIMBS 1
#define BN_DEC_CONV (10000000000000000000UL)
#define BN_DEC_NUM 19
#define TOBN(hi, lo) ((BN_ULONG)(hi) << 32 | (lo))
@@ -64,12 +63,6 @@
#define BN_MASK2l (0xffffUL)
#define BN_MASK2h1 (0xffff8000UL)
#define BN_MASK2h (0xffff0000UL)
-// On some 32-bit platforms, Montgomery multiplication is done using 64-bit
-// arithmetic with SIMD instructions. On such platforms, |BN_MONT_CTX::n0|
-// needs to be two words long. Only certain 32-bit platforms actually make use
-// of n0[1] and shorter R value would suffice for the others. However,
-// currently only the assembly files know which is which.
-#define BN_MONT_CTX_N0_LIMBS 2
#define BN_DEC_CONV (1000000000UL)
#define BN_DEC_NUM 9
#define TOBN(hi, lo) (lo), (hi)
@@ -296,7 +289,8 @@
// See also discussion in |ToWord| in abi_test.h for notes on smaller-than-word
// inputs.
void bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
- const BN_ULONG *np, const BN_ULONG *n0, size_t num);
+ const BN_ULONG *np, const BN_ULONG n0[BN_MONT_CTX_N0_LIMBS],
+ size_t num);
#if defined(OPENSSL_X86_64)
inline int bn_mulx_adx_capable(void) {
@@ -304,30 +298,36 @@
return CRYPTO_is_BMI2_capable() && CRYPTO_is_ADX_capable();
}
void bn_mul_mont_nohw(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
- const BN_ULONG *np, const BN_ULONG *n0, size_t num);
+ const BN_ULONG *np,
+ const BN_ULONG n0[BN_MONT_CTX_N0_LIMBS], size_t num);
inline int bn_mul4x_mont_capable(size_t num) {
return num >= 8 && (num & 3) == 0;
}
void bn_mul4x_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
- const BN_ULONG *np, const BN_ULONG *n0, size_t num);
+ const BN_ULONG *np, const BN_ULONG n0[BN_MONT_CTX_N0_LIMBS],
+ size_t num);
inline int bn_mulx4x_mont_capable(size_t num) {
return bn_mul4x_mont_capable(num) && bn_mulx_adx_capable();
}
void bn_mulx4x_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
- const BN_ULONG *np, const BN_ULONG *n0, size_t num);
+ const BN_ULONG *np, const BN_ULONG n0[BN_MONT_CTX_N0_LIMBS],
+ size_t num);
inline int bn_sqr8x_mont_capable(size_t num) {
return num >= 8 && (num & 7) == 0;
}
void bn_sqr8x_mont(BN_ULONG *rp, const BN_ULONG *ap, BN_ULONG mulx_adx_capable,
- const BN_ULONG *np, const BN_ULONG *n0, size_t num);
+ const BN_ULONG *np, const BN_ULONG n0[BN_MONT_CTX_N0_LIMBS],
+ size_t num);
#elif defined(OPENSSL_ARM)
inline int bn_mul8x_mont_neon_capable(size_t num) {
return (num & 7) == 0 && CRYPTO_is_NEON_capable();
}
void bn_mul8x_mont_neon(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
- const BN_ULONG *np, const BN_ULONG *n0, size_t num);
+ const BN_ULONG *np,
+ const BN_ULONG n0[BN_MONT_CTX_N0_LIMBS], size_t num);
void bn_mul_mont_nohw(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
- const BN_ULONG *np, const BN_ULONG *n0, size_t num);
+ const BN_ULONG *np,
+ const BN_ULONG n0[BN_MONT_CTX_N0_LIMBS], size_t num);
#endif
#endif // OPENSSL_BN_ASM_MONT
@@ -340,7 +340,8 @@
inline int bn_mul4x_mont_gather5_capable(int num) { return (num & 7) == 0; }
void bn_mul4x_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap,
const BN_ULONG *table, const BN_ULONG *np,
- const BN_ULONG *n0, int num, int power);
+ const BN_ULONG n0[BN_MONT_CTX_N0_LIMBS], int num,
+ int power);
inline int bn_mulx4x_mont_gather5_capable(int num) {
return bn_mul4x_mont_gather5_capable(num) && CRYPTO_is_ADX_capable() &&
@@ -348,11 +349,13 @@
}
void bn_mulx4x_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap,
const BN_ULONG *table, const BN_ULONG *np,
- const BN_ULONG *n0, int num, int power);
+ const BN_ULONG n0[BN_MONT_CTX_N0_LIMBS], int num,
+ int power);
void bn_mul_mont_gather5_nohw(BN_ULONG *rp, const BN_ULONG *ap,
const BN_ULONG *table, const BN_ULONG *np,
- const BN_ULONG *n0, int num, int power);
+ const BN_ULONG n0[BN_MONT_CTX_N0_LIMBS], int num,
+ int power);
// bn_scatter5 stores |inp| to index |power| of |table|. |inp| and each entry of
// |table| are |num| words long. |power| must be less than 32 and is treated as
@@ -368,7 +371,8 @@
// The following functions implement |bn_power5|. See |bn_power5| for details.
void bn_power5_nohw(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table,
- const BN_ULONG *np, const BN_ULONG *n0, int num, int power);
+ const BN_ULONG *np, const BN_ULONG n0[BN_MONT_CTX_N0_LIMBS],
+ int num, int power);
inline int bn_power5_capable(int num) { return (num & 7) == 0; }
@@ -377,7 +381,8 @@
CRYPTO_is_BMI1_capable() && CRYPTO_is_BMI2_capable();
}
void bn_powerx5(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table,
- const BN_ULONG *np, const BN_ULONG *n0, int num, int power);
+ const BN_ULONG *np, const BN_ULONG n0[BN_MONT_CTX_N0_LIMBS],
+ int num, int power);
#endif // !OPENSSL_NO_ASM && OPENSSL_X86_64
diff --git a/crypto/fipsmodule/bn/montgomery.cc.inc b/crypto/fipsmodule/bn/montgomery.cc.inc
index aac5af8..fe5c3a2 100644
--- a/crypto/fipsmodule/bn/montgomery.cc.inc
+++ b/crypto/fipsmodule/bn/montgomery.cc.inc
@@ -64,8 +64,9 @@
if (!BN_copy(&to->RR, &from->RR) || !BN_copy(&to->N, &from->N)) {
return NULL;
}
- to->n0[0] = from->n0[0];
- to->n0[1] = from->n0[1];
+ for (size_t i = 0; i < BN_MONT_CTX_N0_LIMBS; i++) {
+ to->n0[i] = from->n0[i];
+ }
return to;
}
@@ -111,8 +112,6 @@
mont->n0[0] = (BN_ULONG)n0;
#if BN_MONT_CTX_N0_LIMBS == 2
mont->n0[1] = (BN_ULONG)(n0 >> BN_BITS2);
-#else
- mont->n0[1] = 0;
#endif
return 1;
}
diff --git a/include/openssl/bn.h b/include/openssl/bn.h
index 8209975..62b63dd 100644
--- a/include/openssl/bn.h
+++ b/include/openssl/bn.h
@@ -917,6 +917,19 @@
int flags;
};
+// On some 32-bit platforms, Montgomery multiplication is done using 64-bit
+// arithmetic with SIMD instructions. On such platforms, |BN_MONT_CTX::n0|
+// needs to be two words long. Only certain 32-bit platforms actually make use
+// of n0[1], and a shorter R value would suffice for the others. However,
+// currently only the assembly files know which is which.
+#if defined(OPENSSL_64_BIT)
+#define BN_MONT_CTX_N0_LIMBS 1
+#elif defined(OPENSSL_32_BIT)
+#define BN_MONT_CTX_N0_LIMBS 2
+#else
+#error "unknown bit size"
+#endif
+
struct bn_mont_ctx_st {
// RR is R^2, reduced modulo |N|. It is used to convert to Montgomery form. It
// is guaranteed to have the same width as |N|.
@@ -924,7 +937,7 @@
// N is the modulus. It is always stored in minimal form, so |N.width|
// determines R.
BIGNUM N;
- BN_ULONG n0[2]; // least significant words of (R*Ri-1)/N
+ BN_ULONG n0[BN_MONT_CTX_N0_LIMBS]; // least significant words of (R*Ri-1)/N
};
OPENSSL_EXPORT unsigned BN_num_bits_word(BN_ULONG l);