Add a constant-time generic modular inverse function.
This uses the full binary GCD algorithm, where all four of A, B, C, and
D must be retained. (BN_mod_inverse_odd implements the odd number
version which only needs A and C.) It is patterned after the version
in the Handbook of Applied Cryptography, but tweaked so the coefficients
are non-negative and bounded.
Median of 29 RSA keygens: 0m0.225s -> 0m0.220s
(Accuracy beyond 0.1s is questionable.)
Bug: 238
Change-Id: I6dc13524ea7c8ac1072592857880ddf141d87526
Reviewed-on: https://boringssl-review.googlesource.com/26370
Reviewed-by: Adam Langley <alangley@gmail.com>
diff --git a/crypto/fipsmodule/bn/bn_test.cc b/crypto/fipsmodule/bn/bn_test.cc
index 2ee6a16..99d7f16 100644
--- a/crypto/fipsmodule/bn/bn_test.cc
+++ b/crypto/fipsmodule/bn/bn_test.cc
@@ -867,6 +867,13 @@
ASSERT_TRUE(BN_gcd(ret.get(), a.get(), m.get(), ctx));
EXPECT_BIGNUMS_EQUAL("GCD(A, M)", BN_value_one(), ret.get());
+
+ ASSERT_TRUE(BN_nnmod(a.get(), a.get(), m.get(), ctx));
+ int no_inverse;
+ ASSERT_TRUE(
+ bn_mod_inverse_consttime(ret.get(), &no_inverse, a.get(), m.get(), ctx));
+ EXPECT_BIGNUMS_EQUAL("inv(A) (mod M) (constant-time)", mod_inv.get(),
+ ret.get());
}
static void TestGCD(BIGNUMFileTest *t, BN_CTX *ctx) {
@@ -889,6 +896,28 @@
<< "A^-1 (mod B) computed, but it does not exist";
EXPECT_FALSE(BN_mod_inverse(ret.get(), b.get(), a.get(), ctx))
<< "B^-1 (mod A) computed, but it does not exist";
+
+ if (!BN_is_zero(b.get())) {
+ bssl::UniquePtr<BIGNUM> a_reduced(BN_new());
+ ASSERT_TRUE(a_reduced);
+ ASSERT_TRUE(BN_nnmod(a_reduced.get(), a.get(), b.get(), ctx));
+ int no_inverse;
+ EXPECT_FALSE(bn_mod_inverse_consttime(ret.get(), &no_inverse,
+ a_reduced.get(), b.get(), ctx))
+ << "A^-1 (mod B) computed, but it does not exist";
+ EXPECT_TRUE(no_inverse);
+ }
+
+ if (!BN_is_zero(a.get())) {
+ bssl::UniquePtr<BIGNUM> b_reduced(BN_new());
+ ASSERT_TRUE(b_reduced);
+ ASSERT_TRUE(BN_nnmod(b_reduced.get(), b.get(), a.get(), ctx));
+ int no_inverse;
+ EXPECT_FALSE(bn_mod_inverse_consttime(ret.get(), &no_inverse,
+ b_reduced.get(), a.get(), ctx))
+ << "B^-1 (mod A) computed, but it does not exist";
+ EXPECT_TRUE(no_inverse);
+ }
}
int is_relative_prime;
diff --git a/crypto/fipsmodule/bn/gcd.c b/crypto/fipsmodule/bn/gcd.c
index bc40ec2..7868b40 100644
--- a/crypto/fipsmodule/bn/gcd.c
+++ b/crypto/fipsmodule/bn/gcd.c
@@ -123,6 +123,23 @@
bn_select_words(a, mask, tmp, a, num);
}
+static void maybe_rshift1_words_carry(BN_ULONG *a, BN_ULONG carry,
+ BN_ULONG mask, BN_ULONG *tmp,
+ size_t num) {
+ maybe_rshift1_words(a, mask, tmp, num);
+ if (num != 0) {
+ carry &= mask;
+ a[num - 1] |= carry << (BN_BITS2-1);
+ }
+}
+
+static BN_ULONG maybe_add_words(BN_ULONG *a, BN_ULONG mask, const BN_ULONG *b,
+ BN_ULONG *tmp, size_t num) {
+ BN_ULONG carry = bn_add_words(tmp, a, b, num);
+ bn_select_words(a, mask, tmp, a, num);
+ return carry & mask;
+}
+
static int bn_gcd_consttime(BIGNUM *r, unsigned *out_shift, const BIGNUM *x,
const BIGNUM *y, BN_CTX *ctx) {
size_t width = x->width > y->width ? x->width : y->width;
@@ -243,10 +260,163 @@
return ret;
}
-// solves ax == 1 (mod n)
-static int bn_mod_inverse_general(BIGNUM *out, int *out_no_inverse,
- const BIGNUM *a, const BIGNUM *n,
- BN_CTX *ctx);
+int bn_mod_inverse_consttime(BIGNUM *r, int *out_no_inverse, const BIGNUM *a,
+ const BIGNUM *n, BN_CTX *ctx) {
+ *out_no_inverse = 0;
+ if (BN_is_negative(a) || BN_ucmp(a, n) >= 0) {
+ OPENSSL_PUT_ERROR(BN, BN_R_INPUT_NOT_REDUCED);
+ return 0;
+ }
+ if (BN_is_zero(a)) {
+ if (BN_is_one(n)) {
+ BN_zero(r);
+ return 1;
+ }
+ *out_no_inverse = 1;
+ OPENSSL_PUT_ERROR(BN, BN_R_NO_INVERSE);
+ return 0;
+ }
+
+ // This is a constant-time implementation of the extended binary GCD
+ // algorithm. It is adapted from the Handbook of Applied Cryptography, section
+ // 14.4.3, algorithm 14.51, and modified to bound coefficients and avoid
+ // negative numbers.
+ //
+ // For more details and proof of correctness, see
+ // https://github.com/mit-plv/fiat-crypto/pull/333. In particular, see |step|
+ // and |mod_inverse_consttime| for the algorithm in Gallina and see
+ // |mod_inverse_consttime_spec| for the correctness result.
+
+ if (!BN_is_odd(a) && !BN_is_odd(n)) {
+ *out_no_inverse = 1;
+ OPENSSL_PUT_ERROR(BN, BN_R_NO_INVERSE);
+ return 0;
+ }
+
+ // This function exists to compute the RSA private exponent, where |a| is one
+ // word. We'll thus use |a_width| when available.
+ size_t n_width = n->width, a_width = a->width;
+ if (a_width > n_width) {
+ a_width = n_width;
+ }
+
+ int ret = 0;
+ BN_CTX_start(ctx);
+ BIGNUM *u = BN_CTX_get(ctx);
+ BIGNUM *v = BN_CTX_get(ctx);
+ BIGNUM *A = BN_CTX_get(ctx);
+ BIGNUM *B = BN_CTX_get(ctx);
+ BIGNUM *C = BN_CTX_get(ctx);
+ BIGNUM *D = BN_CTX_get(ctx);
+ BIGNUM *tmp = BN_CTX_get(ctx);
+ BIGNUM *tmp2 = BN_CTX_get(ctx);
+ if (u == NULL || v == NULL || A == NULL || B == NULL || C == NULL ||
+ D == NULL || tmp == NULL || tmp2 == NULL ||
+ !BN_copy(u, a) ||
+ !BN_copy(v, n) ||
+ !BN_one(A) ||
+ !BN_one(D) ||
+ // For convenience, size |u| and |v| equivalently.
+ !bn_resize_words(u, n_width) ||
+ !bn_resize_words(v, n_width) ||
+ // |A| and |C| are bounded by |m|.
+ !bn_resize_words(A, n_width) ||
+ !bn_resize_words(C, n_width) ||
+ // |B| and |D| are bounded by |a|.
+ !bn_resize_words(B, a_width) ||
+ !bn_resize_words(D, a_width) ||
+ // |tmp| and |tmp2| may be used at either size.
+ !bn_resize_words(tmp, n_width) ||
+ !bn_resize_words(tmp2, n_width)) {
+ goto err;
+ }
+
+ // Each loop iteration halves at least one of |u| and |v|. Thus we need at
+ // most the combined bit width of inputs for at least one value to be zero.
+ unsigned a_bits = a_width * BN_BITS2, n_bits = n_width * BN_BITS2;
+ unsigned num_iters = a_bits + n_bits;
+ if (num_iters < a_bits) {
+ OPENSSL_PUT_ERROR(BN, BN_R_BIGNUM_TOO_LONG);
+ goto err;
+ }
+
+ // Before and after each loop iteration, the following hold:
+ //
+ // u = A*a - B*n
+ // v = D*n - C*a
+ // 0 < u <= a
+ // 0 <= v <= n
+ // 0 <= A < n
+ // 0 <= B <= a
+ // 0 <= C < n
+ // 0 <= D <= a
+ //
+ // After each loop iteration, u and v only get smaller, and at least one of
+ // them shrinks by at least a factor of two.
+ for (unsigned i = 0; i < num_iters; i++) {
+ BN_ULONG both_odd = word_is_odd_mask(u->d[0]) & word_is_odd_mask(v->d[0]);
+
+ // If both |u| and |v| are odd, subtract the smaller from the larger.
+ BN_ULONG v_less_than_u =
+ (BN_ULONG)0 - bn_sub_words(tmp->d, v->d, u->d, n_width);
+ bn_select_words(v->d, both_odd & ~v_less_than_u, tmp->d, v->d, n_width);
+ bn_sub_words(tmp->d, u->d, v->d, n_width);
+ bn_select_words(u->d, both_odd & v_less_than_u, tmp->d, u->d, n_width);
+
+ // If we updated one of the values, update the corresponding coefficient.
+ BN_ULONG carry = bn_add_words(tmp->d, A->d, C->d, n_width);
+ carry -= bn_sub_words(tmp2->d, tmp->d, n->d, n_width);
+ bn_select_words(tmp->d, carry, tmp->d, tmp2->d, n_width);
+ bn_select_words(A->d, both_odd & v_less_than_u, tmp->d, A->d, n_width);
+ bn_select_words(C->d, both_odd & ~v_less_than_u, tmp->d, C->d, n_width);
+
+ bn_add_words(tmp->d, B->d, D->d, a_width);
+ bn_sub_words(tmp2->d, tmp->d, a->d, a_width);
+ bn_select_words(tmp->d, carry, tmp->d, tmp2->d, a_width);
+ bn_select_words(B->d, both_odd & v_less_than_u, tmp->d, B->d, a_width);
+ bn_select_words(D->d, both_odd & ~v_less_than_u, tmp->d, D->d, a_width);
+
+ // Our loop invariants hold at this point. Additionally, exactly one of |u|
+ // and |v| is now even.
+ BN_ULONG u_is_even = ~word_is_odd_mask(u->d[0]);
+ BN_ULONG v_is_even = ~word_is_odd_mask(v->d[0]);
+ assert(u_is_even != v_is_even);
+
+ // Halve the even one and adjust the corresponding coefficient.
+ maybe_rshift1_words(u->d, u_is_even, tmp->d, n_width);
+ BN_ULONG A_or_B_is_odd =
+ word_is_odd_mask(A->d[0]) | word_is_odd_mask(B->d[0]);
+ BN_ULONG A_carry =
+ maybe_add_words(A->d, A_or_B_is_odd & u_is_even, n->d, tmp->d, n_width);
+ BN_ULONG B_carry =
+ maybe_add_words(B->d, A_or_B_is_odd & u_is_even, a->d, tmp->d, a_width);
+ maybe_rshift1_words_carry(A->d, A_carry, u_is_even, tmp->d, n_width);
+ maybe_rshift1_words_carry(B->d, B_carry, u_is_even, tmp->d, a_width);
+
+ maybe_rshift1_words(v->d, v_is_even, tmp->d, n_width);
+ BN_ULONG C_or_D_is_odd =
+ word_is_odd_mask(C->d[0]) | word_is_odd_mask(D->d[0]);
+ BN_ULONG C_carry =
+ maybe_add_words(C->d, C_or_D_is_odd & v_is_even, n->d, tmp->d, n_width);
+ BN_ULONG D_carry =
+ maybe_add_words(D->d, C_or_D_is_odd & v_is_even, a->d, tmp->d, a_width);
+ maybe_rshift1_words_carry(C->d, C_carry, v_is_even, tmp->d, n_width);
+ maybe_rshift1_words_carry(D->d, D_carry, v_is_even, tmp->d, a_width);
+ }
+
+ assert(BN_is_zero(v));
+ if (!BN_is_one(u)) {
+ *out_no_inverse = 1;
+ OPENSSL_PUT_ERROR(BN, BN_R_NO_INVERSE);
+ goto err;
+ }
+
+ ret = BN_copy(r, A) != NULL;
+
+err:
+ BN_CTX_end(ctx);
+ return ret;
+}
int BN_mod_inverse_odd(BIGNUM *out, int *out_no_inverse, const BIGNUM *a,
const BIGNUM *n, BN_CTX *ctx) {
@@ -442,7 +612,7 @@
int no_inverse;
if (!BN_is_odd(n)) {
- if (!bn_mod_inverse_general(out, &no_inverse, a, n, ctx)) {
+ if (!bn_mod_inverse_consttime(out, &no_inverse, a, n, ctx)) {
goto err;
}
} else if (!BN_mod_inverse_odd(out, &no_inverse, a, n, ctx)) {
@@ -488,139 +658,6 @@
return ret;
}
-// bn_mod_inverse_general is the general inversion algorithm that works for
-// both even and odd |n|. It was specifically designed to contain fewer
-// branches that may leak sensitive information; see "New Branch Prediction
-// Vulnerabilities in OpenSSL and Necessary Software Countermeasures" by
-// Onur Acıçmez, Shay Gueron, and Jean-Pierre Seifert.
-static int bn_mod_inverse_general(BIGNUM *out, int *out_no_inverse,
- const BIGNUM *a, const BIGNUM *n,
- BN_CTX *ctx) {
- BIGNUM *A, *B, *X, *Y, *M, *D, *T;
- int ret = 0;
- int sign;
-
- *out_no_inverse = 0;
-
- BN_CTX_start(ctx);
- A = BN_CTX_get(ctx);
- B = BN_CTX_get(ctx);
- X = BN_CTX_get(ctx);
- D = BN_CTX_get(ctx);
- M = BN_CTX_get(ctx);
- Y = BN_CTX_get(ctx);
- T = BN_CTX_get(ctx);
- if (T == NULL) {
- goto err;
- }
-
- BIGNUM *R = out;
-
- BN_zero(Y);
- if (!BN_one(X) || BN_copy(B, a) == NULL || BN_copy(A, n) == NULL) {
- goto err;
- }
- A->neg = 0;
-
- sign = -1;
- // From B = a mod |n|, A = |n| it follows that
- //
- // 0 <= B < A,
- // -sign*X*a == B (mod |n|),
- // sign*Y*a == A (mod |n|).
-
- while (!BN_is_zero(B)) {
- BIGNUM *tmp;
-
- // 0 < B < A,
- // (*) -sign*X*a == B (mod |n|),
- // sign*Y*a == A (mod |n|)
-
- // (D, M) := (A/B, A%B) ...
- if (!BN_div(D, M, A, B, ctx)) {
- goto err;
- }
-
- // Now
- // A = D*B + M;
- // thus we have
- // (**) sign*Y*a == D*B + M (mod |n|).
-
- tmp = A; // keep the BIGNUM object, the value does not matter
-
- // (A, B) := (B, A mod B) ...
- A = B;
- B = M;
- // ... so we have 0 <= B < A again
-
- // Since the former M is now B and the former B is now A,
- // (**) translates into
- // sign*Y*a == D*A + B (mod |n|),
- // i.e.
- // sign*Y*a - D*A == B (mod |n|).
- // Similarly, (*) translates into
- // -sign*X*a == A (mod |n|).
- //
- // Thus,
- // sign*Y*a + D*sign*X*a == B (mod |n|),
- // i.e.
- // sign*(Y + D*X)*a == B (mod |n|).
- //
- // So if we set (X, Y, sign) := (Y + D*X, X, -sign), we arrive back at
- // -sign*X*a == B (mod |n|),
- // sign*Y*a == A (mod |n|).
- // Note that X and Y stay non-negative all the time.
-
- if (!BN_mul(tmp, D, X, ctx)) {
- goto err;
- }
- if (!BN_add(tmp, tmp, Y)) {
- goto err;
- }
-
- M = Y; // keep the BIGNUM object, the value does not matter
- Y = X;
- X = tmp;
- sign = -sign;
- }
-
- if (!BN_is_one(A)) {
- *out_no_inverse = 1;
- OPENSSL_PUT_ERROR(BN, BN_R_NO_INVERSE);
- goto err;
- }
-
- // The while loop (Euclid's algorithm) ends when
- // A == gcd(a,n);
- // we have
- // sign*Y*a == A (mod |n|),
- // where Y is non-negative.
-
- if (sign < 0) {
- if (!BN_sub(Y, n, Y)) {
- goto err;
- }
- }
- // Now Y*a == A (mod |n|).
-
- // Y*a == 1 (mod |n|)
- if (!Y->neg && BN_ucmp(Y, n) < 0) {
- if (!BN_copy(R, Y)) {
- goto err;
- }
- } else {
- if (!BN_nnmod(R, Y, n, ctx)) {
- goto err;
- }
- }
-
- ret = 1;
-
-err:
- BN_CTX_end(ctx);
- return ret;
-}
-
int bn_mod_inverse_prime(BIGNUM *out, const BIGNUM *a, const BIGNUM *p,
BN_CTX *ctx, const BN_MONT_CTX *mont_p) {
BN_CTX_start(ctx);
diff --git a/crypto/fipsmodule/bn/internal.h b/crypto/fipsmodule/bn/internal.h
index 79e5a41..a8ad129 100644
--- a/crypto/fipsmodule/bn/internal.h
+++ b/crypto/fipsmodule/bn/internal.h
@@ -342,18 +342,6 @@
#error "Either BN_ULLONG or BN_UMULT_LOHI must be defined on every platform."
#endif
-// bn_mod_inverse_prime sets |out| to the modular inverse of |a| modulo |p|,
-// computed with Fermat's Little Theorem. It returns one on success and zero on
-// error. If |mont_p| is NULL, one will be computed temporarily.
-int bn_mod_inverse_prime(BIGNUM *out, const BIGNUM *a, const BIGNUM *p,
- BN_CTX *ctx, const BN_MONT_CTX *mont_p);
-
-// bn_mod_inverse_secret_prime behaves like |bn_mod_inverse_prime| but uses
-// |BN_mod_exp_mont_consttime| instead of |BN_mod_exp_mont| in hopes of
-// protecting the exponent.
-int bn_mod_inverse_secret_prime(BIGNUM *out, const BIGNUM *a, const BIGNUM *p,
- BN_CTX *ctx, const BN_MONT_CTX *mont_p);
-
// bn_jacobi returns the Jacobi symbol of |a| and |b| (which is -1, 0 or 1), or
// -2 on error.
int bn_jacobi(const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx);
@@ -463,6 +451,30 @@
int bn_mod_lshift_consttime(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m,
BN_CTX *ctx);
+// bn_mod_inverse_consttime sets |r| to |a|^-1, mod |n|. |a| must be non-
+// negative and less than |n|. It returns one on success and zero on error. On
+// failure, if the failure was caused by |a| having no inverse mod |n| then
+// |*out_no_inverse| will be set to one; otherwise it will be set to zero.
+//
+// This function treats both |a| and |n| as secret, provided they are both non-
+// zero and the inverse exists. It should only be used for even moduli where
+// none of the less general implementations are applicable.
+OPENSSL_EXPORT int bn_mod_inverse_consttime(BIGNUM *r, int *out_no_inverse,
+ const BIGNUM *a, const BIGNUM *n,
+ BN_CTX *ctx);
+
+// bn_mod_inverse_prime sets |out| to the modular inverse of |a| modulo |p|,
+// computed with Fermat's Little Theorem. It returns one on success and zero on
+// error. If |mont_p| is NULL, one will be computed temporarily.
+int bn_mod_inverse_prime(BIGNUM *out, const BIGNUM *a, const BIGNUM *p,
+ BN_CTX *ctx, const BN_MONT_CTX *mont_p);
+
+// bn_mod_inverse_secret_prime behaves like |bn_mod_inverse_prime| but uses
+// |BN_mod_exp_mont_consttime| instead of |BN_mod_exp_mont| in hopes of
+// protecting the exponent.
+int bn_mod_inverse_secret_prime(BIGNUM *out, const BIGNUM *a, const BIGNUM *p,
+ BN_CTX *ctx, const BN_MONT_CTX *mont_p);
+
// Low-level operations for small numbers.
//