Don't use BN_mod_inverse for inverses mod p in RSA keygen.

Instead, use BN_mod_exp_mont_consttime of p - 2. This removes two more
call sites sensitive to BN_FLG_CONSTTIME. We're down to just that last
BN_mod_inverse modulo φ(n). (Sort of. It's actually not sensitive
because even mod inverses always hit the other codepath. Perhaps we
should just leave it alone.)

Note this comes with a slight behavior change. The BN_MONT_CTXs are
initialized a little earlier. If a caller calls RSA_generate_* and then
reaches into the struct to scrap all the fields on it, they'll get
confused. Before, they had to perform an operation on it to get
confused. This is a completely ridiculous thing to do.

Since we do this a lot, this introduces some convenience functions for
doing the Fermat's Little Theorem mod inverse and fixes a leak in the
DSA code should computing kinv hit a malloc error.

BUG=125

Change-Id: Iafcae2fc6fd379d161f015c90ff7050e2282e905
Reviewed-on: https://boringssl-review.googlesource.com/12925
Reviewed-by: Adam Langley <agl@google.com>
Commit-Queue: Adam Langley <agl@google.com>
CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
diff --git a/crypto/bn/gcd.c b/crypto/bn/gcd.c
index 7280963..9e62da0 100644
--- a/crypto/bn/gcd.c
+++ b/crypto/bn/gcd.c
@@ -615,3 +615,27 @@
   BN_CTX_end(ctx);
   return ret;
 }
+
+int bn_mod_inverse_prime(BIGNUM *out, const BIGNUM *a, const BIGNUM *p,
+                         BN_CTX *ctx, const BN_MONT_CTX *mont_p) {
+  BN_CTX_start(ctx);
+  BIGNUM *p_minus_2 = BN_CTX_get(ctx);
+  int ok = p_minus_2 != NULL &&
+           BN_copy(p_minus_2, p) &&
+           BN_sub_word(p_minus_2, 2) &&
+           BN_mod_exp_mont(out, a, p_minus_2, p, ctx, mont_p);
+  BN_CTX_end(ctx);
+  return ok;
+}
+
+int bn_mod_inverse_secret_prime(BIGNUM *out, const BIGNUM *a, const BIGNUM *p,
+                                BN_CTX *ctx, const BN_MONT_CTX *mont_p) {
+  BN_CTX_start(ctx);
+  BIGNUM *p_minus_2 = BN_CTX_get(ctx);
+  int ok = p_minus_2 != NULL &&
+           BN_copy(p_minus_2, p) &&
+           BN_sub_word(p_minus_2, 2) &&
+           BN_mod_exp_mont_consttime(out, a, p_minus_2, p, ctx, mont_p);
+  BN_CTX_end(ctx);
+  return ok;
+}
diff --git a/crypto/bn/internal.h b/crypto/bn/internal.h
index f2141d7..1ee29fd 100644
--- a/crypto/bn/internal.h
+++ b/crypto/bn/internal.h
@@ -238,6 +238,18 @@
 #error "Either BN_ULLONG or BN_UMULT_LOHI must be defined on every platform."
 #endif
 
+/* bn_mod_inverse_prime sets |out| to the modular inverse of |a| modulo |p|,
+ * computed with Fermat's Little Theorem. It returns one on success and zero on
+ * error. If |mont_p| is NULL, one will be computed temporarily. */
+int bn_mod_inverse_prime(BIGNUM *out, const BIGNUM *a, const BIGNUM *p,
+                         BN_CTX *ctx, const BN_MONT_CTX *mont_p);
+
+/* bn_mod_inverse_secret_prime behaves like |bn_mod_inverse_prime| but uses
+ * |BN_mod_exp_mont_consttime| instead of |BN_mod_exp_mont| in hopes of
+ * protecting the exponent. */
+int bn_mod_inverse_secret_prime(BIGNUM *out, const BIGNUM *a, const BIGNUM *p,
+                                BN_CTX *ctx, const BN_MONT_CTX *mont_p);
+
 
 #if defined(__cplusplus)
 }  /* extern C */
diff --git a/crypto/dsa/dsa.c b/crypto/dsa/dsa.c
index 65e4091..15583be 100644
--- a/crypto/dsa/dsa.c
+++ b/crypto/dsa/dsa.c
@@ -72,6 +72,7 @@
 #include <openssl/sha.h>
 #include <openssl/thread.h>
 
+#include "../bn/internal.h"
 #include "../internal.h"
 
 
@@ -814,7 +815,7 @@
 int DSA_sign_setup(const DSA *dsa, BN_CTX *ctx_in, BIGNUM **out_kinv,
                    BIGNUM **out_r) {
   BN_CTX *ctx;
-  BIGNUM k, kq, qm2, *kinv = NULL, *r = NULL;
+  BIGNUM k, kq, *kinv = NULL, *r = NULL;
   int ret = 0;
 
   if (!dsa->p || !dsa->q || !dsa->g) {
@@ -824,7 +825,6 @@
 
   BN_init(&k);
   BN_init(&kq);
-  BN_init(&qm2);
 
   ctx = ctx_in;
   if (ctx == NULL) {
@@ -886,9 +886,7 @@
    * Theorem. */
   kinv = BN_new();
   if (kinv == NULL ||
-      !BN_set_word(&qm2, 2) ||
-      !BN_sub(&qm2, dsa->q, &qm2) ||
-      !BN_mod_exp_mont(kinv, &k, &qm2, dsa->q, ctx, dsa->method_mont_q)) {
+      !bn_mod_inverse_prime(kinv, &k, dsa->q, ctx, dsa->method_mont_q)) {
     goto err;
   }
 
@@ -912,7 +910,7 @@
   }
   BN_clear_free(&k);
   BN_clear_free(&kq);
-  BN_free(&qm2);
+  BN_clear_free(kinv);
   return ret;
 }
 
diff --git a/crypto/ec/ec_montgomery.c b/crypto/ec/ec_montgomery.c
index 1253a73..4643fd2 100644
--- a/crypto/ec/ec_montgomery.c
+++ b/crypto/ec/ec_montgomery.c
@@ -71,6 +71,7 @@
 #include <openssl/err.h>
 #include <openssl/mem.h>
 
+#include "../bn/internal.h"
 #include "internal.h"
 
 
@@ -230,11 +231,9 @@
     BIGNUM *Z_1 = BN_CTX_get(ctx);
     BIGNUM *Z_2 = BN_CTX_get(ctx);
     BIGNUM *Z_3 = BN_CTX_get(ctx);
-    BIGNUM *field_minus_2 = BN_CTX_get(ctx);
     if (Z_1 == NULL ||
         Z_2 == NULL ||
-        Z_3 == NULL ||
-        field_minus_2 == NULL) {
+        Z_3 == NULL) {
       goto err;
     }
 
@@ -247,16 +246,12 @@
      * is more efficient (at least in theory) than |BN_to_montgomery|, since it
      * doesn't have to do the multiplication before the reduction.
      *
-     * Use Fermat's Little Theorem with |BN_mod_exp_mont_consttime| instead of
-     * |BN_mod_inverse_odd| since this inversion may be done as the final step
-     * of private key operations. Unfortunately, this is suboptimal for ECDSA
-     * verification. */
+     * Use Fermat's Little Theorem instead of |BN_mod_inverse_odd| since this
+     * inversion may be done as the final step of private key operations.
+     * Unfortunately, this is suboptimal for ECDSA verification. */
     if (!BN_from_montgomery(Z_1, &point->Z, group->mont, ctx) ||
         !BN_from_montgomery(Z_1, Z_1, group->mont, ctx) ||
-        !BN_copy(field_minus_2, &group->field) ||
-        !BN_sub_word(field_minus_2, 2) ||
-        !BN_mod_exp_mont_consttime(Z_1, Z_1, field_minus_2, &group->field,
-                                   ctx, group->mont)) {
+        !bn_mod_inverse_prime(Z_1, Z_1, &group->field, ctx, group->mont)) {
       goto err;
     }
 
diff --git a/crypto/ecdsa/ecdsa.c b/crypto/ecdsa/ecdsa.c
index b4e6d0d..3432081 100644
--- a/crypto/ecdsa/ecdsa.c
+++ b/crypto/ecdsa/ecdsa.c
@@ -60,6 +60,7 @@
 #include <openssl/err.h>
 #include <openssl/mem.h>
 
+#include "../bn/internal.h"
 #include "../ec/internal.h"
 #include "../internal.h"
 
@@ -309,12 +310,9 @@
   } while (BN_is_zero(r));
 
   /* Compute the inverse of k. The order is a prime, so use Fermat's Little
-   * Theorem. */
-  if (!BN_set_word(tmp, 2) ||
-      !BN_sub(tmp, order, tmp) ||
-      /* Note |ec_group_get_mont_data| may return NULL but |BN_mod_exp_mont|
-       * allows it to be. */
-      !BN_mod_exp_mont(k, k, tmp, order, ctx, ec_group_get_mont_data(group))) {
+   * Theorem. Note |ec_group_get_mont_data| may return NULL but
+   * |bn_mod_inverse_prime| allows this. */
+  if (!bn_mod_inverse_prime(k, k, order, ctx, ec_group_get_mont_data(group))) {
     OPENSSL_PUT_ERROR(ECDSA, ERR_R_BN_LIB);
     goto err;
   }
diff --git a/crypto/rsa/rsa_impl.c b/crypto/rsa/rsa_impl.c
index 2cae4ca..3834be5 100644
--- a/crypto/rsa/rsa_impl.c
+++ b/crypto/rsa/rsa_impl.c
@@ -65,6 +65,7 @@
 #include <openssl/thread.h>
 
 #include "internal.h"
+#include "../bn/internal.h"
 #include "../internal.h"
 
 
@@ -1014,10 +1015,14 @@
     goto err;
   }
 
-  /* calculate inverse of q mod p */
+  /* Calculate inverse of q mod p. Note that although RSA key generation is far
+   * from constant-time, |bn_mod_inverse_secret_prime| uses the same modular
+   * exponentation logic as in RSA private key operations and, if the RSAZ-1024
+   * code is enabled, will be optimized for common RSA prime sizes. */
   p = &local_p;
   BN_with_flags(p, rsa->p, BN_FLG_CONSTTIME);
-  if (!BN_mod_inverse(rsa->iqmp, rsa->q, p, ctx)) {
+  if (!BN_MONT_CTX_set_locked(&rsa->mont_p, &rsa->lock, rsa->p, ctx) ||
+      !bn_mod_inverse_secret_prime(rsa->iqmp, rsa->q, p, ctx, rsa->mont_p)) {
     goto err;
   }
 
@@ -1026,7 +1031,9 @@
         sk_RSA_additional_prime_value(additional_primes, i - 2);
     if (!BN_sub(ap->exp, ap->prime, BN_value_one()) ||
         !BN_mod(ap->exp, rsa->d, ap->exp, ctx) ||
-        !BN_mod_inverse(ap->coeff, ap->r, ap->prime, ctx)) {
+        !BN_MONT_CTX_set_locked(&ap->mont, &rsa->lock, ap->prime, ctx) ||
+        !bn_mod_inverse_secret_prime(ap->coeff, ap->r, ap->prime, ctx,
+                                     ap->mont)) {
       goto err;
     }
   }