Remove LCM dependency from RSA_check_key.

Instead of checking d * e = 1 (mod lcm(p-1, q-1)), we can separately
check d * e = 1 (mod p-1) and d * e = 1 (mod q-1). This drops an LCM
dependency from key import and is 2x faster.

(Our constant-time LCM implementation can probably be faster if we
tried, but now it's only used in RSA keygen, so it doesn't matter much.
It's also using the unoptimized constant-time division, which is
probably the next target if we decide we care about speeding this up.)

Before:
Did 6768 RSA 2048 checking operations in 3015824us (2244.2 ops/sec)
Did 5610 RSA 2048 signing operations in 3033396us (1849.4 ops/sec)
Did 1953 RSA 4096 checking operations in 3060828us (638.1 ops/sec)
Did 817 RSA 4096 signing operations in 3021092us (270.4 ops/sec)
After:
Did 13175 RSA 2048 checking operations in 3090576us (4263.0 ops/sec)
Did 5610 RSA 2048 signing operations in 3032966us (1849.7 ops/sec)
Did 3720 RSA 4096 checking operations in 3085971us (1205.5 ops/sec)
Did 820 RSA 4096 signing operations in 3027312us (270.9 ops/sec)

Bug: 316
Change-Id: Ie29554c02d31f586dd0f8bdee03a227f1d5dc916
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/40146
Commit-Queue: Adam Langley <agl@google.com>
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/fipsmodule/rsa/rsa.c b/crypto/fipsmodule/rsa/rsa.c
index a7fb7ae..2929673 100644
--- a/crypto/fipsmodule/rsa/rsa.c
+++ b/crypto/fipsmodule/rsa/rsa.c
@@ -655,7 +655,12 @@
 }
 
 static int check_mod_inverse(int *out_ok, const BIGNUM *a, const BIGNUM *ainv,
-                             const BIGNUM *m, int check_reduced, BN_CTX *ctx) {
+                             const BIGNUM *m, BN_CTX *ctx) {
+  if (BN_is_negative(ainv) || BN_cmp(ainv, m) >= 0) {
+    *out_ok = 0;
+    return 1;
+  }
+
   BN_CTX_start(ctx);
   BIGNUM *tmp = BN_CTX_get(ctx);
   int ret = tmp != NULL &&
@@ -663,19 +668,12 @@
             bn_div_consttime(NULL, tmp, tmp, m, ctx);
   if (ret) {
     *out_ok = BN_is_one(tmp);
-    if (check_reduced && (BN_is_negative(ainv) || BN_cmp(ainv, m) >= 0)) {
-      *out_ok = 0;
-    }
   }
   BN_CTX_end(ctx);
   return ret;
 }
 
 int RSA_check_key(const RSA *key) {
-  BIGNUM n, pm1, qm1, lcm, dmp1, dmq1, iqmp_times_q;
-  BN_CTX *ctx;
-  int ok = 0, has_crt_values;
-
   if (RSA_is_opaque(key)) {
     // Opaque keys can't be checked.
     return 1;
@@ -697,50 +695,53 @@
     return 1;
   }
 
-  ctx = BN_CTX_new();
+  BN_CTX *ctx = BN_CTX_new();
   if (ctx == NULL) {
     OPENSSL_PUT_ERROR(RSA, ERR_R_MALLOC_FAILURE);
     return 0;
   }
 
-  BN_init(&n);
+  BIGNUM tmp, de, pm1, qm1, dmp1, dmq1;
+  int ok = 0;
+  BN_init(&tmp);
+  BN_init(&de);
   BN_init(&pm1);
   BN_init(&qm1);
-  BN_init(&lcm);
   BN_init(&dmp1);
   BN_init(&dmq1);
-  BN_init(&iqmp_times_q);
-
-  int d_ok;
-  if (!bn_mul_consttime(&n, key->p, key->q, ctx) ||
-      // lcm = lcm(p, q)
-      !bn_usub_consttime(&pm1, key->p, BN_value_one()) ||
-      !bn_usub_consttime(&qm1, key->q, BN_value_one()) ||
-      !bn_lcm_consttime(&lcm, &pm1, &qm1, ctx) ||
-      // Other implementations use the Euler totient rather than the Carmichael
-      // totient, so allow unreduced |key->d|.
-      !check_mod_inverse(&d_ok, key->e, key->d, &lcm,
-                         0 /* don't require reduced */, ctx)) {
+  if (!bn_mul_consttime(&tmp, key->p, key->q, ctx)) {
     OPENSSL_PUT_ERROR(RSA, ERR_LIB_BN);
     goto out;
   }
 
-  if (BN_cmp(&n, key->n) != 0) {
+  if (BN_cmp(&tmp, key->n) != 0) {
     OPENSSL_PUT_ERROR(RSA, RSA_R_N_NOT_EQUAL_P_Q);
     goto out;
   }
 
-  if (!d_ok) {
-    OPENSSL_PUT_ERROR(RSA, RSA_R_D_E_NOT_CONGRUENT_TO_1);
-    goto out;
-  }
-
   if (BN_is_negative(key->d) || BN_cmp(key->d, key->n) >= 0) {
     OPENSSL_PUT_ERROR(RSA, RSA_R_D_OUT_OF_RANGE);
     goto out;
   }
 
-  has_crt_values = key->dmp1 != NULL;
+  // d must be an inverse of e mod the Carmichael totient, lcm(p-1, q-1), but it
+  // may be unreduced because other implementations use the Euler totient. We
+  // simply check that d * e is one mod p-1 and mod q-1.
+  if (!bn_usub_consttime(&pm1, key->p, BN_value_one()) ||
+      !bn_usub_consttime(&qm1, key->q, BN_value_one()) ||
+      !bn_mul_consttime(&de, key->d, key->e, ctx) ||
+      !bn_div_consttime(NULL, &tmp, &de, &pm1, ctx) ||
+      !bn_div_consttime(NULL, &de, &de, &qm1, ctx)) {
+    OPENSSL_PUT_ERROR(RSA, ERR_LIB_BN);
+    goto out;
+  }
+
+  if (!BN_is_one(&tmp) || !BN_is_one(&de)) {
+    OPENSSL_PUT_ERROR(RSA, RSA_R_D_E_NOT_CONGRUENT_TO_1);
+    goto out;
+  }
+
+  int has_crt_values = key->dmp1 != NULL;
   if (has_crt_values != (key->dmq1 != NULL) ||
       has_crt_values != (key->iqmp != NULL)) {
     OPENSSL_PUT_ERROR(RSA, RSA_R_INCONSISTENT_SET_OF_CRT_VALUES);
@@ -749,12 +750,9 @@
 
   if (has_crt_values) {
     int dmp1_ok, dmq1_ok, iqmp_ok;
-    if (!check_mod_inverse(&dmp1_ok, key->e, key->dmp1, &pm1,
-                           1 /* check reduced */, ctx) ||
-        !check_mod_inverse(&dmq1_ok, key->e, key->dmq1, &qm1,
-                           1 /* check reduced */, ctx) ||
-        !check_mod_inverse(&iqmp_ok, key->q, key->iqmp, key->p,
-                           1 /* check reduced */, ctx)) {
+    if (!check_mod_inverse(&dmp1_ok, key->e, key->dmp1, &pm1, ctx) ||
+        !check_mod_inverse(&dmq1_ok, key->e, key->dmq1, &qm1, ctx) ||
+        !check_mod_inverse(&iqmp_ok, key->q, key->iqmp, key->p, ctx)) {
       OPENSSL_PUT_ERROR(RSA, ERR_LIB_BN);
       goto out;
     }
@@ -768,13 +766,12 @@
   ok = 1;
 
 out:
-  BN_free(&n);
+  BN_free(&tmp);
+  BN_free(&de);
   BN_free(&pm1);
   BN_free(&qm1);
-  BN_free(&lcm);
   BN_free(&dmp1);
   BN_free(&dmq1);
-  BN_free(&iqmp_times_q);
   BN_CTX_free(ctx);
 
   return ok;