Deduplicate built-in curves and give custom curves an order_mont.

I still need to revive the original CL, but right now I'm interested in
giving every EC_GROUP an order_mont and having different ownership of
that field between built-in and custom groups is kind of a nuisance. If
I'm going to do that anyway, better to avoid computing the entire
EC_GROUP in one go.

I'm using some manual locking rather than CRYPTO_once here so that it
behaves well in the face of malloc errors. Not that we especially care,
but it was easy to do.

This speeds up our ECDH benchmark a bit which otherwise must construct the
EC_GROUP each time (matching real world usage).

Before:
Did 7619 ECDH P-224 operations in 1003190us (7594.8 ops/sec)
Did 7518 ECDH P-256 operations in 1060844us (7086.8 ops/sec)
Did 572 ECDH P-384 operations in 1055878us (541.7 ops/sec)
Did 264 ECDH P-521 operations in 1062375us (248.5 ops/sec)

After:
Did 8415 ECDH P-224 operations in 1066695us (7888.9 ops/sec)
Did 7952 ECDH P-256 operations in 1022819us (7774.6 ops/sec)
Did 572 ECDH P-384 operations in 1055817us (541.8 ops/sec)
Did 264 ECDH P-521 operations in 1060008us (249.1 ops/sec)

Bug: 20
Change-Id: I7446cd0a69a840551dcc2dfabadde8ee1e3ff3e2
Reviewed-on: https://boringssl-review.googlesource.com/23073
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/fipsmodule/ec/ec.c b/crypto/fipsmodule/ec/ec.c
index 4916b1d..14c7450 100644
--- a/crypto/fipsmodule/ec/ec.c
+++ b/crypto/fipsmodule/ec/ec.c
@@ -279,63 +279,6 @@
 #endif
 }
 
-// built_in_curve_scalar_field_monts contains Montgomery contexts for
-// performing inversions in the scalar fields of each of the built-in
-// curves. It's protected by |built_in_curve_scalar_field_monts_once|.
-DEFINE_LOCAL_DATA(BN_MONT_CTX **, built_in_curve_scalar_field_monts) {
-  const struct built_in_curves *const curves = OPENSSL_built_in_curves();
-
-  BN_MONT_CTX **monts =
-      OPENSSL_malloc(sizeof(BN_MONT_CTX *) * OPENSSL_NUM_BUILT_IN_CURVES);
-  if (monts == NULL) {
-    return;
-  }
-
-  OPENSSL_memset(monts, 0, sizeof(BN_MONT_CTX *) * OPENSSL_NUM_BUILT_IN_CURVES);
-
-  BIGNUM *order = BN_new();
-  BN_CTX *bn_ctx = BN_CTX_new();
-  BN_MONT_CTX *mont_ctx = NULL;
-
-  if (bn_ctx == NULL ||
-      order == NULL) {
-    goto err;
-  }
-
-  for (size_t i = 0; i < OPENSSL_NUM_BUILT_IN_CURVES; i++) {
-    const struct built_in_curve *curve = &curves->curves[i];
-    const unsigned param_len = curve->param_len;
-    const uint8_t *params = curve->params;
-
-    mont_ctx = BN_MONT_CTX_new();
-    if (mont_ctx == NULL) {
-      goto err;
-    }
-
-    if (!BN_bin2bn(params + 5 * param_len, param_len, order) ||
-        !BN_MONT_CTX_set(mont_ctx, order, bn_ctx)) {
-      goto err;
-    }
-
-    monts[i] = mont_ctx;
-    mont_ctx = NULL;
-  }
-
-  *out = monts;
-  goto done;
-
-err:
-  BN_MONT_CTX_free(mont_ctx);
-  for (size_t i = 0; i < OPENSSL_NUM_BUILT_IN_CURVES; i++) {
-    BN_MONT_CTX_free(monts[i]);
-  }
-  OPENSSL_free((BN_MONT_CTX**) monts);
-
-done:
-  BN_free(order);
-  BN_CTX_free(bn_ctx);
-}
-
 EC_GROUP *ec_group_new(const EC_METHOD *meth) {
   EC_GROUP *ret;
 
@@ -462,13 +405,18 @@
     return 0;
   }
 
+  BN_MONT_CTX_free(group->order_mont);
+  group->order_mont = BN_MONT_CTX_new();
+  if (group->order_mont == NULL ||
+      !BN_MONT_CTX_set(group->order_mont, &group->order, NULL)) {
+    return 0;
+  }
+
   ec_group_set0_generator(group, copy);
   return 1;
 }
 
-static EC_GROUP *ec_group_new_from_data(unsigned built_in_index) {
-  const struct built_in_curves *const curves = OPENSSL_built_in_curves();
-  const struct built_in_curve *curve = &curves->curves[built_in_index];
+static EC_GROUP *ec_group_new_from_data(const struct built_in_curve *curve) {
   EC_GROUP *group = NULL;
   EC_POINT *P = NULL;
   BIGNUM *p = NULL, *a = NULL, *b = NULL, *x = NULL, *y = NULL;
@@ -517,9 +465,11 @@
     goto err;
   }
 
-  const BN_MONT_CTX **monts = *built_in_curve_scalar_field_monts();
-  if (monts != NULL) {
-    group->order_mont = monts[built_in_index];
+  group->order_mont = BN_MONT_CTX_new();
+  if (group->order_mont == NULL ||
+      !BN_MONT_CTX_set(group->order_mont, &group->order, ctx)) {
+    OPENSSL_PUT_ERROR(EC, ERR_R_BN_LIB);
+    goto err;
   }
 
   ec_group_set0_generator(group, P);
@@ -541,29 +491,65 @@
   return group;
 }
 
-EC_GROUP *EC_GROUP_new_by_curve_name(int nid) {
-  const struct built_in_curves *const curves = OPENSSL_built_in_curves();
-  EC_GROUP *ret = NULL;
+// Built-in groups are allocated lazily and static once allocated.
+// TODO(davidben): Make these actually static. https://crbug.com/boringssl/20.
+struct built_in_groups_st {
+  EC_GROUP *groups[OPENSSL_NUM_BUILT_IN_CURVES];
+};
+DEFINE_BSS_GET(struct built_in_groups_st, built_in_groups);
+DEFINE_STATIC_MUTEX(built_in_groups_lock);
 
+EC_GROUP *EC_GROUP_new_by_curve_name(int nid) {
+  struct built_in_groups_st *groups = built_in_groups_bss_get();
+  EC_GROUP **group_ptr = NULL;
+  const struct built_in_curves *const curves = OPENSSL_built_in_curves();
+  const struct built_in_curve *curve = NULL;
   for (size_t i = 0; i < OPENSSL_NUM_BUILT_IN_CURVES; i++) {
-    const struct built_in_curve *curve = &curves->curves[i];
-    if (curve->nid == nid) {
-      ret = ec_group_new_from_data(i);
+    if (curves->curves[i].nid == nid) {
+      curve = &curves->curves[i];
+      group_ptr = &groups->groups[i];
       break;
     }
   }
 
-  if (ret == NULL) {
+  if (curve == NULL) {
     OPENSSL_PUT_ERROR(EC, EC_R_UNKNOWN_GROUP);
     return NULL;
   }
 
-  ret->curve_name = nid;
+  CRYPTO_STATIC_MUTEX_lock_read(built_in_groups_lock_bss_get());
+  EC_GROUP *ret = *group_ptr;
+  CRYPTO_STATIC_MUTEX_unlock_read(built_in_groups_lock_bss_get());
+  if (ret != NULL) {
+    return ret;
+  }
+
+  ret = ec_group_new_from_data(curve);
+  if (ret == NULL) {
+    return NULL;
+  }
+
+  EC_GROUP *to_free = NULL;
+  CRYPTO_STATIC_MUTEX_lock_write(built_in_groups_lock_bss_get());
+  if (*group_ptr == NULL) {
+    *group_ptr = ret;
+    // Filling in |ret->curve_name| makes |EC_GROUP_free| and |EC_GROUP_dup|
+    // into no-ops. At this point, |ret| is considered static.
+    ret->curve_name = nid;
+  } else {
+    to_free = ret;
+    ret = *group_ptr;
+  }
+  CRYPTO_STATIC_MUTEX_unlock_write(built_in_groups_lock_bss_get());
+
+  EC_GROUP_free(to_free);
   return ret;
 }
 
 void EC_GROUP_free(EC_GROUP *group) {
   if (group == NULL ||
+      // Built-in curves are static.
+      group->curve_name != NID_undef ||
       !CRYPTO_refcount_dec_and_test_zero(&group->references)) {
     return;
   }
@@ -574,17 +560,16 @@
 
   ec_point_free(group->generator, 0 /* don't free group */);
   BN_free(&group->order);
+  BN_MONT_CTX_free(group->order_mont);
 
   OPENSSL_free(group);
 }
 
-const BN_MONT_CTX *ec_group_get_order_mont(const EC_GROUP *group) {
-  return group->order_mont;
-}
-
 EC_GROUP *EC_GROUP_dup(const EC_GROUP *a) {
-  if (a == NULL) {
-    return NULL;
+  if (a == NULL ||
+      // Built-in curves are static.
+      a->curve_name != NID_undef) {
+    return (EC_GROUP *)a;
   }
 
   // Groups are logically immutable (but for |EC_GROUP_set_generator| which must
diff --git a/crypto/fipsmodule/ec/internal.h b/crypto/fipsmodule/ec/internal.h
index 65ce61f..22c8e45 100644
--- a/crypto/fipsmodule/ec/internal.h
+++ b/crypto/fipsmodule/ec/internal.h
@@ -132,7 +132,7 @@
 
   int curve_name;  // optional NID for named curve
 
-  const BN_MONT_CTX *order_mont;  // data for ECDSA inverse
+  BN_MONT_CTX *order_mont;  // data for ECDSA inverse
 
   // The following members are handled by the method functions,
   // even if they appear generic
@@ -163,11 +163,6 @@
 
 EC_GROUP *ec_group_new(const EC_METHOD *meth);
 
-// ec_group_get_order_mont returns a Montgomery context for operations modulo
-// |group|'s order. It may return NULL in the case that |group| is not a
-// built-in group.
-const BN_MONT_CTX *ec_group_get_order_mont(const EC_GROUP *group);
-
 int ec_wNAF_mul(const EC_GROUP *group, EC_POINT *r, const BIGNUM *g_scalar,
                 const EC_POINT *p, const BIGNUM *p_scalar, BN_CTX *ctx);
 
diff --git a/crypto/fipsmodule/ecdsa/ecdsa.c b/crypto/fipsmodule/ecdsa/ecdsa.c
index 0e2adb6..8d01c6f 100644
--- a/crypto/fipsmodule/ecdsa/ecdsa.c
+++ b/crypto/fipsmodule/ecdsa/ecdsa.c
@@ -278,10 +278,8 @@
     }
 
     // Compute the inverse of k. The order is a prime, so use Fermat's Little
-    // Theorem. Note |ec_group_get_order_mont| may return NULL but
-    // |bn_mod_inverse_prime| allows this.
-    if (!bn_mod_inverse_prime(kinv, k, order, ctx,
-                              ec_group_get_order_mont(group))) {
+    // Theorem.
+    if (!bn_mod_inverse_prime(kinv, k, order, ctx, group->order_mont)) {
       OPENSSL_PUT_ERROR(ECDSA, ERR_R_BN_LIB);
       goto err;
     }