Devirtualize ec_GFp_simple_{add,dbl}.

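Now that the tuned add/dbl implementations are exposed, these can be
specific to EC_GFp_mont_method and call the Montgomery felem_mul and
felem_sqr implementations directly. The functions, along with the mul
and mul_public routines that call them, are renamed from
ec_GFp_simple_* to ec_GFp_mont_* accordingly.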

The felem_sqr and felem_mul method-table hooks are still used elsewhere
in simple.c, however, so we cannot get rid of them yet.

Change-Id: I5ea22a8815279931afc98a6fc578bc85e3f8bdcc
Reviewed-on: https://boringssl-review.googlesource.com/c/32849
Commit-Queue: David Benjamin <davidben@google.com>
CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/fipsmodule/ec/ec_montgomery.c b/crypto/fipsmodule/ec/ec_montgomery.c
index 3fb67e2..8f277b0 100644
--- a/crypto/fipsmodule/ec/ec_montgomery.c
+++ b/crypto/fipsmodule/ec/ec_montgomery.c
@@ -220,15 +220,220 @@
   return 1;
 }
 
+void ec_GFp_mont_add(const EC_GROUP *group, EC_RAW_POINT *out,
+                     const EC_RAW_POINT *a, const EC_RAW_POINT *b) {
+  if (a == b) {  // Same pointer: hand off to the doubling routine.
+    ec_GFp_mont_dbl(group, out, a);
+    return;
+  }
+
+  // The method is taken from:
+  //   http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian.html#addition-add-2007-bl
+  //
+  // Coq transcription and correctness proof:
+  // <https://github.com/davidben/fiat-crypto/blob/c7b95f62b2a54b559522573310e9b487327d219a/src/Curves/Weierstrass/Jacobian.v#L467>
+  // <https://github.com/davidben/fiat-crypto/blob/c7b95f62b2a54b559522573310e9b487327d219a/src/Curves/Weierstrass/Jacobian.v#L544>
+  EC_FELEM x_out, y_out, z_out;
+  BN_ULONG z1nz = ec_felem_non_zero_mask(group, &a->Z);
+  BN_ULONG z2nz = ec_felem_non_zero_mask(group, &b->Z);
+
+  // z1z1 = z1**2
+  EC_FELEM z1z1;
+  ec_GFp_mont_felem_sqr(group, &z1z1, &a->Z);
+
+  // z2z2 = z2**2
+  EC_FELEM z2z2;
+  ec_GFp_mont_felem_sqr(group, &z2z2, &b->Z);
+
+  // u1 = x1*z2z2
+  EC_FELEM u1;
+  ec_GFp_mont_felem_mul(group, &u1, &a->X, &z2z2);
+
+  // two_z1z2 = (z1 + z2)**2 - (z1z1 + z2z2) = 2z1z2
+  EC_FELEM two_z1z2;
+  ec_felem_add(group, &two_z1z2, &a->Z, &b->Z);  // z1 + z2
+  ec_GFp_mont_felem_sqr(group, &two_z1z2, &two_z1z2);  // (z1 + z2)**2
+  ec_felem_sub(group, &two_z1z2, &two_z1z2, &z1z1);
+  ec_felem_sub(group, &two_z1z2, &two_z1z2, &z2z2);
+
+  // s1 = y1 * z2**3
+  EC_FELEM s1;
+  ec_GFp_mont_felem_mul(group, &s1, &b->Z, &z2z2);  // z2**3
+  ec_GFp_mont_felem_mul(group, &s1, &s1, &a->Y);
+
+  // u2 = x2*z1z1
+  EC_FELEM u2;
+  ec_GFp_mont_felem_mul(group, &u2, &b->X, &z1z1);
+
+  // h = u2 - u1
+  EC_FELEM h;
+  ec_felem_sub(group, &h, &u2, &u1);
+
+  BN_ULONG xneq = ec_felem_non_zero_mask(group, &h);
+
+  // z_out = two_z1z2 * h
+  ec_GFp_mont_felem_mul(group, &z_out, &h, &two_z1z2);
+
+  // z1z1z1 = z1 * z1z1
+  EC_FELEM z1z1z1;
+  ec_GFp_mont_felem_mul(group, &z1z1z1, &a->Z, &z1z1);
+
+  // s2 = y2 * z1**3
+  EC_FELEM s2;
+  ec_GFp_mont_felem_mul(group, &s2, &b->Y, &z1z1z1);
+
+  // r = (s2 - s1)*2
+  EC_FELEM r;
+  ec_felem_sub(group, &r, &s2, &s1);
+  ec_felem_add(group, &r, &r, &r);
+
+  BN_ULONG yneq = ec_felem_non_zero_mask(group, &r);
+
+  // This case will never occur in the constant-time |ec_GFp_mont_mul|.
+  if (!xneq && !yneq && z1nz && z2nz) {
+    ec_GFp_mont_dbl(group, out, a);
+    return;
+  }
+
+  // I = (2h)**2
+  EC_FELEM i;
+  ec_felem_add(group, &i, &h, &h);
+  ec_GFp_mont_felem_sqr(group, &i, &i);
+
+  // J = h * I
+  EC_FELEM j;
+  ec_GFp_mont_felem_mul(group, &j, &h, &i);
+
+  // V = U1 * I
+  EC_FELEM v;
+  ec_GFp_mont_felem_mul(group, &v, &u1, &i);
+
+  // x_out = r**2 - J - 2V
+  ec_GFp_mont_felem_sqr(group, &x_out, &r);
+  ec_felem_sub(group, &x_out, &x_out, &j);
+  ec_felem_sub(group, &x_out, &x_out, &v);  // V is subtracted twice (2V).
+  ec_felem_sub(group, &x_out, &x_out, &v);
+
+  // y_out = r(V-x_out) - 2 * s1 * J
+  ec_felem_sub(group, &y_out, &v, &x_out);
+  ec_GFp_mont_felem_mul(group, &y_out, &y_out, &r);
+  EC_FELEM s1j;
+  ec_GFp_mont_felem_mul(group, &s1j, &s1, &j);
+  ec_felem_sub(group, &y_out, &y_out, &s1j);  // s1*J is subtracted twice.
+  ec_felem_sub(group, &y_out, &y_out, &s1j);
+
+  ec_felem_select(group, &x_out, z1nz, &x_out, &b->X);
+  ec_felem_select(group, &out->X, z2nz, &x_out, &a->X);
+  ec_felem_select(group, &y_out, z1nz, &y_out, &b->Y);
+  ec_felem_select(group, &out->Y, z2nz, &y_out, &a->Y);
+  ec_felem_select(group, &z_out, z1nz, &z_out, &b->Z);
+  ec_felem_select(group, &out->Z, z2nz, &z_out, &a->Z);
+}
+
+void ec_GFp_mont_dbl(const EC_GROUP *group, EC_RAW_POINT *r,
+                     const EC_RAW_POINT *a) {
+  if (group->a_is_minus3) {
+    // The method is taken from:
+    //   http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
+    //
+    // Coq transcription and correctness proof:
+    // <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/Jacobian.v#L93>
+    // <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/Jacobian.v#L201>
+    EC_FELEM delta, gamma, beta, ftmp, ftmp2, tmptmp, alpha, fourbeta;
+    // delta = z^2
+    ec_GFp_mont_felem_sqr(group, &delta, &a->Z);
+    // gamma = y^2
+    ec_GFp_mont_felem_sqr(group, &gamma, &a->Y);
+    // beta = x*gamma
+    ec_GFp_mont_felem_mul(group, &beta, &a->X, &gamma);
+
+    // alpha = 3*(x-delta)*(x+delta)
+    ec_felem_sub(group, &ftmp, &a->X, &delta);  // x - delta
+    ec_felem_add(group, &ftmp2, &a->X, &delta);  // x + delta
+
+    ec_felem_add(group, &tmptmp, &ftmp2, &ftmp2);  // 2*(x + delta)
+    ec_felem_add(group, &ftmp2, &ftmp2, &tmptmp);  // 3*(x + delta)
+    ec_GFp_mont_felem_mul(group, &alpha, &ftmp, &ftmp2);
+
+    // x' = alpha^2 - 8*beta
+    ec_GFp_mont_felem_sqr(group, &r->X, &alpha);
+    ec_felem_add(group, &fourbeta, &beta, &beta);  // 2*beta
+    ec_felem_add(group, &fourbeta, &fourbeta, &fourbeta);  // 4*beta
+    ec_felem_add(group, &tmptmp, &fourbeta, &fourbeta);  // 8*beta
+    ec_felem_sub(group, &r->X, &r->X, &tmptmp);
+
+    // z' = (y + z)^2 - gamma - delta
+    ec_felem_add(group, &delta, &gamma, &delta);  // gamma + delta
+    ec_felem_add(group, &ftmp, &a->Y, &a->Z);
+    ec_GFp_mont_felem_sqr(group, &r->Z, &ftmp);
+    ec_felem_sub(group, &r->Z, &r->Z, &delta);
+
+    // y' = alpha*(4*beta - x') - 8*gamma^2
+    ec_felem_sub(group, &r->Y, &fourbeta, &r->X);
+    ec_felem_add(group, &gamma, &gamma, &gamma);  // 2*gamma
+    ec_GFp_mont_felem_sqr(group, &gamma, &gamma);  // 4*gamma^2
+    ec_GFp_mont_felem_mul(group, &r->Y, &alpha, &r->Y);
+    ec_felem_add(group, &gamma, &gamma, &gamma);  // 8*gamma^2
+    ec_felem_sub(group, &r->Y, &r->Y, &gamma);
+  } else {
+    // The method is taken from:
+    //   http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian.html#doubling-dbl-2007-bl
+    //
+    // Coq transcription and correctness proof:
+    // <https://github.com/davidben/fiat-crypto/blob/c7b95f62b2a54b559522573310e9b487327d219a/src/Curves/Weierstrass/Jacobian.v#L102>
+    // <https://github.com/davidben/fiat-crypto/blob/c7b95f62b2a54b559522573310e9b487327d219a/src/Curves/Weierstrass/Jacobian.v#L534>
+    EC_FELEM xx, yy, yyyy, zz;
+    ec_GFp_mont_felem_sqr(group, &xx, &a->X);  // xx = x_in^2
+    ec_GFp_mont_felem_sqr(group, &yy, &a->Y);  // yy = y_in^2
+    ec_GFp_mont_felem_sqr(group, &yyyy, &yy);  // yyyy = yy^2
+    ec_GFp_mont_felem_sqr(group, &zz, &a->Z);  // zz = z_in^2
+
+    // s = 2*((x_in + yy)^2 - xx - yyyy)
+    EC_FELEM s;
+    ec_felem_add(group, &s, &a->X, &yy);
+    ec_GFp_mont_felem_sqr(group, &s, &s);
+    ec_felem_sub(group, &s, &s, &xx);
+    ec_felem_sub(group, &s, &s, &yyyy);
+    ec_felem_add(group, &s, &s, &s);
+
+    // m = 3*xx + a*zz^2
+    EC_FELEM m;
+    ec_GFp_mont_felem_sqr(group, &m, &zz);  // zz^2
+    ec_GFp_mont_felem_mul(group, &m, &group->a, &m);  // a*zz^2
+    ec_felem_add(group, &m, &m, &xx);  // xx is added three times (3*xx).
+    ec_felem_add(group, &m, &m, &xx);
+    ec_felem_add(group, &m, &m, &xx);
+
+    // x_out = m^2 - 2*s
+    ec_GFp_mont_felem_sqr(group, &r->X, &m);
+    ec_felem_sub(group, &r->X, &r->X, &s);
+    ec_felem_sub(group, &r->X, &r->X, &s);
+
+    // z_out = (y_in + z_in)^2 - yy - zz
+    ec_felem_add(group, &r->Z, &a->Y, &a->Z);
+    ec_GFp_mont_felem_sqr(group, &r->Z, &r->Z);
+    ec_felem_sub(group, &r->Z, &r->Z, &yy);
+    ec_felem_sub(group, &r->Z, &r->Z, &zz);
+
+    // y_out = m*(s-x_out) - 8*yyyy
+    ec_felem_add(group, &yyyy, &yyyy, &yyyy);  // 2*yyyy
+    ec_felem_add(group, &yyyy, &yyyy, &yyyy);  // 4*yyyy
+    ec_felem_add(group, &yyyy, &yyyy, &yyyy);  // 8*yyyy
+    ec_felem_sub(group, &r->Y, &s, &r->X);
+    ec_GFp_mont_felem_mul(group, &r->Y, &r->Y, &m);
+    ec_felem_sub(group, &r->Y, &r->Y, &yyyy);
+  }
+}
+
 DEFINE_METHOD_FUNCTION(EC_METHOD, EC_GFp_mont_method) {
   out->group_init = ec_GFp_mont_group_init;
   out->group_finish = ec_GFp_mont_group_finish;
   out->group_set_curve = ec_GFp_mont_group_set_curve;
   out->point_get_affine_coordinates = ec_GFp_mont_point_get_affine_coordinates;
-  out->add = ec_GFp_simple_add;
-  out->dbl = ec_GFp_simple_dbl;
-  out->mul = ec_GFp_simple_mul;
-  out->mul_public = ec_GFp_simple_mul_public;
+  out->add = ec_GFp_mont_add;
+  out->dbl = ec_GFp_mont_dbl;
+  out->mul = ec_GFp_mont_mul;
+  out->mul_public = ec_GFp_mont_mul_public;
   out->felem_mul = ec_GFp_mont_felem_mul;
   out->felem_sqr = ec_GFp_mont_felem_sqr;
   out->bignum_to_felem = ec_GFp_mont_bignum_to_felem;
diff --git a/crypto/fipsmodule/ec/internal.h b/crypto/fipsmodule/ec/internal.h
index e51109a..e62ceb8 100644
--- a/crypto/fipsmodule/ec/internal.h
+++ b/crypto/fipsmodule/ec/internal.h
@@ -292,9 +292,9 @@
     const EC_GROUP *group, EC_POINT *r, const EC_SCALAR *g_scalar,
     const EC_POINT *p, const EC_SCALAR *p_scalar, BN_CTX *ctx);
 
-void ec_GFp_simple_mul(const EC_GROUP *group, EC_RAW_POINT *r,
-                       const EC_SCALAR *g_scalar, const EC_RAW_POINT *p,
-                       const EC_SCALAR *p_scalar);
+void ec_GFp_mont_mul(const EC_GROUP *group, EC_RAW_POINT *r,
+                     const EC_SCALAR *g_scalar, const EC_RAW_POINT *p,
+                     const EC_SCALAR *p_scalar);
 
 // ec_compute_wNAF writes the modified width-(w+1) Non-Adjacent Form (wNAF) of
 // |scalar| to |out|. |out| must have room for |bits| + 1 elements, each of
@@ -307,9 +307,9 @@
 void ec_compute_wNAF(const EC_GROUP *group, int8_t *out,
                      const EC_SCALAR *scalar, size_t bits, int w);
 
-void ec_GFp_simple_mul_public(const EC_GROUP *group, EC_RAW_POINT *r,
-                              const EC_SCALAR *g_scalar, const EC_RAW_POINT *p,
-                              const EC_SCALAR *p_scalar);
+void ec_GFp_mont_mul_public(const EC_GROUP *group, EC_RAW_POINT *r,
+                            const EC_SCALAR *g_scalar, const EC_RAW_POINT *p,
+                            const EC_SCALAR *p_scalar);
 
 // method functions in simple.c
 int ec_GFp_simple_group_init(EC_GROUP *);
@@ -325,10 +325,9 @@
 int ec_GFp_simple_point_set_affine_coordinates(const EC_GROUP *, EC_RAW_POINT *,
                                                const BIGNUM *x,
                                                const BIGNUM *y);
-void ec_GFp_simple_add(const EC_GROUP *, EC_RAW_POINT *r, const EC_RAW_POINT *a,
-                       const EC_RAW_POINT *b);
-void ec_GFp_simple_dbl(const EC_GROUP *, EC_RAW_POINT *r,
-                       const EC_RAW_POINT *a);
+void ec_GFp_mont_add(const EC_GROUP *, EC_RAW_POINT *r, const EC_RAW_POINT *a,
+                     const EC_RAW_POINT *b);
+void ec_GFp_mont_dbl(const EC_GROUP *, EC_RAW_POINT *r, const EC_RAW_POINT *a);
 void ec_GFp_simple_invert(const EC_GROUP *, EC_RAW_POINT *);
 int ec_GFp_simple_is_at_infinity(const EC_GROUP *, const EC_RAW_POINT *);
 int ec_GFp_simple_is_on_curve(const EC_GROUP *, const EC_RAW_POINT *);
diff --git a/crypto/fipsmodule/ec/simple.c b/crypto/fipsmodule/ec/simple.c
index 5c63711..bf4aa4f 100644
--- a/crypto/fipsmodule/ec/simple.c
+++ b/crypto/fipsmodule/ec/simple.c
@@ -212,222 +212,6 @@
   return 1;
 }
 
-void ec_GFp_simple_add(const EC_GROUP *group, EC_RAW_POINT *out,
-                       const EC_RAW_POINT *a, const EC_RAW_POINT *b) {
-  if (a == b) {
-    ec_GFp_simple_dbl(group, out, a);
-    return;
-  }
-
-
-  // The method is taken from:
-  //   http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian.html#addition-add-2007-bl
-  //
-  // Coq transcription and correctness proof:
-  // <https://github.com/davidben/fiat-crypto/blob/c7b95f62b2a54b559522573310e9b487327d219a/src/Curves/Weierstrass/Jacobian.v#L467>
-  // <https://github.com/davidben/fiat-crypto/blob/c7b95f62b2a54b559522573310e9b487327d219a/src/Curves/Weierstrass/Jacobian.v#L544>
-  void (*const felem_mul)(const EC_GROUP *, EC_FELEM *r, const EC_FELEM *a,
-                          const EC_FELEM *b) = group->meth->felem_mul;
-  void (*const felem_sqr)(const EC_GROUP *, EC_FELEM *r, const EC_FELEM *a) =
-      group->meth->felem_sqr;
-
-  EC_FELEM x_out, y_out, z_out;
-  BN_ULONG z1nz = ec_felem_non_zero_mask(group, &a->Z);
-  BN_ULONG z2nz = ec_felem_non_zero_mask(group, &b->Z);
-
-  // z1z1 = z1z1 = z1**2
-  EC_FELEM z1z1;
-  felem_sqr(group, &z1z1, &a->Z);
-
-  // z2z2 = z2**2
-  EC_FELEM z2z2;
-  felem_sqr(group, &z2z2, &b->Z);
-
-  // u1 = x1*z2z2
-  EC_FELEM u1;
-  felem_mul(group, &u1, &a->X, &z2z2);
-
-  // two_z1z2 = (z1 + z2)**2 - (z1z1 + z2z2) = 2z1z2
-  EC_FELEM two_z1z2;
-  ec_felem_add(group, &two_z1z2, &a->Z, &b->Z);
-  felem_sqr(group, &two_z1z2, &two_z1z2);
-  ec_felem_sub(group, &two_z1z2, &two_z1z2, &z1z1);
-  ec_felem_sub(group, &two_z1z2, &two_z1z2, &z2z2);
-
-  // s1 = y1 * z2**3
-  EC_FELEM s1;
-  felem_mul(group, &s1, &b->Z, &z2z2);
-  felem_mul(group, &s1, &s1, &a->Y);
-
-  // u2 = x2*z1z1
-  EC_FELEM u2;
-  felem_mul(group, &u2, &b->X, &z1z1);
-
-  // h = u2 - u1
-  EC_FELEM h;
-  ec_felem_sub(group, &h, &u2, &u1);
-
-  BN_ULONG xneq = ec_felem_non_zero_mask(group, &h);
-
-  // z_out = two_z1z2 * h
-  felem_mul(group, &z_out, &h, &two_z1z2);
-
-  // z1z1z1 = z1 * z1z1
-  EC_FELEM z1z1z1;
-  felem_mul(group, &z1z1z1, &a->Z, &z1z1);
-
-  // s2 = y2 * z1**3
-  EC_FELEM s2;
-  felem_mul(group, &s2, &b->Y, &z1z1z1);
-
-  // r = (s2 - s1)*2
-  EC_FELEM r;
-  ec_felem_sub(group, &r, &s2, &s1);
-  ec_felem_add(group, &r, &r, &r);
-
-  BN_ULONG yneq = ec_felem_non_zero_mask(group, &r);
-
-  // This case will never occur in the constant-time |ec_GFp_simple_mul|.
-  if (!xneq && !yneq && z1nz && z2nz) {
-    ec_GFp_simple_dbl(group, out, a);
-    return;
-  }
-
-  // I = (2h)**2
-  EC_FELEM i;
-  ec_felem_add(group, &i, &h, &h);
-  felem_sqr(group, &i, &i);
-
-  // J = h * I
-  EC_FELEM j;
-  felem_mul(group, &j, &h, &i);
-
-  // V = U1 * I
-  EC_FELEM v;
-  felem_mul(group, &v, &u1, &i);
-
-  // x_out = r**2 - J - 2V
-  felem_sqr(group, &x_out, &r);
-  ec_felem_sub(group, &x_out, &x_out, &j);
-  ec_felem_sub(group, &x_out, &x_out, &v);
-  ec_felem_sub(group, &x_out, &x_out, &v);
-
-  // y_out = r(V-x_out) - 2 * s1 * J
-  ec_felem_sub(group, &y_out, &v, &x_out);
-  felem_mul(group, &y_out, &y_out, &r);
-  EC_FELEM s1j;
-  felem_mul(group, &s1j, &s1, &j);
-  ec_felem_sub(group, &y_out, &y_out, &s1j);
-  ec_felem_sub(group, &y_out, &y_out, &s1j);
-
-  ec_felem_select(group, &x_out, z1nz, &x_out, &b->X);
-  ec_felem_select(group, &out->X, z2nz, &x_out, &a->X);
-  ec_felem_select(group, &y_out, z1nz, &y_out, &b->Y);
-  ec_felem_select(group, &out->Y, z2nz, &y_out, &a->Y);
-  ec_felem_select(group, &z_out, z1nz, &z_out, &b->Z);
-  ec_felem_select(group, &out->Z, z2nz, &z_out, &a->Z);
-}
-
-void ec_GFp_simple_dbl(const EC_GROUP *group, EC_RAW_POINT *r,
-                       const EC_RAW_POINT *a) {
-  void (*const felem_mul)(const EC_GROUP *, EC_FELEM *r, const EC_FELEM *a,
-                          const EC_FELEM *b) = group->meth->felem_mul;
-  void (*const felem_sqr)(const EC_GROUP *, EC_FELEM *r, const EC_FELEM *a) =
-      group->meth->felem_sqr;
-
-  if (group->a_is_minus3) {
-    // The method is taken from:
-    //   http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
-    //
-    // Coq transcription and correctness proof:
-    // <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/Jacobian.v#L93>
-    // <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/Jacobian.v#L201>
-    EC_FELEM delta, gamma, beta, ftmp, ftmp2, tmptmp, alpha, fourbeta;
-    // delta = z^2
-    felem_sqr(group, &delta, &a->Z);
-    // gamma = y^2
-    felem_sqr(group, &gamma, &a->Y);
-    // beta = x*gamma
-    felem_mul(group, &beta, &a->X, &gamma);
-
-    // alpha = 3*(x-delta)*(x+delta)
-    ec_felem_sub(group, &ftmp, &a->X, &delta);
-    ec_felem_add(group, &ftmp2, &a->X, &delta);
-
-    ec_felem_add(group, &tmptmp, &ftmp2, &ftmp2);
-    ec_felem_add(group, &ftmp2, &ftmp2, &tmptmp);
-    felem_mul(group, &alpha, &ftmp, &ftmp2);
-
-    // x' = alpha^2 - 8*beta
-    felem_sqr(group, &r->X, &alpha);
-    ec_felem_add(group, &fourbeta, &beta, &beta);
-    ec_felem_add(group, &fourbeta, &fourbeta, &fourbeta);
-    ec_felem_add(group, &tmptmp, &fourbeta, &fourbeta);
-    ec_felem_sub(group, &r->X, &r->X, &tmptmp);
-
-    // z' = (y + z)^2 - gamma - delta
-    ec_felem_add(group, &delta, &gamma, &delta);
-    ec_felem_add(group, &ftmp, &a->Y, &a->Z);
-    felem_sqr(group, &r->Z, &ftmp);
-    ec_felem_sub(group, &r->Z, &r->Z, &delta);
-
-    // y' = alpha*(4*beta - x') - 8*gamma^2
-    ec_felem_sub(group, &r->Y, &fourbeta, &r->X);
-    ec_felem_add(group, &gamma, &gamma, &gamma);
-    felem_sqr(group, &gamma, &gamma);
-    felem_mul(group, &r->Y, &alpha, &r->Y);
-    ec_felem_add(group, &gamma, &gamma, &gamma);
-    ec_felem_sub(group, &r->Y, &r->Y, &gamma);
-  } else {
-    // The method is taken from:
-    //   http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian.html#doubling-dbl-2007-bl
-    //
-    // Coq transcription and correctness proof:
-    // <https://github.com/davidben/fiat-crypto/blob/c7b95f62b2a54b559522573310e9b487327d219a/src/Curves/Weierstrass/Jacobian.v#L102>
-    // <https://github.com/davidben/fiat-crypto/blob/c7b95f62b2a54b559522573310e9b487327d219a/src/Curves/Weierstrass/Jacobian.v#L534>
-    EC_FELEM xx, yy, yyyy, zz;
-    felem_sqr(group, &xx, &a->X);
-    felem_sqr(group, &yy, &a->Y);
-    felem_sqr(group, &yyyy, &yy);
-    felem_sqr(group, &zz, &a->Z);
-
-    // s = 2*((x_in + yy)^2 - xx - yyyy)
-    EC_FELEM s;
-    ec_felem_add(group, &s, &a->X, &yy);
-    felem_sqr(group, &s, &s);
-    ec_felem_sub(group, &s, &s, &xx);
-    ec_felem_sub(group, &s, &s, &yyyy);
-    ec_felem_add(group, &s, &s, &s);
-
-    // m = 3*xx + a*zz^2
-    EC_FELEM m;
-    felem_sqr(group, &m, &zz);
-    felem_mul(group, &m, &group->a, &m);
-    ec_felem_add(group, &m, &m, &xx);
-    ec_felem_add(group, &m, &m, &xx);
-    ec_felem_add(group, &m, &m, &xx);
-
-    // x_out = m^2 - 2*s
-    felem_sqr(group, &r->X, &m);
-    ec_felem_sub(group, &r->X, &r->X, &s);
-    ec_felem_sub(group, &r->X, &r->X, &s);
-
-    // z_out = (y_in + z_in)^2 - yy - zz
-    ec_felem_add(group, &r->Z, &a->Y, &a->Z);
-    felem_sqr(group, &r->Z, &r->Z);
-    ec_felem_sub(group, &r->Z, &r->Z, &yy);
-    ec_felem_sub(group, &r->Z, &r->Z, &zz);
-
-    // y_out = m*(s-x_out) - 8*yyyy
-    ec_felem_add(group, &yyyy, &yyyy, &yyyy);
-    ec_felem_add(group, &yyyy, &yyyy, &yyyy);
-    ec_felem_add(group, &yyyy, &yyyy, &yyyy);
-    ec_felem_sub(group, &r->Y, &s, &r->X);
-    felem_mul(group, &r->Y, &r->Y, &m);
-    ec_felem_sub(group, &r->Y, &r->Y, &yyyy);
-  }
-}
-
 void ec_GFp_simple_invert(const EC_GROUP *group, EC_RAW_POINT *point) {
   ec_felem_neg(group, &point->Y, &point->Y);
 }
diff --git a/crypto/fipsmodule/ec/simple_mul.c b/crypto/fipsmodule/ec/simple_mul.c
index 93ed0a8..e05f491 100644
--- a/crypto/fipsmodule/ec/simple_mul.c
+++ b/crypto/fipsmodule/ec/simple_mul.c
@@ -21,12 +21,12 @@
 #include "../../internal.h"
 
 
-static void ec_GFp_simple_mul_single(const EC_GROUP *group, EC_RAW_POINT *r,
-                                     const EC_RAW_POINT *p,
-                                     const EC_SCALAR *scalar) {
+static void ec_GFp_mont_mul_single(const EC_GROUP *group, EC_RAW_POINT *r,
+                                   const EC_RAW_POINT *p,
+                                   const EC_SCALAR *scalar) {
   // This is a generic implementation for uncommon curves that not do not
   // warrant a tuned one. It uses unsigned digits so that the doubling case in
-  // |ec_GFp_simple_add| is always unreachable, erring on safety and simplicity.
+  // |ec_GFp_mont_add| is always unreachable, erring on safety and simplicity.
 
   // Compute a table of the first 32 multiples of |p| (including infinity).
   EC_RAW_POINT precomp[32];
@@ -34,9 +34,9 @@
   ec_GFp_simple_point_copy(&precomp[1], p);
   for (size_t j = 2; j < OPENSSL_ARRAY_SIZE(precomp); j++) {
     if (j & 1) {
-      ec_GFp_simple_add(group, &precomp[j], &precomp[1], &precomp[j - 1]);
+      ec_GFp_mont_add(group, &precomp[j], &precomp[1], &precomp[j - 1]);
     } else {
-      ec_GFp_simple_dbl(group, &precomp[j], &precomp[j / 2]);
+      ec_GFp_mont_dbl(group, &precomp[j], &precomp[j / 2]);
     }
   }
 
@@ -45,7 +45,7 @@
   int r_is_at_infinity = 1;
   for (unsigned i = bits - 1; i < bits; i--) {
     if (!r_is_at_infinity) {
-      ec_GFp_simple_dbl(group, r, r);
+      ec_GFp_mont_dbl(group, r, r);
     }
     if (i % 5 == 0) {
       // Compute the next window value.
@@ -70,7 +70,7 @@
         ec_GFp_simple_point_copy(r, &tmp);
         r_is_at_infinity = 0;
       } else {
-        ec_GFp_simple_add(group, r, r, &tmp);
+        ec_GFp_mont_add(group, r, r, &tmp);
       }
     }
   }
@@ -79,21 +79,21 @@
   }
 }
 
-void ec_GFp_simple_mul(const EC_GROUP *group, EC_RAW_POINT *r,
-                       const EC_SCALAR *g_scalar, const EC_RAW_POINT *p,
-                       const EC_SCALAR *p_scalar) {
+void ec_GFp_mont_mul(const EC_GROUP *group, EC_RAW_POINT *r,
+                     const EC_SCALAR *g_scalar, const EC_RAW_POINT *p,
+                     const EC_SCALAR *p_scalar) {
   assert(g_scalar != NULL || p_scalar != NULL);
   if (p_scalar == NULL) {
-    ec_GFp_simple_mul_single(group, r, &group->generator->raw, g_scalar);
+    ec_GFp_mont_mul_single(group, r, &group->generator->raw, g_scalar);
   } else if (g_scalar == NULL) {
-    ec_GFp_simple_mul_single(group, r, p, p_scalar);
+    ec_GFp_mont_mul_single(group, r, p, p_scalar);
   } else {
     // Support constant-time two-point multiplication for compatibility.  This
     // does not actually come up in keygen, ECDH, or ECDSA, so we implement it
     // the naive way.
-    ec_GFp_simple_mul_single(group, r, &group->generator->raw, g_scalar);
+    ec_GFp_mont_mul_single(group, r, &group->generator->raw, g_scalar);
     EC_RAW_POINT tmp;
-    ec_GFp_simple_mul_single(group, &tmp, p, p_scalar);
-    ec_GFp_simple_add(group, r, r, &tmp);
+    ec_GFp_mont_mul_single(group, &tmp, p, p_scalar);
+    ec_GFp_mont_add(group, r, r, &tmp);
   }
 }
diff --git a/crypto/fipsmodule/ec/wnaf.c b/crypto/fipsmodule/ec/wnaf.c
index 145caa0..c0c2809 100644
--- a/crypto/fipsmodule/ec/wnaf.c
+++ b/crypto/fipsmodule/ec/wnaf.c
@@ -151,9 +151,9 @@
                             const EC_RAW_POINT *p, size_t len) {
   ec_GFp_simple_point_copy(&out[0], p);
   EC_RAW_POINT two_p;
-  ec_GFp_simple_dbl(group, &two_p, p);
+  ec_GFp_mont_dbl(group, &two_p, p);
   for (size_t i = 1; i < len; i++) {
-    ec_GFp_simple_add(group, &out[i], &out[i - 1], &two_p);
+    ec_GFp_mont_add(group, &out[i], &out[i - 1], &two_p);
   }
 }
 
@@ -168,15 +168,15 @@
   }
 }
 
-// EC_WNAF_WINDOW_BITS is the window size to use for |ec_GFp_simple_mul_public|.
+// EC_WNAF_WINDOW_BITS is the window size to use for |ec_GFp_mont_mul_public|.
 #define EC_WNAF_WINDOW_BITS 4
 
-// EC_WNAF_TABLE_SIZE is the table size to use for |ec_GFp_simple_mul_public|.
+// EC_WNAF_TABLE_SIZE is the table size to use for |ec_GFp_mont_mul_public|.
 #define EC_WNAF_TABLE_SIZE (1 << (EC_WNAF_WINDOW_BITS - 1))
 
-void ec_GFp_simple_mul_public(const EC_GROUP *group, EC_RAW_POINT *r,
-                              const EC_SCALAR *g_scalar, const EC_RAW_POINT *p,
-                              const EC_SCALAR *p_scalar) {
+void ec_GFp_mont_mul_public(const EC_GROUP *group, EC_RAW_POINT *r,
+                            const EC_SCALAR *g_scalar, const EC_RAW_POINT *p,
+                            const EC_SCALAR *p_scalar) {
   size_t bits = BN_num_bits(&group->order);
   size_t wNAF_len = bits + 1;
 
@@ -197,7 +197,7 @@
   int r_is_at_infinity = 1;
   for (size_t k = wNAF_len - 1; k < wNAF_len; k--) {
     if (!r_is_at_infinity) {
-      ec_GFp_simple_dbl(group, r, r);
+      ec_GFp_mont_dbl(group, r, r);
     }
 
     if (g_wNAF[k] != 0) {
@@ -206,7 +206,7 @@
         ec_GFp_simple_point_copy(r, &tmp);
         r_is_at_infinity = 0;
       } else {
-        ec_GFp_simple_add(group, r, r, &tmp);
+        ec_GFp_mont_add(group, r, r, &tmp);
       }
     }
 
@@ -216,7 +216,7 @@
         ec_GFp_simple_point_copy(r, &tmp);
         r_is_at_infinity = 0;
       } else {
-        ec_GFp_simple_add(group, r, r, &tmp);
+        ec_GFp_mont_add(group, r, r, &tmp);
       }
     }
   }