Route the tuned add/dbl implementations out of EC_METHOD.
Some consumer stumbled upon EC_POINT_{add,dbl} being faster with a
"custom" P-224 curve than the built-in one and made "custom" clones to
work around this. Before the EC_FELEM refactor, EC_GFp_nistp224_method
used BN_mod_mul for all reductions in fallback point arithmetic (we
primarily support the multiplication functions and keep the low-level
point arithmetic for legacy reasons) which took quite a performance hit.
EC_FELEM fixed this, but standalone felem_{mul,sqr} calls out of
nistp224 perform a lot of reductions, rather than batching them up as
that implementation is intended. So it is still slightly faster to use a
"custom" curve.
Custom curves are the last thing we want to encourage, so just route the
tuned implementations out of EC_METHOD to close this gap. Now the
built-in implementation is always solidly faster than (or identical to)
the custom clone. This also reduces the number of places where we mix
up tuned vs. generic implementation, which gets us closer to making
EC_POINT's representation EC_METHOD-specific.
Change-Id: I843e1101a6208eaabb56d29d342e886e523c78b4
Reviewed-on: https://boringssl-review.googlesource.com/c/32848
Commit-Queue: David Benjamin <davidben@google.com>
CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/fipsmodule/ec/ec.c b/crypto/fipsmodule/ec/ec.c
index e9b1e97..4c557a1 100644
--- a/crypto/fipsmodule/ec/ec.c
+++ b/crypto/fipsmodule/ec/ec.c
@@ -782,7 +782,7 @@
OPENSSL_PUT_ERROR(EC, EC_R_INCOMPATIBLE_OBJECTS);
return 0;
}
- ec_GFp_simple_add(group, &r->raw, &a->raw, &b->raw);
+ group->meth->add(group, &r->raw, &a->raw, &b->raw);
return 1;
}
@@ -793,7 +793,7 @@
OPENSSL_PUT_ERROR(EC, EC_R_INCOMPATIBLE_OBJECTS);
return 0;
}
- ec_GFp_simple_dbl(group, &r->raw, &a->raw);
+ group->meth->dbl(group, &r->raw, &a->raw);
return 1;
}
diff --git a/crypto/fipsmodule/ec/ec_montgomery.c b/crypto/fipsmodule/ec/ec_montgomery.c
index cd6bbfd..db2d657 100644
--- a/crypto/fipsmodule/ec/ec_montgomery.c
+++ b/crypto/fipsmodule/ec/ec_montgomery.c
@@ -225,6 +225,8 @@
out->group_finish = ec_GFp_mont_group_finish;
out->group_set_curve = ec_GFp_mont_group_set_curve;
out->point_get_affine_coordinates = ec_GFp_mont_point_get_affine_coordinates;
+ out->add = ec_GFp_simple_add;
+ out->dbl = ec_GFp_simple_dbl;
out->mul = ec_GFp_simple_mul;
out->mul_public = ec_GFp_simple_mul_public;
out->felem_mul = ec_GFp_mont_felem_mul;
diff --git a/crypto/fipsmodule/ec/internal.h b/crypto/fipsmodule/ec/internal.h
index 02cd04a..4f58cb9 100644
--- a/crypto/fipsmodule/ec/internal.h
+++ b/crypto/fipsmodule/ec/internal.h
@@ -127,6 +127,12 @@
int (*point_get_affine_coordinates)(const EC_GROUP *, const EC_RAW_POINT *,
BIGNUM *x, BIGNUM *y);
+ // add sets |r| to |a| + |b|.
+ void (*add)(const EC_GROUP *group, EC_RAW_POINT *r, const EC_RAW_POINT *a,
+ const EC_RAW_POINT *b);
+ // dbl sets |r| to |a| + |a|.
+ void (*dbl)(const EC_GROUP *group, EC_RAW_POINT *r, const EC_RAW_POINT *a);
+
// Computes |r = g_scalar*generator + p_scalar*p| if |g_scalar| and |p_scalar|
// are both non-null. Computes |r = g_scalar*generator| if |p_scalar| is null.
// Computes |r = p_scalar*p| if g_scalar is null. At least one of |g_scalar|
@@ -149,10 +155,9 @@
// TODO(davidben): This constrains |EC_FELEM|'s internal representation, adds
// many indirect calls in the middle of the generic code, and a bunch of
// conversions. If p224-64.c were easily convertable to Montgomery form, we
- // could say |EC_FELEM| is always in Montgomery form. If we exposed the
- // internal add and double implementations in each of the curves, we could
- // give |EC_POINT| an |EC_METHOD|-specific representation and |EC_FELEM| is
- // purely a |EC_GFp_mont_method| type.
+ // could say |EC_FELEM| is always in Montgomery form. If we routed the rest of
+ // simple.c to |EC_METHOD|, we could give |EC_POINT| an |EC_METHOD|-specific
+ // representation and say |EC_FELEM| is purely a |EC_GFp_mont_method| type.
void (*felem_mul)(const EC_GROUP *, EC_FELEM *r, const EC_FELEM *a,
const EC_FELEM *b);
void (*felem_sqr)(const EC_GROUP *, EC_FELEM *r, const EC_FELEM *a);
diff --git a/crypto/fipsmodule/ec/p224-64.c b/crypto/fipsmodule/ec/p224-64.c
index 8c7a812..27bfc36 100644
--- a/crypto/fipsmodule/ec/p224-64.c
+++ b/crypto/fipsmodule/ec/p224-64.c
@@ -917,7 +917,7 @@
if (!skip) {
p224_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 1 /* mixed */,
- tmp[0], tmp[1], tmp[2]);
+ tmp[0], tmp[1], tmp[2]);
} else {
OPENSSL_memcpy(nq, tmp, 3 * sizeof(p224_felem));
skip = 0;
@@ -951,7 +951,7 @@
if (!skip) {
p224_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 0 /* mixed */,
- tmp[0], tmp[1], tmp[2]);
+ tmp[0], tmp[1], tmp[2]);
} else {
OPENSSL_memcpy(nq, tmp, 3 * sizeof(p224_felem));
skip = 0;
@@ -1013,6 +1013,33 @@
return 1;
}
+static void ec_GFp_nistp224_add(const EC_GROUP *group, EC_RAW_POINT *r,
+ const EC_RAW_POINT *a, const EC_RAW_POINT *b) {
+ p224_felem x1, y1, z1, x2, y2, z2;
+ p224_generic_to_felem(x1, &a->X);
+ p224_generic_to_felem(y1, &a->Y);
+ p224_generic_to_felem(z1, &a->Z);
+ p224_generic_to_felem(x2, &b->X);
+ p224_generic_to_felem(y2, &b->Y);
+ p224_generic_to_felem(z2, &b->Z);
+ p224_point_add(x1, y1, z1, x1, y1, z1, 0 /* both Jacobian */, x2, y2, z2);
+ p224_felem_to_generic(&r->X, x1);
+ p224_felem_to_generic(&r->Y, y1);
+ p224_felem_to_generic(&r->Z, z1);
+}
+
+static void ec_GFp_nistp224_dbl(const EC_GROUP *group, EC_RAW_POINT *r,
+ const EC_RAW_POINT *a) {
+ p224_felem x, y, z;
+ p224_generic_to_felem(x, &a->X);
+ p224_generic_to_felem(y, &a->Y);
+ p224_generic_to_felem(z, &a->Z);
+ p224_point_double(x, y, z, x, y, z);
+ p224_felem_to_generic(&r->X, x);
+ p224_felem_to_generic(&r->Y, y);
+ p224_felem_to_generic(&r->Z, z);
+}
+
static void ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_RAW_POINT *r,
const EC_SCALAR *g_scalar,
const EC_RAW_POINT *p,
@@ -1036,13 +1063,13 @@
for (size_t j = 2; j <= 16; ++j) {
if (j & 1) {
p224_point_add(p_pre_comp[j][0], p_pre_comp[j][1], p_pre_comp[j][2],
- p_pre_comp[1][0], p_pre_comp[1][1], p_pre_comp[1][2],
- 0, p_pre_comp[j - 1][0], p_pre_comp[j - 1][1],
- p_pre_comp[j - 1][2]);
+ p_pre_comp[1][0], p_pre_comp[1][1], p_pre_comp[1][2], 0,
+ p_pre_comp[j - 1][0], p_pre_comp[j - 1][1],
+ p_pre_comp[j - 1][2]);
} else {
- p224_point_double(p_pre_comp[j][0], p_pre_comp[j][1],
- p_pre_comp[j][2], p_pre_comp[j / 2][0],
- p_pre_comp[j / 2][1], p_pre_comp[j / 2][2]);
+ p224_point_double(p_pre_comp[j][0], p_pre_comp[j][1], p_pre_comp[j][2],
+ p_pre_comp[j / 2][0], p_pre_comp[j / 2][1],
+ p_pre_comp[j / 2][2]);
}
}
}
@@ -1100,6 +1127,8 @@
out->group_set_curve = ec_GFp_simple_group_set_curve;
out->point_get_affine_coordinates =
ec_GFp_nistp224_point_get_affine_coordinates;
+ out->add = ec_GFp_nistp224_add;
+ out->dbl = ec_GFp_nistp224_dbl;
out->mul = ec_GFp_nistp224_points_mul;
out->mul_public = ec_GFp_nistp224_points_mul;
out->felem_mul = ec_GFp_nistp224_felem_mul;
diff --git a/crypto/fipsmodule/ec/p256-x86_64.c b/crypto/fipsmodule/ec/p256-x86_64.c
index e630cb7..0ff5455 100644
--- a/crypto/fipsmodule/ec/p256-x86_64.c
+++ b/crypto/fipsmodule/ec/p256-x86_64.c
@@ -485,6 +485,33 @@
return 1;
}
+static void ecp_nistz256_add(const EC_GROUP *group, EC_RAW_POINT *r,
+ const EC_RAW_POINT *a_, const EC_RAW_POINT *b_) {
+ P256_POINT a, b;
+ OPENSSL_memcpy(a.X, a_->X.words, P256_LIMBS * sizeof(BN_ULONG));
+ OPENSSL_memcpy(a.Y, a_->Y.words, P256_LIMBS * sizeof(BN_ULONG));
+ OPENSSL_memcpy(a.Z, a_->Z.words, P256_LIMBS * sizeof(BN_ULONG));
+ OPENSSL_memcpy(b.X, b_->X.words, P256_LIMBS * sizeof(BN_ULONG));
+ OPENSSL_memcpy(b.Y, b_->Y.words, P256_LIMBS * sizeof(BN_ULONG));
+ OPENSSL_memcpy(b.Z, b_->Z.words, P256_LIMBS * sizeof(BN_ULONG));
+ ecp_nistz256_point_add(&a, &a, &b);
+ OPENSSL_memcpy(r->X.words, a.X, P256_LIMBS * sizeof(BN_ULONG));
+ OPENSSL_memcpy(r->Y.words, a.Y, P256_LIMBS * sizeof(BN_ULONG));
+ OPENSSL_memcpy(r->Z.words, a.Z, P256_LIMBS * sizeof(BN_ULONG));
+}
+
+static void ecp_nistz256_dbl(const EC_GROUP *group, EC_RAW_POINT *r,
+ const EC_RAW_POINT *a_) {
+ P256_POINT a;
+ OPENSSL_memcpy(a.X, a_->X.words, P256_LIMBS * sizeof(BN_ULONG));
+ OPENSSL_memcpy(a.Y, a_->Y.words, P256_LIMBS * sizeof(BN_ULONG));
+ OPENSSL_memcpy(a.Z, a_->Z.words, P256_LIMBS * sizeof(BN_ULONG));
+ ecp_nistz256_point_double(&a, &a);
+ OPENSSL_memcpy(r->X.words, a.X, P256_LIMBS * sizeof(BN_ULONG));
+ OPENSSL_memcpy(r->Y.words, a.Y, P256_LIMBS * sizeof(BN_ULONG));
+ OPENSSL_memcpy(r->Z.words, a.Z, P256_LIMBS * sizeof(BN_ULONG));
+}
+
static void ecp_nistz256_inv_mod_ord(const EC_GROUP *group, EC_SCALAR *out,
const EC_SCALAR *in) {
// table[i] stores a power of |in| corresponding to the matching enum value.
@@ -633,6 +660,8 @@
out->group_finish = ec_GFp_mont_group_finish;
out->group_set_curve = ec_GFp_mont_group_set_curve;
out->point_get_affine_coordinates = ecp_nistz256_get_affine;
+ out->add = ecp_nistz256_add;
+ out->dbl = ecp_nistz256_dbl;
out->mul = ecp_nistz256_points_mul;
out->mul_public = ecp_nistz256_points_mul_public;
out->felem_mul = ec_GFp_mont_felem_mul;
diff --git a/third_party/fiat/p256.c b/third_party/fiat/p256.c
index e962d56..b6f3f49 100644
--- a/third_party/fiat/p256.c
+++ b/third_party/fiat/p256.c
@@ -1672,6 +1672,33 @@
return 1;
}
+static void ec_GFp_nistp256_add(const EC_GROUP *group, EC_RAW_POINT *r,
+ const EC_RAW_POINT *a, const EC_RAW_POINT *b) {
+ fe x1, y1, z1, x2, y2, z2;
+ fe_from_generic(x1, &a->X);
+ fe_from_generic(y1, &a->Y);
+ fe_from_generic(z1, &a->Z);
+ fe_from_generic(x2, &b->X);
+ fe_from_generic(y2, &b->Y);
+ fe_from_generic(z2, &b->Z);
+ point_add(x1, y1, z1, x1, y1, z1, 0 /* both Jacobian */, x2, y2, z2);
+ fe_to_generic(&r->X, x1);
+ fe_to_generic(&r->Y, y1);
+ fe_to_generic(&r->Z, z1);
+}
+
+static void ec_GFp_nistp256_dbl(const EC_GROUP *group, EC_RAW_POINT *r,
+ const EC_RAW_POINT *a) {
+ fe x, y, z;
+ fe_from_generic(x, &a->X);
+ fe_from_generic(y, &a->Y);
+ fe_from_generic(z, &a->Z);
+ point_double(x, y, z, x, y, z);
+ fe_to_generic(&r->X, x);
+ fe_to_generic(&r->Y, y);
+ fe_to_generic(&r->Z, z);
+}
+
static void ec_GFp_nistp256_points_mul(const EC_GROUP *group, EC_RAW_POINT *r,
const EC_SCALAR *g_scalar,
const EC_RAW_POINT *p,
@@ -1800,6 +1827,8 @@
out->group_set_curve = ec_GFp_mont_group_set_curve;
out->point_get_affine_coordinates =
ec_GFp_nistp256_point_get_affine_coordinates;
+ out->add = ec_GFp_nistp256_add;
+ out->dbl = ec_GFp_nistp256_dbl;
out->mul = ec_GFp_nistp256_points_mul;
out->mul_public = ec_GFp_nistp256_point_mul_public;
out->felem_mul = ec_GFp_mont_felem_mul;