Split EC_METHOD.mul into two operations.
See I9c20b660ce4b58dc633588cfd5b2e97a40203ec3. Aside for p224-64.c, we'd
already split mul_public into a dedicated function, at which point it's
simpler to just have three functions.
This makes it clearer when we do and don't care about the doubling case
coming up and avoids a bunch of NULL checks.
Change-Id: I7c5dafa7f12f4f53937d912ba22c90cb5a786f04
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/36225
Commit-Queue: Adam Langley <agl@google.com>
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/fipsmodule/ec/ec.c b/crypto/fipsmodule/ec/ec.c
index 705d45f..158d66c 100644
--- a/crypto/fipsmodule/ec/ec.c
+++ b/crypto/fipsmodule/ec/ec.c
@@ -944,8 +944,7 @@
int ec_point_mul_scalar_public(const EC_GROUP *group, EC_RAW_POINT *r,
const EC_SCALAR *g_scalar, const EC_RAW_POINT *p,
const EC_SCALAR *p_scalar) {
- if ((g_scalar == NULL && p_scalar == NULL) ||
- (p == NULL) != (p_scalar == NULL)) {
+ if (g_scalar == NULL || p_scalar == NULL || p == NULL) {
OPENSSL_PUT_ERROR(EC, ERR_R_PASSED_NULL_PARAMETER);
return 0;
}
@@ -961,7 +960,7 @@
return 0;
}
- group->meth->mul(group, r, NULL, p, scalar);
+ group->meth->mul(group, r, p, scalar);
return 1;
}
@@ -972,7 +971,7 @@
return 0;
}
- group->meth->mul(group, r, scalar, NULL, NULL);
+ group->meth->mul_base(group, r, scalar);
return 1;
}
diff --git a/crypto/fipsmodule/ec/ec_montgomery.c b/crypto/fipsmodule/ec/ec_montgomery.c
index caa1966..6fb32c4 100644
--- a/crypto/fipsmodule/ec/ec_montgomery.c
+++ b/crypto/fipsmodule/ec/ec_montgomery.c
@@ -470,6 +470,7 @@
out->add = ec_GFp_mont_add;
out->dbl = ec_GFp_mont_dbl;
out->mul = ec_GFp_mont_mul;
+ out->mul_base = ec_GFp_mont_mul_base;
out->mul_public = ec_GFp_mont_mul_public;
out->felem_mul = ec_GFp_mont_felem_mul;
out->felem_sqr = ec_GFp_mont_felem_sqr;
diff --git a/crypto/fipsmodule/ec/internal.h b/crypto/fipsmodule/ec/internal.h
index a29468f..7934c3a 100644
--- a/crypto/fipsmodule/ec/internal.h
+++ b/crypto/fipsmodule/ec/internal.h
@@ -140,16 +140,15 @@
// dbl sets |r| to |a| + |a|.
void (*dbl)(const EC_GROUP *group, EC_RAW_POINT *r, const EC_RAW_POINT *a);
- // Computes |r = g_scalar*generator + p_scalar*p| if |g_scalar| and |p_scalar|
- // are both non-null. Computes |r = g_scalar*generator| if |p_scalar| is null.
- // Computes |r = p_scalar*p| if g_scalar is null. At least one of |g_scalar|
- // and |p_scalar| must be non-null, and |p| must be non-null if |p_scalar| is
- // non-null.
- void (*mul)(const EC_GROUP *group, EC_RAW_POINT *r, const EC_SCALAR *g_scalar,
- const EC_RAW_POINT *p, const EC_SCALAR *p_scalar);
- // mul_public performs the same computation as mul. It further assumes that
- // the inputs are public so there is no concern about leaking their values
- // through timing.
+ // mul sets |r| to |scalar|*|p|.
+ void (*mul)(const EC_GROUP *group, EC_RAW_POINT *r, const EC_RAW_POINT *p,
+ const EC_SCALAR *scalar);
+ // mul_base sets |r| to |scalar|*generator.
+ void (*mul_base)(const EC_GROUP *group, EC_RAW_POINT *r,
+ const EC_SCALAR *scalar);
+ // mul_public sets |r| to |g_scalar|*generator + |p_scalar|*|p|. It assumes
+ // that the inputs are public so there is no concern about leaking their
+ // values through timing.
void (*mul_public)(const EC_GROUP *group, EC_RAW_POINT *r,
const EC_SCALAR *g_scalar, const EC_RAW_POINT *p,
const EC_SCALAR *p_scalar);
@@ -372,8 +371,9 @@
int ec_field_element_to_scalar(const EC_GROUP *group, BIGNUM *r);
void ec_GFp_mont_mul(const EC_GROUP *group, EC_RAW_POINT *r,
- const EC_SCALAR *g_scalar, const EC_RAW_POINT *p,
- const EC_SCALAR *p_scalar);
+ const EC_RAW_POINT *p, const EC_SCALAR *scalar);
+void ec_GFp_mont_mul_base(const EC_GROUP *group, EC_RAW_POINT *r,
+ const EC_SCALAR *scalar);
// ec_compute_wNAF writes the modified width-(w+1) Non-Adjacent Form (wNAF) of
// |scalar| to |out|. |out| must have room for |bits| + 1 elements, each of
diff --git a/crypto/fipsmodule/ec/p224-64.c b/crypto/fipsmodule/ec/p224-64.c
index 2749686..7ae7c59 100644
--- a/crypto/fipsmodule/ec/p224-64.c
+++ b/crypto/fipsmodule/ec/p224-64.c
@@ -1031,6 +1031,11 @@
const EC_SCALAR *g_scalar,
const EC_RAW_POINT *p,
const EC_SCALAR *p_scalar) {
+ // Note this function must act in constant-time when exactly one of |g_scalar|
+ // and |p_scalar| is non-NULL. If both are non-NULL, this is not necessary and
+ // we'll hit the variable-time doubling case in |p224_point_add|.
+ //
+ // TODO(davidben): Split this like the other curves to ease analysis.
p224_felem p_pre_comp[17][3];
p224_felem x_out, y_out, z_out;
@@ -1072,6 +1077,19 @@
p224_felem_to_generic(&r->Z, z_out);
}
+static void ec_GFp_nistp224_point_mul(const EC_GROUP *group, EC_RAW_POINT *r,
+ const EC_RAW_POINT *p,
+ const EC_SCALAR *scalar) {
+ ec_GFp_nistp224_points_mul(group, r, NULL /* g_scalar */, p, scalar);
+}
+
+static void ec_GFp_nistp224_point_mul_base(const EC_GROUP *group,
+ EC_RAW_POINT *r,
+ const EC_SCALAR *scalar) {
+ ec_GFp_nistp224_points_mul(group, r, scalar, NULL /* p */,
+ NULL /* p_scalar */);
+}
+
static void ec_GFp_nistp224_felem_mul(const EC_GROUP *group, EC_FELEM *r,
const EC_FELEM *a, const EC_FELEM *b) {
p224_felem felem1, felem2;
@@ -1111,7 +1129,8 @@
ec_GFp_nistp224_point_get_affine_coordinates;
out->add = ec_GFp_nistp224_add;
out->dbl = ec_GFp_nistp224_dbl;
- out->mul = ec_GFp_nistp224_points_mul;
+ out->mul = ec_GFp_nistp224_point_mul;
+ out->mul_base = ec_GFp_nistp224_point_mul_base;
out->mul_public = ec_GFp_nistp224_points_mul;
out->felem_mul = ec_GFp_nistp224_felem_mul;
out->felem_sqr = ec_GFp_nistp224_felem_sqr;
diff --git a/crypto/fipsmodule/ec/p256-x86_64.c b/crypto/fipsmodule/ec/p256-x86_64.c
index dd8108d..d63e998 100644
--- a/crypto/fipsmodule/ec/p256-x86_64.c
+++ b/crypto/fipsmodule/ec/p256-x86_64.c
@@ -316,74 +316,57 @@
return booth_recode_w7(wvalue);
}
-static void mul_p_add_and_store(const EC_GROUP *group, EC_RAW_POINT *r,
- const EC_SCALAR *g_scalar,
- const EC_RAW_POINT *p_,
- const EC_SCALAR *p_scalar,
- p256_point_union_t *t, p256_point_union_t *p) {
- const int p_is_infinity = g_scalar == NULL;
- if (p_scalar != NULL) {
- P256_POINT *out = &t->p;
- if (p_is_infinity) {
- out = &p->p;
- }
+static void ecp_nistz256_point_mul(const EC_GROUP *group, EC_RAW_POINT *r,
+ const EC_RAW_POINT *p,
+ const EC_SCALAR *scalar) {
+ alignas(32) P256_POINT out;
+ ecp_nistz256_windowed_mul(group, &out, p, scalar);
- ecp_nistz256_windowed_mul(group, out, p_, p_scalar);
- if (!p_is_infinity) {
- ecp_nistz256_point_add(&p->p, &p->p, out);
- }
+ assert(group->field.width == P256_LIMBS);
+ OPENSSL_memcpy(r->X.words, out.X, P256_LIMBS * sizeof(BN_ULONG));
+ OPENSSL_memcpy(r->Y.words, out.Y, P256_LIMBS * sizeof(BN_ULONG));
+ OPENSSL_memcpy(r->Z.words, out.Z, P256_LIMBS * sizeof(BN_ULONG));
+}
+
+static void ecp_nistz256_point_mul_base(const EC_GROUP *group, EC_RAW_POINT *r,
+ const EC_SCALAR *scalar) {
+ alignas(32) p256_point_union_t t, p;
+
+ uint8_t p_str[33];
+ OPENSSL_memcpy(p_str, scalar->bytes, 32);
+ p_str[32] = 0;
+
+ // First window
+ unsigned index = 0;
+ unsigned wvalue = calc_first_wvalue(&index, p_str);
+
+ ecp_nistz256_select_w7(&p.a, ecp_nistz256_precomputed[0], wvalue >> 1);
+ ecp_nistz256_neg(p.p.Z, p.p.Y);
+ copy_conditional(p.p.Y, p.p.Z, wvalue & 1);
+
+ // Convert |p| from affine to Jacobian coordinates. We set Z to zero if |p|
+ // is infinity and |ONE| otherwise. |p| was computed from the table, so it
+ // is infinity iff |wvalue >> 1| is zero.
+ OPENSSL_memset(p.p.Z, 0, sizeof(p.p.Z));
+ copy_conditional(p.p.Z, ONE, is_not_zero(wvalue >> 1));
+
+ for (int i = 1; i < 37; i++) {
+ wvalue = calc_wvalue(&index, p_str);
+
+ ecp_nistz256_select_w7(&t.a, ecp_nistz256_precomputed[i], wvalue >> 1);
+
+ ecp_nistz256_neg(t.p.Z, t.a.Y);
+ copy_conditional(t.a.Y, t.p.Z, wvalue & 1);
+
+ // Note |ecp_nistz256_point_add_affine| does not work if |p.p| and |t.a|
+ // are the same non-infinity point.
+ ecp_nistz256_point_add_affine(&p.p, &p.p, &t.a);
}
assert(group->field.width == P256_LIMBS);
- OPENSSL_memcpy(r->X.words, p->p.X, P256_LIMBS * sizeof(BN_ULONG));
- OPENSSL_memcpy(r->Y.words, p->p.Y, P256_LIMBS * sizeof(BN_ULONG));
- OPENSSL_memcpy(r->Z.words, p->p.Z, P256_LIMBS * sizeof(BN_ULONG));
-}
-
-static void ecp_nistz256_points_mul(const EC_GROUP *group, EC_RAW_POINT *r,
- const EC_SCALAR *g_scalar,
- const EC_RAW_POINT *p_,
- const EC_SCALAR *p_scalar) {
- assert((p_ != NULL) == (p_scalar != NULL));
-
- alignas(32) p256_point_union_t t, p;
-
- if (g_scalar != NULL) {
- uint8_t p_str[33];
- OPENSSL_memcpy(p_str, g_scalar->bytes, 32);
- p_str[32] = 0;
-
- // First window
- unsigned index = 0;
- unsigned wvalue = calc_first_wvalue(&index, p_str);
-
- ecp_nistz256_select_w7(&p.a, ecp_nistz256_precomputed[0], wvalue >> 1);
-
- ecp_nistz256_neg(p.p.Z, p.p.Y);
- copy_conditional(p.p.Y, p.p.Z, wvalue & 1);
-
- // Convert |p| from affine to Jacobian coordinates. We set Z to zero if |p|
- // is infinity and |ONE| otherwise. |p| was computed from the table, so it
- // is infinity iff |wvalue >> 1| is zero.
- OPENSSL_memset(p.p.Z, 0, sizeof(p.p.Z));
- copy_conditional(p.p.Z, ONE, is_not_zero(wvalue >> 1));
-
- for (int i = 1; i < 37; i++) {
- wvalue = calc_wvalue(&index, p_str);
-
- ecp_nistz256_select_w7(&t.a, ecp_nistz256_precomputed[i], wvalue >> 1);
-
- ecp_nistz256_neg(t.p.Z, t.a.Y);
- copy_conditional(t.a.Y, t.p.Z, wvalue & 1);
-
- // Note |ecp_nistz256_point_add_affine| does not work if |p.p| and |t.a|
- // are the same non-infinity point, so it is important that we compute the
- // |g_scalar| term before the |p_scalar| term.
- ecp_nistz256_point_add_affine(&p.p, &p.p, &t.a);
- }
- }
-
- mul_p_add_and_store(group, r, g_scalar, p_, p_scalar, &t, &p);
+ OPENSSL_memcpy(r->X.words, p.p.X, P256_LIMBS * sizeof(BN_ULONG));
+ OPENSSL_memcpy(r->Y.words, p.p.Y, P256_LIMBS * sizeof(BN_ULONG));
+ OPENSSL_memcpy(r->Z.words, p.p.Z, P256_LIMBS * sizeof(BN_ULONG));
}
static void ecp_nistz256_points_mul_public(const EC_GROUP *group,
@@ -438,7 +421,13 @@
ecp_nistz256_point_add_affine(&p.p, &p.p, &t.a);
}
- mul_p_add_and_store(group, r, g_scalar, p_, p_scalar, &t, &p);
+ ecp_nistz256_windowed_mul(group, &t.p, p_, p_scalar);
+ ecp_nistz256_point_add(&p.p, &p.p, &t.p);
+
+ assert(group->field.width == P256_LIMBS);
+ OPENSSL_memcpy(r->X.words, p.p.X, P256_LIMBS * sizeof(BN_ULONG));
+ OPENSSL_memcpy(r->Y.words, p.p.Y, P256_LIMBS * sizeof(BN_ULONG));
+ OPENSSL_memcpy(r->Z.words, p.p.Z, P256_LIMBS * sizeof(BN_ULONG));
}
static int ecp_nistz256_get_affine(const EC_GROUP *group,
@@ -645,7 +634,8 @@
out->point_get_affine_coordinates = ecp_nistz256_get_affine;
out->add = ecp_nistz256_add;
out->dbl = ecp_nistz256_dbl;
- out->mul = ecp_nistz256_points_mul;
+ out->mul = ecp_nistz256_point_mul;
+ out->mul_base = ecp_nistz256_point_mul_base;
out->mul_public = ecp_nistz256_points_mul_public;
out->felem_mul = ec_GFp_mont_felem_mul;
out->felem_sqr = ec_GFp_mont_felem_sqr;
diff --git a/crypto/fipsmodule/ec/simple_mul.c b/crypto/fipsmodule/ec/simple_mul.c
index e05f491..4ed6c48 100644
--- a/crypto/fipsmodule/ec/simple_mul.c
+++ b/crypto/fipsmodule/ec/simple_mul.c
@@ -21,9 +21,8 @@
#include "../../internal.h"
-static void ec_GFp_mont_mul_single(const EC_GROUP *group, EC_RAW_POINT *r,
- const EC_RAW_POINT *p,
- const EC_SCALAR *scalar) {
+void ec_GFp_mont_mul(const EC_GROUP *group, EC_RAW_POINT *r,
+ const EC_RAW_POINT *p, const EC_SCALAR *scalar) {
// This is a generic implementation for uncommon curves that not do not
// warrant a tuned one. It uses unsigned digits so that the doubling case in
// |ec_GFp_mont_add| is always unreachable, erring on safety and simplicity.
@@ -79,21 +78,7 @@
}
}
-void ec_GFp_mont_mul(const EC_GROUP *group, EC_RAW_POINT *r,
- const EC_SCALAR *g_scalar, const EC_RAW_POINT *p,
- const EC_SCALAR *p_scalar) {
- assert(g_scalar != NULL || p_scalar != NULL);
- if (p_scalar == NULL) {
- ec_GFp_mont_mul_single(group, r, &group->generator->raw, g_scalar);
- } else if (g_scalar == NULL) {
- ec_GFp_mont_mul_single(group, r, p, p_scalar);
- } else {
- // Support constant-time two-point multiplication for compatibility. This
- // does not actually come up in keygen, ECDH, or ECDSA, so we implement it
- // the naive way.
- ec_GFp_mont_mul_single(group, r, &group->generator->raw, g_scalar);
- EC_RAW_POINT tmp;
- ec_GFp_mont_mul_single(group, &tmp, p, p_scalar);
- ec_GFp_mont_add(group, r, r, &tmp);
- }
+void ec_GFp_mont_mul_base(const EC_GROUP *group, EC_RAW_POINT *r,
+ const EC_SCALAR *scalar) {
+ ec_GFp_mont_mul(group, r, &group->generator->raw, scalar);
}
diff --git a/third_party/fiat/p256.c b/third_party/fiat/p256.c
index ebc5de6..8426beb 100644
--- a/third_party/fiat/p256.c
+++ b/third_party/fiat/p256.c
@@ -731,98 +731,6 @@
return (in[i >> 3] >> (i & 7)) & 1;
}
-// Interleaved point multiplication using precomputed point multiples: The
-// small point multiples 0*P, 1*P, ..., 17*P are in p_pre_comp, the scalar
-// in p_scalar, if non-NULL. If g_scalar is non-NULL, we also add this multiple
-// of the generator, using certain (large) precomputed multiples in g_pre_comp.
-// Output point (X, Y, Z) is stored in x_out, y_out, z_out.
-static void batch_mul(fe x_out, fe y_out, fe z_out,
- const uint8_t *p_scalar, const uint8_t *g_scalar,
- const fe p_pre_comp[17][3]) {
- // set nq to the point at infinity
- fe nq[3] = {{0},{0},{0}}, ftmp, tmp[3];
- uint64_t bits;
- uint8_t sign, digit;
-
- // Loop over both scalars msb-to-lsb, interleaving additions of multiples
- // of the generator (two in each of the last 32 rounds) and additions of p
- // (every 5th round).
-
- int skip = 1; // save two point operations in the first round
- size_t i = p_scalar != NULL ? 255 : 31;
- for (;;) {
- // double
- if (!skip) {
- point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
- }
-
- // add multiples of the generator
- if (g_scalar != NULL && i <= 31) {
- // first, look 32 bits upwards
- bits = get_bit(g_scalar, i + 224) << 3;
- bits |= get_bit(g_scalar, i + 160) << 2;
- bits |= get_bit(g_scalar, i + 96) << 1;
- bits |= get_bit(g_scalar, i + 32);
- // select the point to add, in constant time
- select_point(bits, 16, g_pre_comp[1], tmp);
-
- if (!skip) {
- point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 1 /* mixed */,
- tmp[0], tmp[1], tmp[2]);
- } else {
- fe_copy(nq[0], tmp[0]);
- fe_copy(nq[1], tmp[1]);
- fe_copy(nq[2], tmp[2]);
- skip = 0;
- }
-
- // second, look at the current position
- bits = get_bit(g_scalar, i + 192) << 3;
- bits |= get_bit(g_scalar, i + 128) << 2;
- bits |= get_bit(g_scalar, i + 64) << 1;
- bits |= get_bit(g_scalar, i);
- // select the point to add, in constant time
- select_point(bits, 16, g_pre_comp[0], tmp);
- point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 1 /* mixed */, tmp[0],
- tmp[1], tmp[2]);
- }
-
- // do other additions every 5 doublings
- if (p_scalar != NULL && i % 5 == 0) {
- bits = get_bit(p_scalar, i + 4) << 5;
- bits |= get_bit(p_scalar, i + 3) << 4;
- bits |= get_bit(p_scalar, i + 2) << 3;
- bits |= get_bit(p_scalar, i + 1) << 2;
- bits |= get_bit(p_scalar, i) << 1;
- bits |= get_bit(p_scalar, i - 1);
- ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
-
- // select the point to add or subtract, in constant time.
- select_point(digit, 17, p_pre_comp, tmp);
- fe_opp(ftmp, tmp[1]); // (X, -Y, Z) is the negative point.
- fe_cmovznz(tmp[1], sign, tmp[1], ftmp);
-
- if (!skip) {
- point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 0 /* mixed */,
- tmp[0], tmp[1], tmp[2]);
- } else {
- fe_copy(nq[0], tmp[0]);
- fe_copy(nq[1], tmp[1]);
- fe_copy(nq[2], tmp[2]);
- skip = 0;
- }
- }
-
- if (i == 0) {
- break;
- }
- --i;
- }
- fe_copy(x_out, nq[0]);
- fe_copy(y_out, nq[1]);
- fe_copy(z_out, nq[2]);
-}
-
// OPENSSL EC_METHOD FUNCTIONS
// Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
@@ -890,45 +798,116 @@
fe_to_generic(&r->Z, z);
}
-static void ec_GFp_nistp256_points_mul(const EC_GROUP *group, EC_RAW_POINT *r,
- const EC_SCALAR *g_scalar,
- const EC_RAW_POINT *p,
- const EC_SCALAR *p_scalar) {
+static void ec_GFp_nistp256_point_mul(const EC_GROUP *group, EC_RAW_POINT *r,
+ const EC_RAW_POINT *p,
+ const EC_SCALAR *scalar) {
fe p_pre_comp[17][3];
- fe x_out, y_out, z_out;
+ OPENSSL_memset(&p_pre_comp, 0, sizeof(p_pre_comp));
+ // Precompute multiples.
+ fe_from_generic(p_pre_comp[1][0], &p->X);
+ fe_from_generic(p_pre_comp[1][1], &p->Y);
+ fe_from_generic(p_pre_comp[1][2], &p->Z);
+ for (size_t j = 2; j <= 16; ++j) {
+ if (j & 1) {
+ point_add(p_pre_comp[j][0], p_pre_comp[j][1], p_pre_comp[j][2],
+ p_pre_comp[1][0], p_pre_comp[1][1], p_pre_comp[1][2], 0,
+ p_pre_comp[j - 1][0], p_pre_comp[j - 1][1],
+ p_pre_comp[j - 1][2]);
+ } else {
+ point_double(p_pre_comp[j][0], p_pre_comp[j][1], p_pre_comp[j][2],
+ p_pre_comp[j / 2][0], p_pre_comp[j / 2][1],
+ p_pre_comp[j / 2][2]);
+ }
+ }
- if (p != NULL && p_scalar != NULL) {
- // We treat NULL scalars as 0, and NULL points as points at infinity, i.e.,
- // they contribute nothing to the linear combination.
- OPENSSL_memset(&p_pre_comp, 0, sizeof(p_pre_comp));
- // Precompute multiples.
- fe_from_generic(p_pre_comp[1][0], &p->X);
- fe_from_generic(p_pre_comp[1][1], &p->Y);
- fe_from_generic(p_pre_comp[1][2], &p->Z);
- for (size_t j = 2; j <= 16; ++j) {
- if (j & 1) {
- point_add(p_pre_comp[j][0], p_pre_comp[j][1],
- p_pre_comp[j][2], p_pre_comp[1][0],
- p_pre_comp[1][1], p_pre_comp[1][2],
- 0,
- p_pre_comp[j - 1][0], p_pre_comp[j - 1][1],
- p_pre_comp[j - 1][2]);
+ // Set nq to the point at infinity.
+ fe nq[3] = {{0}, {0}, {0}}, ftmp, tmp[3];
+
+ // Loop over |scalar| msb-to-lsb, incorporating |p_pre_comp| every 5th round.
+ int skip = 1; // Save two point operations in the first round.
+ for (size_t i = 255; i < 256; i--) {
+ // double
+ if (!skip) {
+ point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
+ }
+
+ // do other additions every 5 doublings
+ if (i % 5 == 0) {
+ uint64_t bits = get_bit(scalar->bytes, i + 4) << 5;
+ bits |= get_bit(scalar->bytes, i + 3) << 4;
+ bits |= get_bit(scalar->bytes, i + 2) << 3;
+ bits |= get_bit(scalar->bytes, i + 1) << 2;
+ bits |= get_bit(scalar->bytes, i) << 1;
+ bits |= get_bit(scalar->bytes, i - 1);
+ uint8_t sign, digit;
+ ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
+
+ // select the point to add or subtract, in constant time.
+ select_point(digit, 17, (const fe(*)[3])p_pre_comp, tmp);
+ fe_opp(ftmp, tmp[1]); // (X, -Y, Z) is the negative point.
+ fe_cmovznz(tmp[1], sign, tmp[1], ftmp);
+
+ if (!skip) {
+ point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 0 /* mixed */,
+ tmp[0], tmp[1], tmp[2]);
} else {
- point_double(p_pre_comp[j][0], p_pre_comp[j][1],
- p_pre_comp[j][2], p_pre_comp[j / 2][0],
- p_pre_comp[j / 2][1], p_pre_comp[j / 2][2]);
+ fe_copy(nq[0], tmp[0]);
+ fe_copy(nq[1], tmp[1]);
+ fe_copy(nq[2], tmp[2]);
+ skip = 0;
}
}
}
- batch_mul(x_out, y_out, z_out,
- (p != NULL && p_scalar != NULL) ? p_scalar->bytes : NULL,
- g_scalar != NULL ? g_scalar->bytes : NULL,
- (const fe (*) [3])p_pre_comp);
+ fe_to_generic(&r->X, nq[0]);
+ fe_to_generic(&r->Y, nq[1]);
+ fe_to_generic(&r->Z, nq[2]);
+}
- fe_to_generic(&r->X, x_out);
- fe_to_generic(&r->Y, y_out);
- fe_to_generic(&r->Z, z_out);
+static void ec_GFp_nistp256_point_mul_base(const EC_GROUP *group,
+ EC_RAW_POINT *r,
+ const EC_SCALAR *scalar) {
+ // Set nq to the point at infinity.
+ fe nq[3] = {{0}, {0}, {0}}, tmp[3];
+
+ int skip = 1; // Save two point operations in the first round.
+ for (size_t i = 31; i < 32; i--) {
+ if (!skip) {
+ point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
+ }
+
+ // First, look 32 bits upwards.
+ uint64_t bits = get_bit(scalar->bytes, i + 224) << 3;
+ bits |= get_bit(scalar->bytes, i + 160) << 2;
+ bits |= get_bit(scalar->bytes, i + 96) << 1;
+ bits |= get_bit(scalar->bytes, i + 32);
+ // Select the point to add, in constant time.
+ select_point(bits, 16, g_pre_comp[1], tmp);
+
+ if (!skip) {
+ point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 1 /* mixed */, tmp[0],
+ tmp[1], tmp[2]);
+ } else {
+ fe_copy(nq[0], tmp[0]);
+ fe_copy(nq[1], tmp[1]);
+ fe_copy(nq[2], tmp[2]);
+ skip = 0;
+ }
+
+ // Second, look at the current position.
+ bits = get_bit(scalar->bytes, i + 192) << 3;
+ bits |= get_bit(scalar->bytes, i + 128) << 2;
+ bits |= get_bit(scalar->bytes, i + 64) << 1;
+ bits |= get_bit(scalar->bytes, i);
+ // Select the point to add, in constant time.
+ select_point(bits, 16, g_pre_comp[0], tmp);
+ point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 1 /* mixed */, tmp[0],
+ tmp[1], tmp[2]);
+ }
+
+ fe_to_generic(&r->X, nq[0]);
+ fe_to_generic(&r->Y, nq[1]);
+ fe_to_generic(&r->Z, nq[2]);
}
static void ec_GFp_nistp256_point_mul_public(const EC_GROUP *group,
@@ -1066,7 +1045,8 @@
ec_GFp_nistp256_point_get_affine_coordinates;
out->add = ec_GFp_nistp256_add;
out->dbl = ec_GFp_nistp256_dbl;
- out->mul = ec_GFp_nistp256_points_mul;
+ out->mul = ec_GFp_nistp256_point_mul;
+ out->mul_base = ec_GFp_nistp256_point_mul_base;
out->mul_public = ec_GFp_nistp256_point_mul_public;
out->felem_mul = ec_GFp_mont_felem_mul;
out->felem_sqr = ec_GFp_mont_felem_sqr;