Split EC_METHOD.mul into two operations. See I9c20b660ce4b58dc633588cfd5b2e97a40203ec3. Aside from p224-64.c, we'd already split mul_public into a dedicated function, at which point it's simpler to just have three functions. This makes it clearer when we do and don't care about the doubling case coming up and avoids a bunch of NULL checks. Change-Id: I7c5dafa7f12f4f53937d912ba22c90cb5a786f04 Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/36225 Commit-Queue: Adam Langley <agl@google.com> Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/fipsmodule/ec/ec.c b/crypto/fipsmodule/ec/ec.c index 705d45f..158d66c 100644 --- a/crypto/fipsmodule/ec/ec.c +++ b/crypto/fipsmodule/ec/ec.c
@@ -944,8 +944,7 @@ int ec_point_mul_scalar_public(const EC_GROUP *group, EC_RAW_POINT *r, const EC_SCALAR *g_scalar, const EC_RAW_POINT *p, const EC_SCALAR *p_scalar) { - if ((g_scalar == NULL && p_scalar == NULL) || - (p == NULL) != (p_scalar == NULL)) { + if (g_scalar == NULL || p_scalar == NULL || p == NULL) { OPENSSL_PUT_ERROR(EC, ERR_R_PASSED_NULL_PARAMETER); return 0; } @@ -961,7 +960,7 @@ return 0; } - group->meth->mul(group, r, NULL, p, scalar); + group->meth->mul(group, r, p, scalar); return 1; } @@ -972,7 +971,7 @@ return 0; } - group->meth->mul(group, r, scalar, NULL, NULL); + group->meth->mul_base(group, r, scalar); return 1; }
diff --git a/crypto/fipsmodule/ec/ec_montgomery.c b/crypto/fipsmodule/ec/ec_montgomery.c index caa1966..6fb32c4 100644 --- a/crypto/fipsmodule/ec/ec_montgomery.c +++ b/crypto/fipsmodule/ec/ec_montgomery.c
@@ -470,6 +470,7 @@ out->add = ec_GFp_mont_add; out->dbl = ec_GFp_mont_dbl; out->mul = ec_GFp_mont_mul; + out->mul_base = ec_GFp_mont_mul_base; out->mul_public = ec_GFp_mont_mul_public; out->felem_mul = ec_GFp_mont_felem_mul; out->felem_sqr = ec_GFp_mont_felem_sqr;
diff --git a/crypto/fipsmodule/ec/internal.h b/crypto/fipsmodule/ec/internal.h index a29468f..7934c3a 100644 --- a/crypto/fipsmodule/ec/internal.h +++ b/crypto/fipsmodule/ec/internal.h
@@ -140,16 +140,15 @@ // dbl sets |r| to |a| + |a|. void (*dbl)(const EC_GROUP *group, EC_RAW_POINT *r, const EC_RAW_POINT *a); - // Computes |r = g_scalar*generator + p_scalar*p| if |g_scalar| and |p_scalar| - // are both non-null. Computes |r = g_scalar*generator| if |p_scalar| is null. - // Computes |r = p_scalar*p| if g_scalar is null. At least one of |g_scalar| - // and |p_scalar| must be non-null, and |p| must be non-null if |p_scalar| is - // non-null. - void (*mul)(const EC_GROUP *group, EC_RAW_POINT *r, const EC_SCALAR *g_scalar, - const EC_RAW_POINT *p, const EC_SCALAR *p_scalar); - // mul_public performs the same computation as mul. It further assumes that - // the inputs are public so there is no concern about leaking their values - // through timing. + // mul sets |r| to |scalar|*|p|. + void (*mul)(const EC_GROUP *group, EC_RAW_POINT *r, const EC_RAW_POINT *p, + const EC_SCALAR *scalar); + // mul_base sets |r| to |scalar|*generator. + void (*mul_base)(const EC_GROUP *group, EC_RAW_POINT *r, + const EC_SCALAR *scalar); + // mul_public sets |r| to |g_scalar|*generator + |p_scalar|*|p|. It assumes + // that the inputs are public so there is no concern about leaking their + // values through timing. void (*mul_public)(const EC_GROUP *group, EC_RAW_POINT *r, const EC_SCALAR *g_scalar, const EC_RAW_POINT *p, const EC_SCALAR *p_scalar); @@ -372,8 +371,9 @@ int ec_field_element_to_scalar(const EC_GROUP *group, BIGNUM *r); void ec_GFp_mont_mul(const EC_GROUP *group, EC_RAW_POINT *r, - const EC_SCALAR *g_scalar, const EC_RAW_POINT *p, - const EC_SCALAR *p_scalar); + const EC_RAW_POINT *p, const EC_SCALAR *scalar); +void ec_GFp_mont_mul_base(const EC_GROUP *group, EC_RAW_POINT *r, + const EC_SCALAR *scalar); // ec_compute_wNAF writes the modified width-(w+1) Non-Adjacent Form (wNAF) of // |scalar| to |out|. |out| must have room for |bits| + 1 elements, each of
diff --git a/crypto/fipsmodule/ec/p224-64.c b/crypto/fipsmodule/ec/p224-64.c index 2749686..7ae7c59 100644 --- a/crypto/fipsmodule/ec/p224-64.c +++ b/crypto/fipsmodule/ec/p224-64.c
@@ -1031,6 +1031,11 @@ const EC_SCALAR *g_scalar, const EC_RAW_POINT *p, const EC_SCALAR *p_scalar) { + // Note this function must act in constant-time when exactly one of |g_scalar| + // and |p_scalar| is non-NULL. If both are non-NULL, this is not necessary and + // we'll hit the variable-time doubling case in |p224_point_add|. + // + // TODO(davidben): Split this like the other curves to ease analysis. p224_felem p_pre_comp[17][3]; p224_felem x_out, y_out, z_out; @@ -1072,6 +1077,19 @@ p224_felem_to_generic(&r->Z, z_out); } +static void ec_GFp_nistp224_point_mul(const EC_GROUP *group, EC_RAW_POINT *r, + const EC_RAW_POINT *p, + const EC_SCALAR *scalar) { + ec_GFp_nistp224_points_mul(group, r, NULL /* g_scalar */, p, scalar); +} + +static void ec_GFp_nistp224_point_mul_base(const EC_GROUP *group, + EC_RAW_POINT *r, + const EC_SCALAR *scalar) { + ec_GFp_nistp224_points_mul(group, r, scalar, NULL /* p */, + NULL /* p_scalar */); +} + static void ec_GFp_nistp224_felem_mul(const EC_GROUP *group, EC_FELEM *r, const EC_FELEM *a, const EC_FELEM *b) { p224_felem felem1, felem2; @@ -1111,7 +1129,8 @@ ec_GFp_nistp224_point_get_affine_coordinates; out->add = ec_GFp_nistp224_add; out->dbl = ec_GFp_nistp224_dbl; - out->mul = ec_GFp_nistp224_points_mul; + out->mul = ec_GFp_nistp224_point_mul; + out->mul_base = ec_GFp_nistp224_point_mul_base; out->mul_public = ec_GFp_nistp224_points_mul; out->felem_mul = ec_GFp_nistp224_felem_mul; out->felem_sqr = ec_GFp_nistp224_felem_sqr;
diff --git a/crypto/fipsmodule/ec/p256-x86_64.c b/crypto/fipsmodule/ec/p256-x86_64.c index dd8108d..d63e998 100644 --- a/crypto/fipsmodule/ec/p256-x86_64.c +++ b/crypto/fipsmodule/ec/p256-x86_64.c
@@ -316,74 +316,57 @@ return booth_recode_w7(wvalue); } -static void mul_p_add_and_store(const EC_GROUP *group, EC_RAW_POINT *r, - const EC_SCALAR *g_scalar, - const EC_RAW_POINT *p_, - const EC_SCALAR *p_scalar, - p256_point_union_t *t, p256_point_union_t *p) { - const int p_is_infinity = g_scalar == NULL; - if (p_scalar != NULL) { - P256_POINT *out = &t->p; - if (p_is_infinity) { - out = &p->p; - } +static void ecp_nistz256_point_mul(const EC_GROUP *group, EC_RAW_POINT *r, + const EC_RAW_POINT *p, + const EC_SCALAR *scalar) { + alignas(32) P256_POINT out; + ecp_nistz256_windowed_mul(group, &out, p, scalar); - ecp_nistz256_windowed_mul(group, out, p_, p_scalar); - if (!p_is_infinity) { - ecp_nistz256_point_add(&p->p, &p->p, out); - } + assert(group->field.width == P256_LIMBS); + OPENSSL_memcpy(r->X.words, out.X, P256_LIMBS * sizeof(BN_ULONG)); + OPENSSL_memcpy(r->Y.words, out.Y, P256_LIMBS * sizeof(BN_ULONG)); + OPENSSL_memcpy(r->Z.words, out.Z, P256_LIMBS * sizeof(BN_ULONG)); +} + +static void ecp_nistz256_point_mul_base(const EC_GROUP *group, EC_RAW_POINT *r, + const EC_SCALAR *scalar) { + alignas(32) p256_point_union_t t, p; + + uint8_t p_str[33]; + OPENSSL_memcpy(p_str, scalar->bytes, 32); + p_str[32] = 0; + + // First window + unsigned index = 0; + unsigned wvalue = calc_first_wvalue(&index, p_str); + + ecp_nistz256_select_w7(&p.a, ecp_nistz256_precomputed[0], wvalue >> 1); + ecp_nistz256_neg(p.p.Z, p.p.Y); + copy_conditional(p.p.Y, p.p.Z, wvalue & 1); + + // Convert |p| from affine to Jacobian coordinates. We set Z to zero if |p| + // is infinity and |ONE| otherwise. |p| was computed from the table, so it + // is infinity iff |wvalue >> 1| is zero. 
+ OPENSSL_memset(p.p.Z, 0, sizeof(p.p.Z)); + copy_conditional(p.p.Z, ONE, is_not_zero(wvalue >> 1)); + + for (int i = 1; i < 37; i++) { + wvalue = calc_wvalue(&index, p_str); + + ecp_nistz256_select_w7(&t.a, ecp_nistz256_precomputed[i], wvalue >> 1); + + ecp_nistz256_neg(t.p.Z, t.a.Y); + copy_conditional(t.a.Y, t.p.Z, wvalue & 1); + + // Note |ecp_nistz256_point_add_affine| does not work if |p.p| and |t.a| + // are the same non-infinity point. + ecp_nistz256_point_add_affine(&p.p, &p.p, &t.a); } assert(group->field.width == P256_LIMBS); - OPENSSL_memcpy(r->X.words, p->p.X, P256_LIMBS * sizeof(BN_ULONG)); - OPENSSL_memcpy(r->Y.words, p->p.Y, P256_LIMBS * sizeof(BN_ULONG)); - OPENSSL_memcpy(r->Z.words, p->p.Z, P256_LIMBS * sizeof(BN_ULONG)); -} - -static void ecp_nistz256_points_mul(const EC_GROUP *group, EC_RAW_POINT *r, - const EC_SCALAR *g_scalar, - const EC_RAW_POINT *p_, - const EC_SCALAR *p_scalar) { - assert((p_ != NULL) == (p_scalar != NULL)); - - alignas(32) p256_point_union_t t, p; - - if (g_scalar != NULL) { - uint8_t p_str[33]; - OPENSSL_memcpy(p_str, g_scalar->bytes, 32); - p_str[32] = 0; - - // First window - unsigned index = 0; - unsigned wvalue = calc_first_wvalue(&index, p_str); - - ecp_nistz256_select_w7(&p.a, ecp_nistz256_precomputed[0], wvalue >> 1); - - ecp_nistz256_neg(p.p.Z, p.p.Y); - copy_conditional(p.p.Y, p.p.Z, wvalue & 1); - - // Convert |p| from affine to Jacobian coordinates. We set Z to zero if |p| - // is infinity and |ONE| otherwise. |p| was computed from the table, so it - // is infinity iff |wvalue >> 1| is zero. 
- OPENSSL_memset(p.p.Z, 0, sizeof(p.p.Z)); - copy_conditional(p.p.Z, ONE, is_not_zero(wvalue >> 1)); - - for (int i = 1; i < 37; i++) { - wvalue = calc_wvalue(&index, p_str); - - ecp_nistz256_select_w7(&t.a, ecp_nistz256_precomputed[i], wvalue >> 1); - - ecp_nistz256_neg(t.p.Z, t.a.Y); - copy_conditional(t.a.Y, t.p.Z, wvalue & 1); - - // Note |ecp_nistz256_point_add_affine| does not work if |p.p| and |t.a| - // are the same non-infinity point, so it is important that we compute the - // |g_scalar| term before the |p_scalar| term. - ecp_nistz256_point_add_affine(&p.p, &p.p, &t.a); - } - } - - mul_p_add_and_store(group, r, g_scalar, p_, p_scalar, &t, &p); + OPENSSL_memcpy(r->X.words, p.p.X, P256_LIMBS * sizeof(BN_ULONG)); + OPENSSL_memcpy(r->Y.words, p.p.Y, P256_LIMBS * sizeof(BN_ULONG)); + OPENSSL_memcpy(r->Z.words, p.p.Z, P256_LIMBS * sizeof(BN_ULONG)); } static void ecp_nistz256_points_mul_public(const EC_GROUP *group, @@ -438,7 +421,13 @@ ecp_nistz256_point_add_affine(&p.p, &p.p, &t.a); } - mul_p_add_and_store(group, r, g_scalar, p_, p_scalar, &t, &p); + ecp_nistz256_windowed_mul(group, &t.p, p_, p_scalar); + ecp_nistz256_point_add(&p.p, &p.p, &t.p); + + assert(group->field.width == P256_LIMBS); + OPENSSL_memcpy(r->X.words, p.p.X, P256_LIMBS * sizeof(BN_ULONG)); + OPENSSL_memcpy(r->Y.words, p.p.Y, P256_LIMBS * sizeof(BN_ULONG)); + OPENSSL_memcpy(r->Z.words, p.p.Z, P256_LIMBS * sizeof(BN_ULONG)); } static int ecp_nistz256_get_affine(const EC_GROUP *group, @@ -645,7 +634,8 @@ out->point_get_affine_coordinates = ecp_nistz256_get_affine; out->add = ecp_nistz256_add; out->dbl = ecp_nistz256_dbl; - out->mul = ecp_nistz256_points_mul; + out->mul = ecp_nistz256_point_mul; + out->mul_base = ecp_nistz256_point_mul_base; out->mul_public = ecp_nistz256_points_mul_public; out->felem_mul = ec_GFp_mont_felem_mul; out->felem_sqr = ec_GFp_mont_felem_sqr;
diff --git a/crypto/fipsmodule/ec/simple_mul.c b/crypto/fipsmodule/ec/simple_mul.c index e05f491..4ed6c48 100644 --- a/crypto/fipsmodule/ec/simple_mul.c +++ b/crypto/fipsmodule/ec/simple_mul.c
@@ -21,9 +21,8 @@ #include "../../internal.h" -static void ec_GFp_mont_mul_single(const EC_GROUP *group, EC_RAW_POINT *r, - const EC_RAW_POINT *p, - const EC_SCALAR *scalar) { +void ec_GFp_mont_mul(const EC_GROUP *group, EC_RAW_POINT *r, + const EC_RAW_POINT *p, const EC_SCALAR *scalar) { // This is a generic implementation for uncommon curves that not do not // warrant a tuned one. It uses unsigned digits so that the doubling case in // |ec_GFp_mont_add| is always unreachable, erring on safety and simplicity. @@ -79,21 +78,7 @@ } } -void ec_GFp_mont_mul(const EC_GROUP *group, EC_RAW_POINT *r, - const EC_SCALAR *g_scalar, const EC_RAW_POINT *p, - const EC_SCALAR *p_scalar) { - assert(g_scalar != NULL || p_scalar != NULL); - if (p_scalar == NULL) { - ec_GFp_mont_mul_single(group, r, &group->generator->raw, g_scalar); - } else if (g_scalar == NULL) { - ec_GFp_mont_mul_single(group, r, p, p_scalar); - } else { - // Support constant-time two-point multiplication for compatibility. This - // does not actually come up in keygen, ECDH, or ECDSA, so we implement it - // the naive way. - ec_GFp_mont_mul_single(group, r, &group->generator->raw, g_scalar); - EC_RAW_POINT tmp; - ec_GFp_mont_mul_single(group, &tmp, p, p_scalar); - ec_GFp_mont_add(group, r, r, &tmp); - } +void ec_GFp_mont_mul_base(const EC_GROUP *group, EC_RAW_POINT *r, + const EC_SCALAR *scalar) { + ec_GFp_mont_mul(group, r, &group->generator->raw, scalar); }
diff --git a/third_party/fiat/p256.c b/third_party/fiat/p256.c index ebc5de6..8426beb 100644 --- a/third_party/fiat/p256.c +++ b/third_party/fiat/p256.c
@@ -731,98 +731,6 @@ return (in[i >> 3] >> (i & 7)) & 1; } -// Interleaved point multiplication using precomputed point multiples: The -// small point multiples 0*P, 1*P, ..., 17*P are in p_pre_comp, the scalar -// in p_scalar, if non-NULL. If g_scalar is non-NULL, we also add this multiple -// of the generator, using certain (large) precomputed multiples in g_pre_comp. -// Output point (X, Y, Z) is stored in x_out, y_out, z_out. -static void batch_mul(fe x_out, fe y_out, fe z_out, - const uint8_t *p_scalar, const uint8_t *g_scalar, - const fe p_pre_comp[17][3]) { - // set nq to the point at infinity - fe nq[3] = {{0},{0},{0}}, ftmp, tmp[3]; - uint64_t bits; - uint8_t sign, digit; - - // Loop over both scalars msb-to-lsb, interleaving additions of multiples - // of the generator (two in each of the last 32 rounds) and additions of p - // (every 5th round). - - int skip = 1; // save two point operations in the first round - size_t i = p_scalar != NULL ? 255 : 31; - for (;;) { - // double - if (!skip) { - point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]); - } - - // add multiples of the generator - if (g_scalar != NULL && i <= 31) { - // first, look 32 bits upwards - bits = get_bit(g_scalar, i + 224) << 3; - bits |= get_bit(g_scalar, i + 160) << 2; - bits |= get_bit(g_scalar, i + 96) << 1; - bits |= get_bit(g_scalar, i + 32); - // select the point to add, in constant time - select_point(bits, 16, g_pre_comp[1], tmp); - - if (!skip) { - point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 1 /* mixed */, - tmp[0], tmp[1], tmp[2]); - } else { - fe_copy(nq[0], tmp[0]); - fe_copy(nq[1], tmp[1]); - fe_copy(nq[2], tmp[2]); - skip = 0; - } - - // second, look at the current position - bits = get_bit(g_scalar, i + 192) << 3; - bits |= get_bit(g_scalar, i + 128) << 2; - bits |= get_bit(g_scalar, i + 64) << 1; - bits |= get_bit(g_scalar, i); - // select the point to add, in constant time - select_point(bits, 16, g_pre_comp[0], tmp); - point_add(nq[0], nq[1], nq[2], nq[0], 
nq[1], nq[2], 1 /* mixed */, tmp[0], - tmp[1], tmp[2]); - } - - // do other additions every 5 doublings - if (p_scalar != NULL && i % 5 == 0) { - bits = get_bit(p_scalar, i + 4) << 5; - bits |= get_bit(p_scalar, i + 3) << 4; - bits |= get_bit(p_scalar, i + 2) << 3; - bits |= get_bit(p_scalar, i + 1) << 2; - bits |= get_bit(p_scalar, i) << 1; - bits |= get_bit(p_scalar, i - 1); - ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits); - - // select the point to add or subtract, in constant time. - select_point(digit, 17, p_pre_comp, tmp); - fe_opp(ftmp, tmp[1]); // (X, -Y, Z) is the negative point. - fe_cmovznz(tmp[1], sign, tmp[1], ftmp); - - if (!skip) { - point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 0 /* mixed */, - tmp[0], tmp[1], tmp[2]); - } else { - fe_copy(nq[0], tmp[0]); - fe_copy(nq[1], tmp[1]); - fe_copy(nq[2], tmp[2]); - skip = 0; - } - } - - if (i == 0) { - break; - } - --i; - } - fe_copy(x_out, nq[0]); - fe_copy(y_out, nq[1]); - fe_copy(z_out, nq[2]); -} - // OPENSSL EC_METHOD FUNCTIONS // Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') = @@ -890,45 +798,116 @@ fe_to_generic(&r->Z, z); } -static void ec_GFp_nistp256_points_mul(const EC_GROUP *group, EC_RAW_POINT *r, - const EC_SCALAR *g_scalar, - const EC_RAW_POINT *p, - const EC_SCALAR *p_scalar) { +static void ec_GFp_nistp256_point_mul(const EC_GROUP *group, EC_RAW_POINT *r, + const EC_RAW_POINT *p, + const EC_SCALAR *scalar) { fe p_pre_comp[17][3]; - fe x_out, y_out, z_out; + OPENSSL_memset(&p_pre_comp, 0, sizeof(p_pre_comp)); + // Precompute multiples. 
+ fe_from_generic(p_pre_comp[1][0], &p->X); + fe_from_generic(p_pre_comp[1][1], &p->Y); + fe_from_generic(p_pre_comp[1][2], &p->Z); + for (size_t j = 2; j <= 16; ++j) { + if (j & 1) { + point_add(p_pre_comp[j][0], p_pre_comp[j][1], p_pre_comp[j][2], + p_pre_comp[1][0], p_pre_comp[1][1], p_pre_comp[1][2], 0, + p_pre_comp[j - 1][0], p_pre_comp[j - 1][1], + p_pre_comp[j - 1][2]); + } else { + point_double(p_pre_comp[j][0], p_pre_comp[j][1], p_pre_comp[j][2], + p_pre_comp[j / 2][0], p_pre_comp[j / 2][1], + p_pre_comp[j / 2][2]); + } + } - if (p != NULL && p_scalar != NULL) { - // We treat NULL scalars as 0, and NULL points as points at infinity, i.e., - // they contribute nothing to the linear combination. - OPENSSL_memset(&p_pre_comp, 0, sizeof(p_pre_comp)); - // Precompute multiples. - fe_from_generic(p_pre_comp[1][0], &p->X); - fe_from_generic(p_pre_comp[1][1], &p->Y); - fe_from_generic(p_pre_comp[1][2], &p->Z); - for (size_t j = 2; j <= 16; ++j) { - if (j & 1) { - point_add(p_pre_comp[j][0], p_pre_comp[j][1], - p_pre_comp[j][2], p_pre_comp[1][0], - p_pre_comp[1][1], p_pre_comp[1][2], - 0, - p_pre_comp[j - 1][0], p_pre_comp[j - 1][1], - p_pre_comp[j - 1][2]); + // Set nq to the point at infinity. + fe nq[3] = {{0}, {0}, {0}}, ftmp, tmp[3]; + + // Loop over |scalar| msb-to-lsb, incorporating |p_pre_comp| every 5th round. + int skip = 1; // Save two point operations in the first round. 
+ for (size_t i = 255; i < 256; i--) { + // double + if (!skip) { + point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]); + } + + // do other additions every 5 doublings + if (i % 5 == 0) { + uint64_t bits = get_bit(scalar->bytes, i + 4) << 5; + bits |= get_bit(scalar->bytes, i + 3) << 4; + bits |= get_bit(scalar->bytes, i + 2) << 3; + bits |= get_bit(scalar->bytes, i + 1) << 2; + bits |= get_bit(scalar->bytes, i) << 1; + bits |= get_bit(scalar->bytes, i - 1); + uint8_t sign, digit; + ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits); + + // select the point to add or subtract, in constant time. + select_point(digit, 17, (const fe(*)[3])p_pre_comp, tmp); + fe_opp(ftmp, tmp[1]); // (X, -Y, Z) is the negative point. + fe_cmovznz(tmp[1], sign, tmp[1], ftmp); + + if (!skip) { + point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 0 /* mixed */, + tmp[0], tmp[1], tmp[2]); } else { - point_double(p_pre_comp[j][0], p_pre_comp[j][1], - p_pre_comp[j][2], p_pre_comp[j / 2][0], - p_pre_comp[j / 2][1], p_pre_comp[j / 2][2]); + fe_copy(nq[0], tmp[0]); + fe_copy(nq[1], tmp[1]); + fe_copy(nq[2], tmp[2]); + skip = 0; } } } - batch_mul(x_out, y_out, z_out, - (p != NULL && p_scalar != NULL) ? p_scalar->bytes : NULL, - g_scalar != NULL ? g_scalar->bytes : NULL, - (const fe (*) [3])p_pre_comp); + fe_to_generic(&r->X, nq[0]); + fe_to_generic(&r->Y, nq[1]); + fe_to_generic(&r->Z, nq[2]); +} - fe_to_generic(&r->X, x_out); - fe_to_generic(&r->Y, y_out); - fe_to_generic(&r->Z, z_out); +static void ec_GFp_nistp256_point_mul_base(const EC_GROUP *group, + EC_RAW_POINT *r, + const EC_SCALAR *scalar) { + // Set nq to the point at infinity. + fe nq[3] = {{0}, {0}, {0}}, tmp[3]; + + int skip = 1; // Save two point operations in the first round. + for (size_t i = 31; i < 32; i--) { + if (!skip) { + point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]); + } + + // First, look 32 bits upwards. 
+ uint64_t bits = get_bit(scalar->bytes, i + 224) << 3; + bits |= get_bit(scalar->bytes, i + 160) << 2; + bits |= get_bit(scalar->bytes, i + 96) << 1; + bits |= get_bit(scalar->bytes, i + 32); + // Select the point to add, in constant time. + select_point(bits, 16, g_pre_comp[1], tmp); + + if (!skip) { + point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 1 /* mixed */, tmp[0], + tmp[1], tmp[2]); + } else { + fe_copy(nq[0], tmp[0]); + fe_copy(nq[1], tmp[1]); + fe_copy(nq[2], tmp[2]); + skip = 0; + } + + // Second, look at the current position. + bits = get_bit(scalar->bytes, i + 192) << 3; + bits |= get_bit(scalar->bytes, i + 128) << 2; + bits |= get_bit(scalar->bytes, i + 64) << 1; + bits |= get_bit(scalar->bytes, i); + // Select the point to add, in constant time. + select_point(bits, 16, g_pre_comp[0], tmp); + point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 1 /* mixed */, tmp[0], + tmp[1], tmp[2]); + } + + fe_to_generic(&r->X, nq[0]); + fe_to_generic(&r->Y, nq[1]); + fe_to_generic(&r->Z, nq[2]); } static void ec_GFp_nistp256_point_mul_public(const EC_GROUP *group, @@ -1066,7 +1045,8 @@ ec_GFp_nistp256_point_get_affine_coordinates; out->add = ec_GFp_nistp256_add; out->dbl = ec_GFp_nistp256_dbl; - out->mul = ec_GFp_nistp256_points_mul; + out->mul = ec_GFp_nistp256_point_mul; + out->mul_base = ec_GFp_nistp256_point_mul_base; out->mul_public = ec_GFp_nistp256_point_mul_public; out->felem_mul = ec_GFp_mont_felem_mul; out->felem_sqr = ec_GFp_mont_felem_sqr;