Implement constant-time generic multiplication.
This is slower, but constant-time. It intentionally omits the signed
digit optimization because we cannot be sure the doubling case will be
unreachable for all curves. This is a fallback generic implementation
for curves which we must support for compatibility but which are not
common or important enough to justify curve-specific work.
Before:
Did 814 ECDH P-384 operations in 1085384us (750.0 ops/sec)
Did 1430 ECDSA P-384 signing operations in 1081988us (1321.6 ops/sec)
Did 308 ECDH P-521 operations in 1057741us (291.2 ops/sec)
Did 539 ECDSA P-521 signing operations in 1049797us (513.4 ops/sec)
After:
Did 715 ECDH P-384 operations in 1080161us (661.9 ops/sec)
Did 1188 ECDSA P-384 verify operations in 1069567us (1110.7 ops/sec)
Did 275 ECDH P-521 operations in 1060503us (259.3 ops/sec)
Did 506 ECDSA P-521 signing operations in 1084739us (466.5 ops/sec)
But we're still faster than the old BIGNUM implementation. EC_FELEM
more than paid for both the loss of points_make_affine and this CL.
Bug: 239
Change-Id: I65d71a731aad16b523928ee47618822d503ea704
Reviewed-on: https://boringssl-review.googlesource.com/27708
Commit-Queue: David Benjamin <davidben@google.com>
CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/fipsmodule/bcm.c b/crypto/fipsmodule/bcm.c
index 5d9ebfc..51ac7d5 100644
--- a/crypto/fipsmodule/bcm.c
+++ b/crypto/fipsmodule/bcm.c
@@ -66,6 +66,7 @@
#include "ec/p256-x86_64.c"
#include "ec/scalar.c"
#include "ec/simple.c"
+#include "ec/simple_mul.c"
#include "ec/util.c"
#include "ec/wnaf.c"
#include "hmac/hmac.c"
diff --git a/crypto/fipsmodule/ec/ec_montgomery.c b/crypto/fipsmodule/ec/ec_montgomery.c
index eb37e50..9eace95 100644
--- a/crypto/fipsmodule/ec/ec_montgomery.c
+++ b/crypto/fipsmodule/ec/ec_montgomery.c
@@ -225,8 +225,8 @@
out->group_finish = ec_GFp_mont_group_finish;
out->group_set_curve = ec_GFp_mont_group_set_curve;
out->point_get_affine_coordinates = ec_GFp_mont_point_get_affine_coordinates;
- out->mul = ec_wNAF_mul /* XXX: Not constant time. */;
- out->mul_public = ec_wNAF_mul;
+ out->mul = ec_GFp_simple_mul;
+ out->mul_public = ec_GFp_simple_mul_public;
out->felem_mul = ec_GFp_mont_felem_mul;
out->felem_sqr = ec_GFp_mont_felem_sqr;
out->bignum_to_felem = ec_GFp_mont_bignum_to_felem;
diff --git a/crypto/fipsmodule/ec/internal.h b/crypto/fipsmodule/ec/internal.h
index aba7d1d..bb172b2 100644
--- a/crypto/fipsmodule/ec/internal.h
+++ b/crypto/fipsmodule/ec/internal.h
@@ -287,6 +287,10 @@
const EC_GROUP *group, EC_POINT *r, const EC_SCALAR *g_scalar,
const EC_POINT *p, const EC_SCALAR *p_scalar, BN_CTX *ctx);
+void ec_GFp_simple_mul(const EC_GROUP *group, EC_RAW_POINT *r,
+ const EC_SCALAR *g_scalar, const EC_RAW_POINT *p,
+ const EC_SCALAR *p_scalar);
+
// ec_compute_wNAF writes the modified width-(w+1) Non-Adjacent Form (wNAF) of
// |scalar| to |out|. |out| must have room for |bits| + 1 elements, each of
// which will be either zero or odd with an absolute value less than 2^w
@@ -298,9 +302,9 @@
void ec_compute_wNAF(const EC_GROUP *group, int8_t *out,
const EC_SCALAR *scalar, size_t bits, int w);
-void ec_wNAF_mul(const EC_GROUP *group, EC_RAW_POINT *r,
- const EC_SCALAR *g_scalar, const EC_RAW_POINT *p,
- const EC_SCALAR *p_scalar);
+void ec_GFp_simple_mul_public(const EC_GROUP *group, EC_RAW_POINT *r,
+ const EC_SCALAR *g_scalar, const EC_RAW_POINT *p,
+ const EC_SCALAR *p_scalar);
// method functions in simple.c
int ec_GFp_simple_group_init(EC_GROUP *);
diff --git a/crypto/fipsmodule/ec/simple.c b/crypto/fipsmodule/ec/simple.c
index 8bbb5a9..5c63711 100644
--- a/crypto/fipsmodule/ec/simple.c
+++ b/crypto/fipsmodule/ec/simple.c
@@ -287,9 +287,7 @@
BN_ULONG yneq = ec_felem_non_zero_mask(group, &r);
- // TODO(davidben): Analyze how case relates to timing considerations for the
- // supported curves which hit it (P-224, P-384, and P-521) and the
- // to-be-written constant-time generic multiplication implementation.
+ // This case will never occur in the constant-time |ec_GFp_simple_mul|.
if (!xneq && !yneq && z1nz && z2nz) {
ec_GFp_simple_dbl(group, out, a);
return;
diff --git a/crypto/fipsmodule/ec/simple_mul.c b/crypto/fipsmodule/ec/simple_mul.c
new file mode 100644
index 0000000..394f3199
--- /dev/null
+++ b/crypto/fipsmodule/ec/simple_mul.c
@@ -0,0 +1,98 @@
+/* Copyright (c) 2018, Google Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
+
+#include <openssl/ec.h>
+
+#include <assert.h>
+
+#include "internal.h"
+#include "../bn/internal.h"
+#include "../../internal.h"
+
+
+static void ec_GFp_simple_mul_single(const EC_GROUP *group, EC_RAW_POINT *r,
+ const EC_RAW_POINT *p,
+ const EC_SCALAR *scalar) {
+ // This is a generic implementation for uncommon curves that not do not
+ // warrant a tuned one. It uses unsigned digits so that the doubling case in
+ // |ec_GFp_simple_add| is always unreachable, erring on safety and simplicity.
+
+ // Compute a table of the first 32 multiples of |p| (including infinity).
+ EC_RAW_POINT precomp[32];
+ ec_GFp_simple_point_set_to_infinity(group, &precomp[0]);
+ ec_GFp_simple_point_copy(&precomp[1], p);
+ for (size_t j = 2; j < OPENSSL_ARRAY_SIZE(precomp); j++) {
+ if (j & 1) {
+ ec_GFp_simple_add(group, &precomp[j], &precomp[1], &precomp[j - 1]);
+ } else {
+ ec_GFp_simple_dbl(group, &precomp[j], &precomp[j / 2]);
+ }
+ }
+
+ // Divide bits in |scalar| into windows.
+ unsigned bits = BN_num_bits(&group->order);
+ int r_is_at_infinity = 1;
+ for (unsigned i = bits - 1; i < bits; i--) {
+ if (!r_is_at_infinity) {
+ ec_GFp_simple_dbl(group, r, r);
+ }
+ if (i % 5 == 0) {
+ // Compute the next window value.
+ const size_t width = group->order.width;
+ uint8_t window = bn_is_bit_set_words(scalar->words, width, i + 4) << 4;
+ window |= bn_is_bit_set_words(scalar->words, width, i + 3) << 3;
+ window |= bn_is_bit_set_words(scalar->words, width, i + 2) << 2;
+ window |= bn_is_bit_set_words(scalar->words, width, i + 1) << 1;
+ window |= bn_is_bit_set_words(scalar->words, width, i);
+
+ // Select the entry in constant-time.
+ EC_RAW_POINT tmp;
+ for (size_t j = 0; j < OPENSSL_ARRAY_SIZE(precomp); j++) {
+ BN_ULONG mask = constant_time_eq_w(j, window);
+ ec_felem_select(group, &tmp.X, mask, &precomp[j].X, &tmp.X);
+ ec_felem_select(group, &tmp.Y, mask, &precomp[j].Y, &tmp.Y);
+ ec_felem_select(group, &tmp.Z, mask, &precomp[j].Z, &tmp.Z);
+ }
+
+ if (r_is_at_infinity) {
+ ec_GFp_simple_point_copy(r, &tmp);
+ r_is_at_infinity = 0;
+ } else {
+ ec_GFp_simple_add(group, r, r, &tmp);
+ }
+ }
+ }
+ if (r_is_at_infinity) {
+ ec_GFp_simple_point_set_to_infinity(group, r);
+ }
+}
+
+void ec_GFp_simple_mul(const EC_GROUP *group, EC_RAW_POINT *r,
+ const EC_SCALAR *g_scalar, const EC_RAW_POINT *p,
+ const EC_SCALAR *p_scalar) {
+ assert(g_scalar != NULL || p_scalar != NULL);
+ if (p_scalar == NULL) {
+ ec_GFp_simple_mul_single(group, r, &group->generator->raw, g_scalar);
+ } else if (g_scalar == NULL) {
+ ec_GFp_simple_mul_single(group, r, p, p_scalar);
+ } else {
+ // Support constant-time two-point multiplication for compatibility. This
+ // does not actually come up in keygen, ECDH, or ECDSA, so we implement it
+ // the naive way.
+ ec_GFp_simple_mul_single(group, r, &group->generator->raw, g_scalar);
+ EC_RAW_POINT tmp;
+ ec_GFp_simple_mul_single(group, &tmp, p, p_scalar);
+ ec_GFp_simple_add(group, r, r, &tmp);
+ }
+}
diff --git a/crypto/fipsmodule/ec/wnaf.c b/crypto/fipsmodule/ec/wnaf.c
index e136079..145caa0 100644
--- a/crypto/fipsmodule/ec/wnaf.c
+++ b/crypto/fipsmodule/ec/wnaf.c
@@ -72,7 +72,6 @@
#include <openssl/bn.h>
#include <openssl/err.h>
-#include <openssl/mem.h>
#include <openssl/thread.h>
#include "internal.h"
@@ -169,37 +168,30 @@
}
}
-// EC_WNAF_WINDOW_BITS is the window size to use for |ec_wNAF_mul|.
+// EC_WNAF_WINDOW_BITS is the window size to use for |ec_GFp_simple_mul_public|.
#define EC_WNAF_WINDOW_BITS 4
-// EC_WNAF_TABLE_SIZE is the table size to use for |ec_wNAF_mul|.
+// EC_WNAF_TABLE_SIZE is the table size to use for |ec_GFp_simple_mul_public|.
#define EC_WNAF_TABLE_SIZE (1 << (EC_WNAF_WINDOW_BITS - 1))
-void ec_wNAF_mul(const EC_GROUP *group, EC_RAW_POINT *r,
- const EC_SCALAR *g_scalar, const EC_RAW_POINT *p,
- const EC_SCALAR *p_scalar) {
+void ec_GFp_simple_mul_public(const EC_GROUP *group, EC_RAW_POINT *r,
+ const EC_SCALAR *g_scalar, const EC_RAW_POINT *p,
+ const EC_SCALAR *p_scalar) {
size_t bits = BN_num_bits(&group->order);
size_t wNAF_len = bits + 1;
- // TODO(davidben): |mul_public| is for ECDSA verification which can assume
- // non-NULL inputs, but this code is also used for |mul| which cannot. It's
- // not constant-time, so replace the generic |mul| and remove the NULL checks.
int8_t g_wNAF[EC_MAX_SCALAR_BYTES * 8 + 1];
EC_RAW_POINT g_precomp[EC_WNAF_TABLE_SIZE];
assert(wNAF_len <= OPENSSL_ARRAY_SIZE(g_wNAF));
- if (g_scalar != NULL) {
- const EC_RAW_POINT *g = &group->generator->raw;
- ec_compute_wNAF(group, g_wNAF, g_scalar, bits, EC_WNAF_WINDOW_BITS);
- compute_precomp(group, g_precomp, g, EC_WNAF_TABLE_SIZE);
- }
+ const EC_RAW_POINT *g = &group->generator->raw;
+ ec_compute_wNAF(group, g_wNAF, g_scalar, bits, EC_WNAF_WINDOW_BITS);
+ compute_precomp(group, g_precomp, g, EC_WNAF_TABLE_SIZE);
int8_t p_wNAF[EC_MAX_SCALAR_BYTES * 8 + 1];
EC_RAW_POINT p_precomp[EC_WNAF_TABLE_SIZE];
assert(wNAF_len <= OPENSSL_ARRAY_SIZE(p_wNAF));
- if (p_scalar != NULL) {
- ec_compute_wNAF(group, p_wNAF, p_scalar, bits, EC_WNAF_WINDOW_BITS);
- compute_precomp(group, p_precomp, p, EC_WNAF_TABLE_SIZE);
- }
+ ec_compute_wNAF(group, p_wNAF, p_scalar, bits, EC_WNAF_WINDOW_BITS);
+ compute_precomp(group, p_precomp, p, EC_WNAF_TABLE_SIZE);
EC_RAW_POINT tmp;
int r_is_at_infinity = 1;
@@ -208,7 +200,7 @@
ec_GFp_simple_dbl(group, r, r);
}
- if (g_scalar != NULL && g_wNAF[k] != 0) {
+ if (g_wNAF[k] != 0) {
lookup_precomp(group, &tmp, g_precomp, g_wNAF[k]);
if (r_is_at_infinity) {
ec_GFp_simple_point_copy(r, &tmp);
@@ -218,7 +210,7 @@
}
}
- if (p_scalar != NULL && p_wNAF[k] != 0) {
+ if (p_wNAF[k] != 0) {
lookup_precomp(group, &tmp, p_precomp, p_wNAF[k]);
if (r_is_at_infinity) {
ec_GFp_simple_point_copy(r, &tmp);
@@ -232,7 +224,4 @@
if (r_is_at_infinity) {
ec_GFp_simple_point_set_to_infinity(group, r);
}
-
- OPENSSL_cleanse(&g_wNAF, sizeof(g_wNAF));
- OPENSSL_cleanse(&p_wNAF, sizeof(p_wNAF));
}