Optimize EC_GFp_nistp256_method's cmp_x_coordinate.
Before:
Did 35496 ECDSA P-256 verify operations in 10027999us (3539.7 ops/sec)
After [+6.9%]:
Did 38170 ECDSA P-256 verify operations in 10090160us (3782.9 ops/sec)
Change-Id: Ib272d19954f46d96efc2b6d5dd480b5b85a34523
Reviewed-on: https://boringssl-review.googlesource.com/c/33067
Commit-Queue: David Benjamin <davidben@google.com>
CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/third_party/fiat/p256.c b/third_party/fiat/p256.c
index c8e42a3..638f681 100644
--- a/third_party/fiat/p256.c
+++ b/third_party/fiat/p256.c
@@ -35,6 +35,7 @@
#include <openssl/mem.h>
#include <openssl/type_check.h>
+#include <assert.h>
#include <string.h>
#include "../../crypto/fipsmodule/delocate.h"
@@ -1807,6 +1808,58 @@
fe_to_generic(&r->Z, ret[2]);
}
+static int ec_GFp_nistp256_cmp_x_coordinate(const EC_GROUP *group,
+ const EC_RAW_POINT *p,
+ const EC_SCALAR *r) {
+ if (ec_GFp_simple_is_at_infinity(group, p)) {
+ return 0;
+ }
+
+ // We wish to compare X/Z^2 with r. This is equivalent to comparing X with
+ // r*Z^2. Note that X and Z are represented in Montgomery form, while r is
+ // not.
+ fe Z2_mont;
+ fe_from_generic(Z2_mont, &p->Z);
+ fe_mul(Z2_mont, Z2_mont, Z2_mont);
+
+ fe r_Z2;
+ fe_frombytes(r_Z2, r->bytes); // r < order < p, so this is valid.
+ fe_mul(r_Z2, r_Z2, Z2_mont);
+
+ fe X;
+ fe_from_generic(X, &p->X);
+ fe_from_montgomery(X);
+
+ if (OPENSSL_memcmp(&r_Z2, &X, sizeof(r_Z2)) == 0) {
+ return 1;
+ }
+
+ // During signing the x coefficient is reduced modulo the group order.
+ // Therefore there is a small possibility, less than 1/2^128, that group_order
+ // < p.x < P. in that case we need not only to compare against |r| but also to
+ // compare against r+group_order.
+
+ // P_MINUS_ORDER is the difference between the field order (p) and the group
+ // order (N). This value is not in the Montgomery domain.
+ static const BN_ULONG P_MINUS_ORDER[] = {
+ TOBN(0x0c46353d, 0x039cdaae), TOBN(0x43190553, 0x58e8617b),
+ TOBN(0x00000000, 0x00000000), TOBN(0x00000000, 0x00000000)};
+ assert(OPENSSL_ARRAY_SIZE(P_MINUS_ORDER) == group->order.width);
+ if (bn_less_than_words(r->words, P_MINUS_ORDER,
+ OPENSSL_ARRAY_SIZE(P_MINUS_ORDER))) {
+ // We can ignore the carry because: r + group_order < p < 2^256.
+ EC_FELEM tmp;
+ bn_add_words(tmp.words, r->words, group->order.d, group->order.width);
+ fe_from_generic(r_Z2, &tmp);
+ fe_mul(r_Z2, r_Z2, Z2_mont);
+ if (OPENSSL_memcmp(&r_Z2, &X, sizeof(r_Z2)) == 0) {
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
DEFINE_METHOD_FUNCTION(EC_METHOD, EC_GFp_nistp256_method) {
out->group_init = ec_GFp_mont_group_init;
out->group_finish = ec_GFp_mont_group_finish;
@@ -1823,7 +1876,7 @@
out->felem_to_bignum = ec_GFp_mont_felem_to_bignum;
out->scalar_inv_montgomery = ec_simple_scalar_inv_montgomery;
out->scalar_inv_montgomery_vartime = ec_GFp_simple_mont_inv_mod_ord_vartime;
- out->cmp_x_coordinate = ec_GFp_simple_cmp_x_coordinate;
+ out->cmp_x_coordinate = ec_GFp_nistp256_cmp_x_coordinate;
};
#undef BORINGSSL_NISTP256_64BIT