|  | // Copyright 2020 The BoringSSL Authors | 
|  | // | 
|  | // Licensed under the Apache License, Version 2.0 (the "License"); | 
|  | // you may not use this file except in compliance with the License. | 
|  | // You may obtain a copy of the License at | 
|  | // | 
|  | //     https://www.apache.org/licenses/LICENSE-2.0 | 
|  | // | 
|  | // Unless required by applicable law or agreed to in writing, software | 
|  | // distributed under the License is distributed on an "AS IS" BASIS, | 
|  | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | // See the License for the specific language governing permissions and | 
|  | // limitations under the License. | 
|  |  | 
// An implementation of NIST P-256 elliptic curve point multiplication. Field
// elements are 256-bit values held in Montgomery form, with limb layouts for
// both 64-bit and 32-bit targets. Field operations are generated by Fiat,
// which lives in //third_party/fiat.
|  |  | 
|  | #include <openssl/base.h> | 
|  |  | 
|  | #include <openssl/bn.h> | 
|  | #include <openssl/ec.h> | 
|  | #include <openssl/err.h> | 
|  | #include <openssl/mem.h> | 
|  |  | 
|  | #include <assert.h> | 
|  | #include <string.h> | 
|  |  | 
|  | #include "../../internal.h" | 
|  | #include "../delocate.h" | 
|  | #include "./internal.h" | 
|  |  | 
|  | #include "../../../third_party/fiat/p256_field.c.inc" | 
|  | #include "../../../third_party/fiat/p256_point.br.c.inc" | 
|  |  | 
|  | // utility functions, handwritten | 
|  |  | 
#if defined(OPENSSL_64_BIT)
// 64-bit targets: a field element is four 64-bit limbs, least-significant
// limb first.
#define FIAT_P256_NLIMBS 4
typedef uint64_t fiat_p256_limb_t;
typedef fiat_p256_limb_t fiat_p256_felem[FIAT_P256_NLIMBS];
// The field element one in Montgomery form, i.e. 2^256 mod p.
static const fiat_p256_felem fiat_p256_one = {
    0x1,
    0xffffffff00000000,
    0xffffffffffffffff,
    0xfffffffe,
};
#else  // 64BIT; else 32BIT
// 32-bit targets: a field element is eight 32-bit limbs, least-significant
// limb first.
#define FIAT_P256_NLIMBS 8
typedef uint32_t fiat_p256_limb_t;
typedef fiat_p256_limb_t fiat_p256_felem[FIAT_P256_NLIMBS];
// The field element one in Montgomery form, i.e. 2^256 mod p.
static const fiat_p256_felem fiat_p256_one = {
    0x1,        0x0,        0x0,        0xffffffff,
    0xffffffff, 0xffffffff, 0xfffffffe, 0x0,
};
#endif  // 64BIT
|  |  | 
|  |  | 
|  | static void fiat_p256_copy(fiat_p256_limb_t out[FIAT_P256_NLIMBS], | 
|  | const fiat_p256_limb_t in1[FIAT_P256_NLIMBS]) { | 
|  | for (size_t i = 0; i < FIAT_P256_NLIMBS; i++) { | 
|  | out[i] = in1[i]; | 
|  | } | 
|  | } | 
|  |  | 
|  | static void fiat_p256_cmovznz(fiat_p256_limb_t out[FIAT_P256_NLIMBS], | 
|  | fiat_p256_limb_t t, | 
|  | const fiat_p256_limb_t z[FIAT_P256_NLIMBS], | 
|  | const fiat_p256_limb_t nz[FIAT_P256_NLIMBS]) { | 
|  | fiat_p256_selectznz(out, !!t, z, nz); | 
|  | } | 
|  |  | 
|  | static void fiat_p256_from_words(fiat_p256_felem out, | 
|  | const BN_ULONG in[32 / sizeof(BN_ULONG)]) { | 
|  | // Typically, |BN_ULONG| and |fiat_p256_limb_t| will be the same type, but on | 
|  | // 64-bit platforms without |uint128_t|, they are different. However, on | 
|  | // little-endian systems, |uint64_t[4]| and |uint32_t[8]| have the same | 
|  | // layout. | 
|  | OPENSSL_memcpy(out, in, 32); | 
|  | } | 
|  |  | 
|  | static void fiat_p256_from_generic(fiat_p256_felem out, const EC_FELEM *in) { | 
|  | fiat_p256_from_words(out, in->words); | 
|  | } | 
|  |  | 
|  | static void fiat_p256_to_generic(EC_FELEM *out, const fiat_p256_felem in) { | 
|  | // See |fiat_p256_from_words|. | 
|  | OPENSSL_memcpy(out->words, in, 32); | 
|  | } | 
|  |  | 
|  | // fiat_p256_inv_square calculates |out| = |in|^{-2} | 
|  | // | 
|  | // Based on Fermat's Little Theorem: | 
|  | //   a^p = a (mod p) | 
|  | //   a^{p-1} = 1 (mod p) | 
|  | //   a^{p-3} = a^{-2} (mod p) | 
|  | static void fiat_p256_inv_square(fiat_p256_felem out, | 
|  | const fiat_p256_felem in) { | 
|  | // This implements the addition chain described in | 
|  | // https://briansmith.org/ecc-inversion-addition-chains-01#p256_field_inversion | 
|  | fiat_p256_felem x2, x3, x6, x12, x15, x30, x32; | 
|  | fiat_p256_square(x2, in);   // 2^2 - 2^1 | 
|  | fiat_p256_mul(x2, x2, in);  // 2^2 - 2^0 | 
|  |  | 
|  | fiat_p256_square(x3, x2);   // 2^3 - 2^1 | 
|  | fiat_p256_mul(x3, x3, in);  // 2^3 - 2^0 | 
|  |  | 
|  | fiat_p256_square(x6, x3); | 
|  | for (int i = 1; i < 3; i++) { | 
|  | fiat_p256_square(x6, x6); | 
|  | }                           // 2^6 - 2^3 | 
|  | fiat_p256_mul(x6, x6, x3);  // 2^6 - 2^0 | 
|  |  | 
|  | fiat_p256_square(x12, x6); | 
|  | for (int i = 1; i < 6; i++) { | 
|  | fiat_p256_square(x12, x12); | 
|  | }                             // 2^12 - 2^6 | 
|  | fiat_p256_mul(x12, x12, x6);  // 2^12 - 2^0 | 
|  |  | 
|  | fiat_p256_square(x15, x12); | 
|  | for (int i = 1; i < 3; i++) { | 
|  | fiat_p256_square(x15, x15); | 
|  | }                             // 2^15 - 2^3 | 
|  | fiat_p256_mul(x15, x15, x3);  // 2^15 - 2^0 | 
|  |  | 
|  | fiat_p256_square(x30, x15); | 
|  | for (int i = 1; i < 15; i++) { | 
|  | fiat_p256_square(x30, x30); | 
|  | }                              // 2^30 - 2^15 | 
|  | fiat_p256_mul(x30, x30, x15);  // 2^30 - 2^0 | 
|  |  | 
|  | fiat_p256_square(x32, x30); | 
|  | fiat_p256_square(x32, x32);   // 2^32 - 2^2 | 
|  | fiat_p256_mul(x32, x32, x2);  // 2^32 - 2^0 | 
|  |  | 
|  | fiat_p256_felem ret; | 
|  | fiat_p256_square(ret, x32); | 
|  | for (int i = 1; i < 31 + 1; i++) { | 
|  | fiat_p256_square(ret, ret); | 
|  | }                             // 2^64 - 2^32 | 
|  | fiat_p256_mul(ret, ret, in);  // 2^64 - 2^32 + 2^0 | 
|  |  | 
|  | for (int i = 0; i < 96 + 32; i++) { | 
|  | fiat_p256_square(ret, ret); | 
|  | }                              // 2^192 - 2^160 + 2^128 | 
|  | fiat_p256_mul(ret, ret, x32);  // 2^192 - 2^160 + 2^128 + 2^32 - 2^0 | 
|  |  | 
|  | for (int i = 0; i < 32; i++) { | 
|  | fiat_p256_square(ret, ret); | 
|  | }                              // 2^224 - 2^192 + 2^160 + 2^64 - 2^32 | 
|  | fiat_p256_mul(ret, ret, x32);  // 2^224 - 2^192 + 2^160 + 2^64 - 2^0 | 
|  |  | 
|  | for (int i = 0; i < 30; i++) { | 
|  | fiat_p256_square(ret, ret); | 
|  | }                              // 2^254 - 2^222 + 2^190 + 2^94 - 2^30 | 
|  | fiat_p256_mul(ret, ret, x30);  // 2^254 - 2^222 + 2^190 + 2^94 - 2^0 | 
|  |  | 
|  | fiat_p256_square(ret, ret); | 
|  | fiat_p256_square(out, ret);  // 2^256 - 2^224 + 2^192 + 2^96 - 2^2 | 
|  | } | 
|  |  | 
|  | // Group operations | 
|  | // ---------------- | 
|  | // | 
|  | // Building on top of the field operations we have the operations on the | 
|  | // elliptic curve group itself. Points on the curve are represented in Jacobian | 
|  | // coordinates. | 
|  |  | 
|  | static void fiat_p256_point_double(fiat_p256_felem x_out, fiat_p256_felem y_out, | 
|  | fiat_p256_felem z_out, | 
|  | const fiat_p256_felem x_in, | 
|  | const fiat_p256_felem y_in, | 
|  | const fiat_p256_felem z_in) { | 
|  | uint8_t out[3*32], in[3*32]; | 
|  | static_assert(sizeof(fiat_p256_felem) == 32); | 
|  | OPENSSL_memcpy(&in[0], x_in, 32); | 
|  | OPENSSL_memcpy(&in[32], y_in, 32); | 
|  | OPENSSL_memcpy(&in[64], z_in, 32); | 
|  | p256_point_double((br_word_t)out, (br_word_t)in); | 
|  | OPENSSL_memcpy(x_out, &out[0], 32); | 
|  | OPENSSL_memcpy(y_out, &out[32], 32); | 
|  | OPENSSL_memcpy(z_out, &out[64], 32); | 
|  | } | 
|  |  | 
|  | static void fiat_p256_point_add(fiat_p256_felem x3, fiat_p256_felem y3, | 
|  | fiat_p256_felem z3, const fiat_p256_felem x1, | 
|  | const fiat_p256_felem y1, | 
|  | const fiat_p256_felem z1, | 
|  | const fiat_p256_felem x2, | 
|  | const fiat_p256_felem y2, | 
|  | const fiat_p256_felem z2) { | 
|  | uint8_t out[3 * 32], in1[3 * 32], in2[3 * 32]; | 
|  | static_assert(sizeof(fiat_p256_felem) == 32); | 
|  | OPENSSL_memcpy(&in1[0], x1, 32); | 
|  | OPENSSL_memcpy(&in1[32], y1, 32); | 
|  | OPENSSL_memcpy(&in1[64], z1, 32); | 
|  | OPENSSL_memcpy(&in2[0], x2, 32); | 
|  | OPENSSL_memcpy(&in2[32], y2, 32); | 
|  | OPENSSL_memcpy(&in2[64], z2, 32); | 
|  | p256_point_add_vartime_if_doubling((br_word_t)out, (br_word_t)in1, | 
|  | (br_word_t)in2); | 
|  | OPENSSL_memcpy(x3, &out[0], 32); | 
|  | OPENSSL_memcpy(y3, &out[32], 32); | 
|  | OPENSSL_memcpy(z3, &out[64], 32); | 
|  | } | 
|  | #include "./p256_table.h" | 
|  |  | 
|  | // fiat_p256_select_point_affine selects the |idx-1|th point from a | 
|  | // precomputation table and copies it to out. If |idx| is zero, the output is | 
|  | // the point at infinity. | 
|  | static void fiat_p256_select_point_affine( | 
|  | const fiat_p256_limb_t idx, size_t size, | 
|  | const fiat_p256_felem pre_comp[/*size*/][2], fiat_p256_felem out[3]) { | 
|  | OPENSSL_memset(out, 0, sizeof(fiat_p256_felem) * 3); | 
|  | for (size_t i = 0; i < size; i++) { | 
|  | fiat_p256_limb_t mismatch = i ^ (idx - 1); | 
|  | fiat_p256_cmovznz(out[0], mismatch, pre_comp[i][0], out[0]); | 
|  | fiat_p256_cmovznz(out[1], mismatch, pre_comp[i][1], out[1]); | 
|  | } | 
|  | fiat_p256_cmovznz(out[2], idx, out[2], fiat_p256_one); | 
|  | } | 
|  |  | 
|  | // fiat_p256_select_point selects the |idx|th point from a precomputation table | 
|  | // and copies it to out. | 
|  | static void fiat_p256_select_point(const fiat_p256_limb_t idx, size_t size, | 
|  | const fiat_p256_felem pre_comp[/*size*/][3], | 
|  | fiat_p256_felem out[3]) { | 
|  | OPENSSL_memset(out, 0, sizeof(fiat_p256_felem) * 3); | 
|  | for (size_t i = 0; i < size; i++) { | 
|  | fiat_p256_limb_t mismatch = i ^ idx; | 
|  | fiat_p256_cmovznz(out[0], mismatch, pre_comp[i][0], out[0]); | 
|  | fiat_p256_cmovznz(out[1], mismatch, pre_comp[i][1], out[1]); | 
|  | fiat_p256_cmovznz(out[2], mismatch, pre_comp[i][2], out[2]); | 
|  | } | 
|  | } | 
|  |  | 
|  | // fiat_p256_get_bit returns the |i|th bit in |in|. | 
|  | static crypto_word_t fiat_p256_get_bit(const EC_SCALAR *in, int i) { | 
|  | if (i < 0 || i >= 256) { | 
|  | return 0; | 
|  | } | 
|  | #if defined(OPENSSL_64_BIT) | 
|  | static_assert(sizeof(BN_ULONG) == 8, "BN_ULONG was not 64-bit"); | 
|  | return (in->words[i >> 6] >> (i & 63)) & 1; | 
|  | #else | 
|  | static_assert(sizeof(BN_ULONG) == 4, "BN_ULONG was not 32-bit"); | 
|  | return (in->words[i >> 5] >> (i & 31)) & 1; | 
|  | #endif | 
|  | } | 
|  |  | 
|  | // OPENSSL EC_METHOD FUNCTIONS | 
|  |  | 
|  | // Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') = | 
|  | // (X/Z^2, Y/Z^3). | 
|  | static int ec_GFp_nistp256_point_get_affine_coordinates( | 
|  | const EC_GROUP *group, const EC_JACOBIAN *point, EC_FELEM *x_out, | 
|  | EC_FELEM *y_out) { | 
|  | if (constant_time_declassify_int( | 
|  | ec_GFp_simple_is_at_infinity(group, point))) { | 
|  | OPENSSL_PUT_ERROR(EC, EC_R_POINT_AT_INFINITY); | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | fiat_p256_felem z1, z2; | 
|  | fiat_p256_from_generic(z1, &point->Z); | 
|  | fiat_p256_inv_square(z2, z1); | 
|  |  | 
|  | if (x_out != NULL) { | 
|  | fiat_p256_felem x; | 
|  | fiat_p256_from_generic(x, &point->X); | 
|  | fiat_p256_mul(x, x, z2); | 
|  | fiat_p256_to_generic(x_out, x); | 
|  | } | 
|  |  | 
|  | if (y_out != NULL) { | 
|  | fiat_p256_felem y; | 
|  | fiat_p256_from_generic(y, &point->Y); | 
|  | fiat_p256_square(z2, z2);  // z^-4 | 
|  | fiat_p256_mul(y, y, z1);   // y * z | 
|  | fiat_p256_mul(y, y, z2);   // y * z^-3 | 
|  | fiat_p256_to_generic(y_out, y); | 
|  | } | 
|  |  | 
|  | return 1; | 
|  | } | 
|  |  | 
|  | static void ec_GFp_nistp256_add(const EC_GROUP *group, EC_JACOBIAN *r, | 
|  | const EC_JACOBIAN *a, const EC_JACOBIAN *b) { | 
|  | fiat_p256_felem x1, y1, z1, x2, y2, z2; | 
|  | fiat_p256_from_generic(x1, &a->X); | 
|  | fiat_p256_from_generic(y1, &a->Y); | 
|  | fiat_p256_from_generic(z1, &a->Z); | 
|  | fiat_p256_from_generic(x2, &b->X); | 
|  | fiat_p256_from_generic(y2, &b->Y); | 
|  | fiat_p256_from_generic(z2, &b->Z); | 
|  | fiat_p256_point_add(x1, y1, z1, x1, y1, z1, x2, y2, z2); | 
|  | fiat_p256_to_generic(&r->X, x1); | 
|  | fiat_p256_to_generic(&r->Y, y1); | 
|  | fiat_p256_to_generic(&r->Z, z1); | 
|  | } | 
|  |  | 
|  | static void ec_GFp_nistp256_dbl(const EC_GROUP *group, EC_JACOBIAN *r, | 
|  | const EC_JACOBIAN *a) { | 
|  | fiat_p256_felem x, y, z; | 
|  | fiat_p256_from_generic(x, &a->X); | 
|  | fiat_p256_from_generic(y, &a->Y); | 
|  | fiat_p256_from_generic(z, &a->Z); | 
|  | fiat_p256_point_double(x, y, z, x, y, z); | 
|  | fiat_p256_to_generic(&r->X, x); | 
|  | fiat_p256_to_generic(&r->Y, y); | 
|  | fiat_p256_to_generic(&r->Z, z); | 
|  | } | 
|  |  | 
|  | static void ec_GFp_nistp256_point_mul(const EC_GROUP *group, EC_JACOBIAN *r, | 
|  | const EC_JACOBIAN *p, | 
|  | const EC_SCALAR *scalar) { | 
|  | fiat_p256_felem p_pre_comp[17][3]; | 
|  | OPENSSL_memset(&p_pre_comp, 0, sizeof(p_pre_comp)); | 
|  | // Precompute multiples. | 
|  | fiat_p256_from_generic(p_pre_comp[1][0], &p->X); | 
|  | fiat_p256_from_generic(p_pre_comp[1][1], &p->Y); | 
|  | fiat_p256_from_generic(p_pre_comp[1][2], &p->Z); | 
|  | for (size_t j = 2; j <= 16; ++j) { | 
|  | if (j & 1) { | 
|  | fiat_p256_point_add(p_pre_comp[j][0], p_pre_comp[j][1], p_pre_comp[j][2], | 
|  | p_pre_comp[1][0], p_pre_comp[1][1], p_pre_comp[1][2], | 
|  | p_pre_comp[j - 1][0], p_pre_comp[j - 1][1], | 
|  | p_pre_comp[j - 1][2]); | 
|  | } else { | 
|  | fiat_p256_point_double(p_pre_comp[j][0], p_pre_comp[j][1], | 
|  | p_pre_comp[j][2], p_pre_comp[j / 2][0], | 
|  | p_pre_comp[j / 2][1], p_pre_comp[j / 2][2]); | 
|  | } | 
|  | } | 
|  |  | 
|  | // Set nq to the point at infinity. | 
|  | fiat_p256_felem nq[3] = {{0}, {0}, {0}}, ftmp, tmp[3]; | 
|  |  | 
|  | // Loop over |scalar| msb-to-lsb, incorporating |p_pre_comp| every 5th round. | 
|  | int skip = 1;  // Save two point operations in the first round. | 
|  | for (size_t i = 255; i < 256; i--) { | 
|  | // double | 
|  | if (!skip) { | 
|  | fiat_p256_point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]); | 
|  | } | 
|  |  | 
|  | // do other additions every 5 doublings | 
|  | if (i % 5 == 0) { | 
|  | crypto_word_t bits = fiat_p256_get_bit(scalar, i + 4) << 5; | 
|  | bits |= fiat_p256_get_bit(scalar, i + 3) << 4; | 
|  | bits |= fiat_p256_get_bit(scalar, i + 2) << 3; | 
|  | bits |= fiat_p256_get_bit(scalar, i + 1) << 2; | 
|  | bits |= fiat_p256_get_bit(scalar, i) << 1; | 
|  | bits |= fiat_p256_get_bit(scalar, i - 1); | 
|  | crypto_word_t sign, digit; | 
|  | ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits); | 
|  |  | 
|  | // select the point to add or subtract, in constant time. | 
|  | fiat_p256_select_point((fiat_p256_limb_t)digit, 17, | 
|  | (const fiat_p256_felem(*)[3])p_pre_comp, tmp); | 
|  | fiat_p256_opp(ftmp, tmp[1]);  // (X, -Y, Z) is the negative point. | 
|  | fiat_p256_cmovznz(tmp[1], (fiat_p256_limb_t)sign, tmp[1], ftmp); | 
|  |  | 
|  | if (!skip) { | 
|  | fiat_p256_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], tmp[0], | 
|  | tmp[1], tmp[2]); | 
|  | } else { | 
|  | fiat_p256_copy(nq[0], tmp[0]); | 
|  | fiat_p256_copy(nq[1], tmp[1]); | 
|  | fiat_p256_copy(nq[2], tmp[2]); | 
|  | skip = 0; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | fiat_p256_to_generic(&r->X, nq[0]); | 
|  | fiat_p256_to_generic(&r->Y, nq[1]); | 
|  | fiat_p256_to_generic(&r->Z, nq[2]); | 
|  | } | 
|  |  | 
|  | static void ec_GFp_nistp256_point_mul_base(const EC_GROUP *group, | 
|  | EC_JACOBIAN *r, | 
|  | const EC_SCALAR *scalar) { | 
|  | // Set nq to the point at infinity. | 
|  | fiat_p256_felem nq[3] = {{0}, {0}, {0}}, tmp[3]; | 
|  |  | 
|  | int skip = 1;  // Save two point operations in the first round. | 
|  | for (size_t i = 31; i < 32; i--) { | 
|  | if (!skip) { | 
|  | fiat_p256_point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]); | 
|  | } | 
|  |  | 
|  | // First, look 32 bits upwards. | 
|  | crypto_word_t bits = fiat_p256_get_bit(scalar, i + 224) << 3; | 
|  | bits |= fiat_p256_get_bit(scalar, i + 160) << 2; | 
|  | bits |= fiat_p256_get_bit(scalar, i + 96) << 1; | 
|  | bits |= fiat_p256_get_bit(scalar, i + 32); | 
|  | // Select the point to add, in constant time. | 
|  | fiat_p256_select_point_affine((fiat_p256_limb_t)bits, 15, | 
|  | fiat_p256_g_pre_comp[1], tmp); | 
|  |  | 
|  | if (!skip) { | 
|  | fiat_p256_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], tmp[0], | 
|  | tmp[1], tmp[2]); | 
|  | } else { | 
|  | fiat_p256_copy(nq[0], tmp[0]); | 
|  | fiat_p256_copy(nq[1], tmp[1]); | 
|  | fiat_p256_copy(nq[2], tmp[2]); | 
|  | skip = 0; | 
|  | } | 
|  |  | 
|  | // Second, look at the current position. | 
|  | bits = fiat_p256_get_bit(scalar, i + 192) << 3; | 
|  | bits |= fiat_p256_get_bit(scalar, i + 128) << 2; | 
|  | bits |= fiat_p256_get_bit(scalar, i + 64) << 1; | 
|  | bits |= fiat_p256_get_bit(scalar, i); | 
|  | // Select the point to add, in constant time. | 
|  | fiat_p256_select_point_affine((fiat_p256_limb_t)bits, 15, | 
|  | fiat_p256_g_pre_comp[0], tmp); | 
|  | fiat_p256_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], tmp[0], | 
|  | tmp[1], tmp[2]); | 
|  | } | 
|  |  | 
|  | fiat_p256_to_generic(&r->X, nq[0]); | 
|  | fiat_p256_to_generic(&r->Y, nq[1]); | 
|  | fiat_p256_to_generic(&r->Z, nq[2]); | 
|  | } | 
|  |  | 
|  | static void ec_GFp_nistp256_point_mul_public(const EC_GROUP *group, | 
|  | EC_JACOBIAN *r, | 
|  | const EC_SCALAR *g_scalar, | 
|  | const EC_JACOBIAN *p, | 
|  | const EC_SCALAR *p_scalar) { | 
|  | #define P256_WSIZE_PUBLIC 4 | 
|  | // Precompute multiples of |p|. p_pre_comp[i] is (2*i+1) * |p|. | 
|  | fiat_p256_felem p_pre_comp[1 << (P256_WSIZE_PUBLIC - 1)][3]; | 
|  | fiat_p256_from_generic(p_pre_comp[0][0], &p->X); | 
|  | fiat_p256_from_generic(p_pre_comp[0][1], &p->Y); | 
|  | fiat_p256_from_generic(p_pre_comp[0][2], &p->Z); | 
|  | fiat_p256_felem p2[3]; | 
|  | fiat_p256_point_double(p2[0], p2[1], p2[2], p_pre_comp[0][0], | 
|  | p_pre_comp[0][1], p_pre_comp[0][2]); | 
|  | for (size_t i = 1; i < OPENSSL_ARRAY_SIZE(p_pre_comp); i++) { | 
|  | fiat_p256_point_add(p_pre_comp[i][0], p_pre_comp[i][1], p_pre_comp[i][2], | 
|  | p_pre_comp[i - 1][0], p_pre_comp[i - 1][1], | 
|  | p_pre_comp[i - 1][2], p2[0], p2[1], p2[2]); | 
|  | } | 
|  |  | 
|  | // Set up the coefficients for |p_scalar|. | 
|  | int8_t p_wNAF[257]; | 
|  | ec_compute_wNAF(group, p_wNAF, p_scalar, 256, P256_WSIZE_PUBLIC); | 
|  |  | 
|  | // Set |ret| to the point at infinity. | 
|  | int skip = 1;  // Save some point operations. | 
|  | fiat_p256_felem ret[3] = {{0}, {0}, {0}}; | 
|  | for (int i = 256; i >= 0; i--) { | 
|  | if (!skip) { | 
|  | fiat_p256_point_double(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2]); | 
|  | } | 
|  |  | 
|  | // For the |g_scalar|, we use the precomputed table without the | 
|  | // constant-time lookup. | 
|  | if (i <= 31) { | 
|  | // First, look 32 bits upwards. | 
|  | crypto_word_t bits = fiat_p256_get_bit(g_scalar, i + 224) << 3; | 
|  | bits |= fiat_p256_get_bit(g_scalar, i + 160) << 2; | 
|  | bits |= fiat_p256_get_bit(g_scalar, i + 96) << 1; | 
|  | bits |= fiat_p256_get_bit(g_scalar, i + 32); | 
|  | if (bits != 0) { | 
|  | size_t index = (size_t)(bits - 1); | 
|  | fiat_p256_point_add(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2], | 
|  | fiat_p256_g_pre_comp[1][index][0], | 
|  | fiat_p256_g_pre_comp[1][index][1], fiat_p256_one); | 
|  | skip = 0; | 
|  | } | 
|  |  | 
|  | // Second, look at the current position. | 
|  | bits = fiat_p256_get_bit(g_scalar, i + 192) << 3; | 
|  | bits |= fiat_p256_get_bit(g_scalar, i + 128) << 2; | 
|  | bits |= fiat_p256_get_bit(g_scalar, i + 64) << 1; | 
|  | bits |= fiat_p256_get_bit(g_scalar, i); | 
|  | if (bits != 0) { | 
|  | size_t index = (size_t)(bits - 1); | 
|  | fiat_p256_point_add(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2], | 
|  | fiat_p256_g_pre_comp[0][index][0], | 
|  | fiat_p256_g_pre_comp[0][index][1], fiat_p256_one); | 
|  | skip = 0; | 
|  | } | 
|  | } | 
|  |  | 
|  | int digit = p_wNAF[i]; | 
|  | if (digit != 0) { | 
|  | assert(digit & 1); | 
|  | size_t idx = (size_t)(digit < 0 ? (-digit) >> 1 : digit >> 1); | 
|  | fiat_p256_felem *y = &p_pre_comp[idx][1], tmp; | 
|  | if (digit < 0) { | 
|  | fiat_p256_opp(tmp, p_pre_comp[idx][1]); | 
|  | y = &tmp; | 
|  | } | 
|  | if (!skip) { | 
|  | fiat_p256_point_add(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2], | 
|  | p_pre_comp[idx][0], *y, p_pre_comp[idx][2]); | 
|  | } else { | 
|  | fiat_p256_copy(ret[0], p_pre_comp[idx][0]); | 
|  | fiat_p256_copy(ret[1], *y); | 
|  | fiat_p256_copy(ret[2], p_pre_comp[idx][2]); | 
|  | skip = 0; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | fiat_p256_to_generic(&r->X, ret[0]); | 
|  | fiat_p256_to_generic(&r->Y, ret[1]); | 
|  | fiat_p256_to_generic(&r->Z, ret[2]); | 
|  | } | 
|  |  | 
|  | static int ec_GFp_nistp256_cmp_x_coordinate(const EC_GROUP *group, | 
|  | const EC_JACOBIAN *p, | 
|  | const EC_SCALAR *r) { | 
|  | if (ec_GFp_simple_is_at_infinity(group, p)) { | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | // We wish to compare X/Z^2 with r. This is equivalent to comparing X with | 
|  | // r*Z^2. Note that X and Z are represented in Montgomery form, while r is | 
|  | // not. | 
|  | fiat_p256_felem Z2_mont; | 
|  | fiat_p256_from_generic(Z2_mont, &p->Z); | 
|  | fiat_p256_mul(Z2_mont, Z2_mont, Z2_mont); | 
|  |  | 
|  | fiat_p256_felem r_Z2; | 
|  | fiat_p256_from_words(r_Z2, r->words);  // r < order < p, so this is valid. | 
|  | fiat_p256_mul(r_Z2, r_Z2, Z2_mont); | 
|  |  | 
|  | fiat_p256_felem X; | 
|  | fiat_p256_from_generic(X, &p->X); | 
|  | fiat_p256_from_montgomery(X, X); | 
|  |  | 
|  | if (OPENSSL_memcmp(&r_Z2, &X, sizeof(r_Z2)) == 0) { | 
|  | return 1; | 
|  | } | 
|  |  | 
|  | // During signing the x coefficient is reduced modulo the group order. | 
|  | // Therefore there is a small possibility, less than 1/2^128, that group_order | 
|  | // < p.x < P. in that case we need not only to compare against |r| but also to | 
|  | // compare against r+group_order. | 
|  | assert(group->field.N.width == group->order.N.width); | 
|  | EC_FELEM tmp; | 
|  | BN_ULONG carry = | 
|  | bn_add_words(tmp.words, r->words, group->order.N.d, group->field.N.width); | 
|  | if (carry == 0 && | 
|  | bn_less_than_words(tmp.words, group->field.N.d, group->field.N.width)) { | 
|  | fiat_p256_from_generic(r_Z2, &tmp); | 
|  | fiat_p256_mul(r_Z2, r_Z2, Z2_mont); | 
|  | if (OPENSSL_memcmp(&r_Z2, &X, sizeof(r_Z2)) == 0) { | 
|  | return 1; | 
|  | } | 
|  | } | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | DEFINE_METHOD_FUNCTION(EC_METHOD, EC_GFp_nistp256_method) { | 
|  | out->point_get_affine_coordinates = | 
|  | ec_GFp_nistp256_point_get_affine_coordinates; | 
|  | out->add = ec_GFp_nistp256_add; | 
|  | out->dbl = ec_GFp_nistp256_dbl; | 
|  | out->mul = ec_GFp_nistp256_point_mul; | 
|  | out->mul_base = ec_GFp_nistp256_point_mul_base; | 
|  | out->mul_public = ec_GFp_nistp256_point_mul_public; | 
|  | out->felem_mul = ec_GFp_mont_felem_mul; | 
|  | out->felem_sqr = ec_GFp_mont_felem_sqr; | 
|  | out->felem_to_bytes = ec_GFp_mont_felem_to_bytes; | 
|  | out->felem_from_bytes = ec_GFp_mont_felem_from_bytes; | 
|  | out->felem_reduce = ec_GFp_mont_felem_reduce; | 
|  | // TODO(davidben): This should use the specialized field arithmetic | 
|  | // implementation, rather than the generic one. | 
|  | out->felem_exp = ec_GFp_mont_felem_exp; | 
|  | out->scalar_inv0_montgomery = ec_simple_scalar_inv0_montgomery; | 
|  | out->scalar_to_montgomery_inv_vartime = | 
|  | ec_simple_scalar_to_montgomery_inv_vartime; | 
|  | out->cmp_x_coordinate = ec_GFp_nistp256_cmp_x_coordinate; | 
|  | } |