Revert "Speed up ECDSA verify on x86-64."
This reverts commit 3d450d2844db825a906fc19f7bb1e6ce765047db. It fails
SDE, looks like a missing CPUID check before using vector instructions.
Change-Id: I6b7dd71d9e5b1f509d2e018bd8be38c973476b4e
Reviewed-on: https://boringssl-review.googlesource.com/c/32864
Reviewed-by: Adam Langley <agl@google.com>
Commit-Queue: Adam Langley <agl@google.com>
diff --git a/crypto/fipsmodule/CMakeLists.txt b/crypto/fipsmodule/CMakeLists.txt
index 463febb..e6c8cc6 100644
--- a/crypto/fipsmodule/CMakeLists.txt
+++ b/crypto/fipsmodule/CMakeLists.txt
@@ -11,7 +11,6 @@
ghash-x86_64.${ASM_EXT}
md5-x86_64.${ASM_EXT}
p256-x86_64-asm.${ASM_EXT}
- p256_beeu-x86_64-asm.${ASM_EXT}
rdrand-x86_64.${ASM_EXT}
rsaz-avx2.${ASM_EXT}
sha1-x86_64.${ASM_EXT}
@@ -101,7 +100,6 @@
perlasm(md5-586.${ASM_EXT} md5/asm/md5-586.pl)
perlasm(md5-x86_64.${ASM_EXT} md5/asm/md5-x86_64.pl)
perlasm(p256-x86_64-asm.${ASM_EXT} ec/asm/p256-x86_64-asm.pl)
-perlasm(p256_beeu-x86_64-asm.${ASM_EXT} ec/asm/p256_beeu-x86_64-asm.pl)
perlasm(rdrand-x86_64.${ASM_EXT} rand/asm/rdrand-x86_64.pl)
perlasm(rsaz-avx2.${ASM_EXT} bn/asm/rsaz-avx2.pl)
perlasm(sha1-586.${ASM_EXT} sha/asm/sha1-586.pl)
diff --git a/crypto/fipsmodule/ec/asm/p256_beeu-x86_64-asm.pl b/crypto/fipsmodule/ec/asm/p256_beeu-x86_64-asm.pl
deleted file mode 100644
index 12b9f5a..0000000
--- a/crypto/fipsmodule/ec/asm/p256_beeu-x86_64-asm.pl
+++ /dev/null
@@ -1,405 +0,0 @@
-# Copyright (c) 2018, Amazon Inc.
-#
-# Permission to use, copy, modify, and/or distribute this software for any
-# purpose with or without fee is hereby granted, provided that the above
-# copyright notice and this permission notice appear in all copies.
-#
-# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
-# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
-# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
-# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
-#
-# Written by Nir Drucker, and Shay Gueron
-# AWS Cryptographic Algorithms Group
-# (ndrucker@amazon.com, gueron@amazon.com)
-# based on BN_mod_inverse_odd
-
-$flavour = shift;
-$output = shift;
-if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
-
-$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
-
-$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
-( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
-( $xlate="${dir}../../../perlasm/x86_64-xlate.pl" and -f $xlate) or
-die "can't locate x86_64-xlate.pl";
-
-open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
-*STDOUT=*OUT;
-
-#############################################################################
-# extern int beeu_mod_inverse_vartime(BN_ULONG out[P256_LIMBS],
-# BN_ULONG a[P256_LIMBS],
-# BN_ULONG n[P256_LIMBS]);
-#
-# (Binary Extended Euclidean Algorithm.
-# See https://en.wikipedia.org/wiki/Binary_GCD_algorithm)
-#
-# Assumption 1: n is odd for the BEEU
-# Assumption 2: 1 < a < n < 2^256
-
-$out = "%rdi";
-$a = "%rsi";
-$n = "%rdx";
-
-# X/Y will hold the inverse parameter
-# Assumption: X,Y<2^(256)
-$x0 = "%r8";
-$x1 = "%r9";
-$x2 = "%r10";
-$x3 = "%r11";
-# borrow from out (out is needed only at the end)
-$x4 = "%rdi";
-$y0 = "%r12";
-$y1 = "%r13";
-$y2 = "%r14";
-$y3 = "%r15";
-$y4 = "%rbp";
-$shift = "%rcx";
-$t0 = "%rax";
-$t1 = "%rbx";
-$t2 = "%rsi";
-# borrow
-$t3 = "%rcx";
-
-$T0 = "%xmm0";
-$T1 = "%xmm1";
-
-# Offsets on the stack
-$out_rsp = 0;
-$shift_rsp = $out_rsp+0x8;
-$a_rsp0 = $shift_rsp+0x8;
-$a_rsp1 = $a_rsp0+0x8;
-$a_rsp2 = $a_rsp1+0x8;
-$a_rsp3 = $a_rsp2+0x8;
-$b_rsp0 = $a_rsp3+0x8;
-$b_rsp1 = $b_rsp0+0x8;
-$b_rsp2 = $b_rsp1+0x8;
-$b_rsp3 = $b_rsp2+0x8;
-
-# Borrow when a_rsp/b_rsp are no longer needed.
-$y_rsp0 = $a_rsp0;
-$y_rsp1 = $y_rsp0+0x8;
-$y_rsp2 = $y_rsp1+0x8;
-$y_rsp3 = $y_rsp2+0x8;
-$y_rsp4 = $y_rsp3+0x8;
-$last_rsp_offset = $b_rsp3+0x8;
-
-sub TEST_B_ZERO {
- return <<___;
- xorq $t1, $t1
- or $b_rsp0(%rsp), $t1
- or $b_rsp1(%rsp), $t1
- or $b_rsp2(%rsp), $t1
- or $b_rsp3(%rsp), $t1
- jz .Lbeeu_loop_end
-___
-}
-
-$g_next_label = 0;
-
-sub SHIFT1 {
- my ($var0, $var1, $var2, $var3, $var4) = @_;
- my $label = ".Lshift1_${g_next_label}";
- $g_next_label++;
-
- return <<___;
- # Ensure X is even and divide by two.
- movq \$1, $t1
- andq $var0, $t1
- jz $label
- add 0*8($n), $var0
- adc 1*8($n), $var1
- adc 2*8($n), $var2
- adc 3*8($n), $var3
- adc \$0, $var4
-
-$label:
- shrdq \$1, $var1, $var0
- shrdq \$1, $var2, $var1
- shrdq \$1, $var3, $var2
- shrdq \$1, $var4, $var3
- shrq \$1, $var4
-___
-}
-
-sub SHIFT256 {
- my ($var) = @_;
- return <<___;
- # Copy shifted values.
- # Remember not to override t3=rcx
- movq 1*8+$var(%rsp), $t0
- movq 2*8+$var(%rsp), $t1
- movq 3*8+$var(%rsp), $t2
-
- shrdq %cl, $t0, 0*8+$var(%rsp)
- shrdq %cl, $t1, 1*8+$var(%rsp)
- shrdq %cl, $t2, 2*8+$var(%rsp)
-
- shrq %cl, $t2
- mov $t2, 3*8+$var(%rsp)
-___
-}
-
-$code.=<<___;
-.text
-
-.type beeu_mod_inverse_vartime,\@function
-.hidden beeu_mod_inverse_vartime
-.globl beeu_mod_inverse_vartime
-.align 32
-beeu_mod_inverse_vartime:
-.cfi_startproc
- push %rbp
-.cfi_push rbp
- movq %rsp, %rbp
-.cfi_def_cfa_register rbp
-
- push %r12
-.cfi_push r12
- push %r13
-.cfi_push r13
- push %r14
-.cfi_push r14
- push %r15
-.cfi_push r15
- push %rbx
-.cfi_push rbx
- push %rsi
-.cfi_push rsi
-
- sub \$$last_rsp_offset, %rsp
- movq $out, $out_rsp(%rsp)
-
- # X=1, Y=0
- movq \$1, $x0
- xorq $x1, $x1
- xorq $x2, $x2
- xorq $x3, $x3
- xorq $x4, $x4
-
- xorq $y0, $y0
- xorq $y1, $y1
- xorq $y2, $y2
- xorq $y3, $y3
- xorq $y4, $y4
-
- # Copy a/n into B/A on the stack.
- vmovdqu 0*8($a), $T0
- vmovdqu 2*8($a), $T1
- vmovdqu $T0, $b_rsp0(%rsp)
- vmovdqu $T1, $b_rsp2(%rsp)
-
- vmovdqu 0*8($n), $T0
- vmovdqu 2*8($n), $T1
- vmovdqu $T0, $a_rsp0(%rsp)
- vmovdqu $T1, $a_rsp2(%rsp)
-
-.Lbeeu_loop:
- ${\TEST_B_ZERO}
-
- # 0 < B < |n|,
- # 0 < A <= |n|,
- # (1) X*a == B (mod |n|),
- # (2) (-1)*Y*a == A (mod |n|)
-
- # Now divide B by the maximum possible power of two in the
- # integers, and divide X by the same value mod |n|. When we're
- # done, (1) still holds.
- movq \$1, $shift
-
- # Note that B > 0
-.Lbeeu_shift_loop_XB:
- movq $shift, $t1
- andq $b_rsp0(%rsp), $t1
- jnz .Lbeeu_shift_loop_end_XB
-
- ${\SHIFT1($x0, $x1, $x2, $x3, $x4)}
- shl \$1, $shift
-
- # Test wraparound of the shift parameter. The probability to have 32 zeroes
- # in a row is small Therefore having the value below equal \$0x8000000 or
- # \$0x8000 does not affect the performance. We choose 0x8000000 because it
- # is the maximal immediate value possible.
- cmp \$0x8000000, $shift
- jne .Lbeeu_shift_loop_XB
-
-.Lbeeu_shift_loop_end_XB:
- bsf $shift, $shift
- test $shift, $shift
- jz .Lbeeu_no_shift_XB
-
- ${\SHIFT256($b_rsp0)}
-
-.Lbeeu_no_shift_XB:
- # Same for A and Y. Afterwards, (2) still holds.
- movq \$1, $shift
-
- # Note that A > 0
-.Lbeeu_shift_loop_YA:
- movq $shift, $t1
- andq $a_rsp0(%rsp), $t1
- jnz .Lbeeu_shift_loop_end_YA
-
- ${\SHIFT1($y0, $y1, $y2, $y3, $y4)}
- shl \$1, $shift
-
- # Test wraparound of the shift parameter. The probability to have 32 zeroes
- # in a row is small therefore having the value below equal \$0x8000000 or
- # \$0x8000 Does not affect the performance. We choose 0x8000000 because it
- # is the maximal immediate value possible.
- cmp \$0x8000000, $shift
- jne .Lbeeu_shift_loop_YA
-
-.Lbeeu_shift_loop_end_YA:
- bsf $shift, $shift
- test $shift, $shift
- jz .Lbeeu_no_shift_YA
-
- ${\SHIFT256($a_rsp0)}
-
-.Lbeeu_no_shift_YA:
- # T = B-A (A,B < 2^256)
- mov $b_rsp0(%rsp), $t0
- mov $b_rsp1(%rsp), $t1
- mov $b_rsp2(%rsp), $t2
- mov $b_rsp3(%rsp), $t3
- sub $a_rsp0(%rsp), $t0
- sbb $a_rsp1(%rsp), $t1
- sbb $a_rsp2(%rsp), $t2
- sbb $a_rsp3(%rsp), $t3 # borrow from shift
- jnc .Lbeeu_B_bigger_than_A
-
- # A = A - B
- mov $a_rsp0(%rsp), $t0
- mov $a_rsp1(%rsp), $t1
- mov $a_rsp2(%rsp), $t2
- mov $a_rsp3(%rsp), $t3
- sub $b_rsp0(%rsp), $t0
- sbb $b_rsp1(%rsp), $t1
- sbb $b_rsp2(%rsp), $t2
- sbb $b_rsp3(%rsp), $t3
- mov $t0, $a_rsp0(%rsp)
- mov $t1, $a_rsp1(%rsp)
- mov $t2, $a_rsp2(%rsp)
- mov $t3, $a_rsp3(%rsp)
-
- # Y = Y + X
- add $x0, $y0
- adc $x1, $y1
- adc $x2, $y2
- adc $x3, $y3
- adc $x4, $y4
- jmp .Lbeeu_loop
-
-.Lbeeu_B_bigger_than_A:
- # B = T = B - A
- mov $t0, $b_rsp0(%rsp)
- mov $t1, $b_rsp1(%rsp)
- mov $t2, $b_rsp2(%rsp)
- mov $t3, $b_rsp3(%rsp)
-
- # X = Y + X
- add $y0, $x0
- adc $y1, $x1
- adc $y2, $x2
- adc $y3, $x3
- adc $y4, $x4
-
- jmp .Lbeeu_loop
-
-.Lbeeu_loop_end:
- # The Euclid's algorithm loop ends when A == beeu(a,n);
- # Therefore (-1)*Y*a == A (mod |n|), Y>0
-
- # Verify that A = 1 ==> (-1)*Y*a = A = 1 (mod |n|)
- mov $a_rsp0(%rsp), $t1
- sub \$1, $t1
- or $a_rsp1(%rsp), $t1
- or $a_rsp2(%rsp), $t1
- or $a_rsp3(%rsp), $t1
- # If not, fail.
- jnz .Lbeeu_err
-
- # From this point on, we no longer need X
- # Therefore we use it as a temporary storage.
- # X = n
- movq 0*8($n), $x0
- movq 1*8($n), $x1
- movq 2*8($n), $x2
- movq 3*8($n), $x3
- xorq $x4, $x4
-
-.Lbeeu_reduction_loop:
- movq $y0, $y_rsp0(%rsp)
- movq $y1, $y_rsp1(%rsp)
- movq $y2, $y_rsp2(%rsp)
- movq $y3, $y_rsp3(%rsp)
- movq $y4, $y_rsp4(%rsp)
-
- # If Y>n ==> Y=Y-n
- sub $x0, $y0
- sbb $x1, $y1
- sbb $x2, $y2
- sbb $x3, $y3
- sbb \$0, $y4
-
- # Choose old Y or new Y
- cmovc $y_rsp0(%rsp), $y0
- cmovc $y_rsp1(%rsp), $y1
- cmovc $y_rsp2(%rsp), $y2
- cmovc $y_rsp3(%rsp), $y3
- jnc .Lbeeu_reduction_loop
-
- # X = n - Y (n, Y < 2^256), (Cancel the (-1))
- sub $y0, $x0
- sbb $y1, $x1
- sbb $y2, $x2
- sbb $y3, $x3
-
-.Lbeeu_save:
- # Save the inverse(<2^256) to out.
- mov $out_rsp(%rsp), $out
-
- movq $x0, 0*8($out)
- movq $x1, 1*8($out)
- movq $x2, 2*8($out)
- movq $x3, 3*8($out)
-
- # Return 1.
- movq \$1, %rax
- jmp .Lbeeu_finish
-
-.Lbeeu_err:
- # Return 0.
- xorq %rax, %rax
-
-.Lbeeu_finish:
- add \$$last_rsp_offset, %rsp
- pop %rsi
-.cfi_pop rsi
- pop %rbx
-.cfi_pop rbx
- pop %r15
-.cfi_pop r15
- pop %r14
-.cfi_pop r14
- pop %r13
-.cfi_pop r13
- pop %r12
-.cfi_pop r12
- pop %rbp
-.cfi_pop rbp
-.cfi_def_cfa rsp, 8
-.cfi_endproc
- ret
-
-.size beeu_mod_inverse_vartime, .-beeu_mod_inverse_vartime
-___
-
-print $code;
-close STDOUT;
diff --git a/crypto/fipsmodule/ec/ec.c b/crypto/fipsmodule/ec/ec.c
index 4c557a1..134d051 100644
--- a/crypto/fipsmodule/ec/ec.c
+++ b/crypto/fipsmodule/ec/ec.c
@@ -911,43 +911,6 @@
return 1;
}
-int ec_cmp_x_coordinate(const EC_GROUP *group, const EC_POINT *p,
- const BIGNUM *r, BN_CTX *ctx) {
- return group->meth->cmp_x_coordinate(group, p, r, ctx);
-}
-
-int ec_field_element_to_scalar(const EC_GROUP *group, BIGNUM *r) {
- // We must have p < 2×order, assuming p is not tiny (p >= 17). Thus rather we
- // can reduce by performing at most one subtraction.
- //
- // Proof: We only work with prime order curves, so the number of points on
- // the curve is the order. Thus Hasse's theorem gives:
- //
- // |order - (p + 1)| <= 2×sqrt(p)
- // p + 1 - order <= 2×sqrt(p)
- // p + 1 - 2×sqrt(p) <= order
- // p + 1 - 2×(p/4) < order (p/4 > sqrt(p) for p >= 17)
- // p/2 < p/2 + 1 < order
- // p < 2×order
- //
- // Additionally, one can manually check this property for built-in curves. It
- // is enforced for legacy custom curves in |EC_GROUP_set_generator|.
- //
- // TODO(davidben): Introduce |EC_FIELD_ELEMENT|, make this a function from
- // |EC_FIELD_ELEMENT| to |EC_SCALAR|, and cut out the |BIGNUM|. Does this need
- // to be constant-time for signing? |r| is the x-coordinate for kG, which is
- // public unless k was rerolled because |s| was zero.
- assert(!BN_is_negative(r));
- assert(BN_cmp(r, &group->field) < 0);
- if (BN_cmp(r, &group->order) >= 0 &&
- !BN_sub(r, r, &group->order)) {
- return 0;
- }
- assert(!BN_is_negative(r));
- assert(BN_cmp(r, &group->order) < 0);
- return 1;
-}
-
void EC_GROUP_set_asn1_flag(EC_GROUP *group, int flag) {}
const EC_METHOD *EC_GROUP_method_of(const EC_GROUP *group) {
diff --git a/crypto/fipsmodule/ec/ec_montgomery.c b/crypto/fipsmodule/ec/ec_montgomery.c
index db2d657..3fb67e2 100644
--- a/crypto/fipsmodule/ec/ec_montgomery.c
+++ b/crypto/fipsmodule/ec/ec_montgomery.c
@@ -234,6 +234,4 @@
out->bignum_to_felem = ec_GFp_mont_bignum_to_felem;
out->felem_to_bignum = ec_GFp_mont_felem_to_bignum;
out->scalar_inv_montgomery = ec_simple_scalar_inv_montgomery;
- out->scalar_inv_montgomery_vartime = ec_GFp_simple_mont_inv_mod_ord_vartime;
- out->cmp_x_coordinate = ec_GFp_simple_cmp_x_coordinate;
}
diff --git a/crypto/fipsmodule/ec/internal.h b/crypto/fipsmodule/ec/internal.h
index 4f58cb9..e51109a 100644
--- a/crypto/fipsmodule/ec/internal.h
+++ b/crypto/fipsmodule/ec/internal.h
@@ -167,21 +167,11 @@
int (*felem_to_bignum)(const EC_GROUP *group, BIGNUM *out,
const EC_FELEM *in);
- // scalar_inv_montgomery sets |out| to |in|^-1, where both input and output
- // are in Montgomery form.
+ // scalar_inv_mont sets |out| to |in|^-1, where both input and output are in
+ // Montgomery form.
void (*scalar_inv_montgomery)(const EC_GROUP *group, EC_SCALAR *out,
const EC_SCALAR *in);
- // scalar_inv_montgomery_vartime performs the same computation as
- // |scalar_inv_montgomery|. It further assumes that the inputs are public so
- // there is no concern about leaking their values through timing.
- int (*scalar_inv_montgomery_vartime)(const EC_GROUP *group, EC_SCALAR *out,
- const EC_SCALAR *in);
-
- // cmp_x_coordinate compares the x (affine) coordinate of |p|, mod the group
- // order, with |r|. It returns one if they are equal and zero otherwise.
- int (*cmp_x_coordinate)(const EC_GROUP *group, const EC_POINT *p,
- const BIGNUM *r, BN_CTX *ctx);
} /* EC_METHOD */;
const EC_METHOD *EC_GFp_mont_method(void);
@@ -287,11 +277,6 @@
void ec_scalar_inv_montgomery(const EC_GROUP *group, EC_SCALAR *r,
const EC_SCALAR *a);
-// ec_scalar_inv_montgomery_vartime performs the same actions as
-// |ec_scalar_inv_montgomery|, but in variable time.
-int ec_scalar_inv_montgomery_vartime(const EC_GROUP *group, EC_SCALAR *r,
- const EC_SCALAR *a);
-
// ec_point_mul_scalar sets |r| to generator * |g_scalar| + |p| *
// |p_scalar|. Unlike other functions which take |EC_SCALAR|, |g_scalar| and
// |p_scalar| need not be fully reduced. They need only contain as many bits as
@@ -307,16 +292,6 @@
const EC_GROUP *group, EC_POINT *r, const EC_SCALAR *g_scalar,
const EC_POINT *p, const EC_SCALAR *p_scalar, BN_CTX *ctx);
-// ec_cmp_x_coordinate compares the x (affine) coordinate of |p| with |r|. It
-// returns one if they are equal and zero otherwise. The |ctx| must have been
-// started by the caller.
-int ec_cmp_x_coordinate(const EC_GROUP *group, const EC_POINT *p,
- const BIGNUM *r, BN_CTX *ctx);
-
-// ec_field_element_to_scalar reduces |r| modulo |group->order|. |r| must
-// previously have been reduced modulo |group->field|.
-int ec_field_element_to_scalar(const EC_GROUP *group, BIGNUM *r);
-
void ec_GFp_simple_mul(const EC_GROUP *group, EC_RAW_POINT *r,
const EC_SCALAR *g_scalar, const EC_RAW_POINT *p,
const EC_SCALAR *p_scalar);
@@ -362,14 +337,6 @@
void ec_simple_scalar_inv_montgomery(const EC_GROUP *group, EC_SCALAR *r,
const EC_SCALAR *a);
-int ec_GFp_simple_mont_inv_mod_ord_vartime(const EC_GROUP *group, EC_SCALAR *r,
- const EC_SCALAR *a);
-
-// ec_GFp_simple_cmp_x_coordinate compares the x (affine) coordinate of |p|, mod
-// the group order, with |r|. It returns one on success or zero otherwise.
-int ec_GFp_simple_cmp_x_coordinate(const EC_GROUP *group, const EC_POINT *p,
- const BIGNUM *r, BN_CTX *ctx);
-
// method functions in montgomery.c
int ec_GFp_mont_group_init(EC_GROUP *);
int ec_GFp_mont_group_set_curve(EC_GROUP *, const BIGNUM *p, const BIGNUM *a,
diff --git a/crypto/fipsmodule/ec/p224-64.c b/crypto/fipsmodule/ec/p224-64.c
index 27bfc36..ebaca85 100644
--- a/crypto/fipsmodule/ec/p224-64.c
+++ b/crypto/fipsmodule/ec/p224-64.c
@@ -1136,8 +1136,6 @@
out->bignum_to_felem = ec_GFp_nistp224_bignum_to_felem;
out->felem_to_bignum = ec_GFp_nistp224_felem_to_bignum;
out->scalar_inv_montgomery = ec_simple_scalar_inv_montgomery;
- out->scalar_inv_montgomery_vartime = ec_GFp_simple_mont_inv_mod_ord_vartime;
- out->cmp_x_coordinate = ec_GFp_simple_cmp_x_coordinate;
};
#endif // BORINGSSL_HAS_UINT128 && !SMALL
diff --git a/crypto/fipsmodule/ec/p256-x86_64.c b/crypto/fipsmodule/ec/p256-x86_64.c
index 0ff5455..73d36b6 100644
--- a/crypto/fipsmodule/ec/p256-x86_64.c
+++ b/crypto/fipsmodule/ec/p256-x86_64.c
@@ -44,12 +44,6 @@
TOBN(0xffffffff, 0xffffffff), TOBN(0x00000000, 0xfffffffe),
};
-// P256_ORDER is the order of the P-256 group, not in Montgomery form.
-static const BN_ULONG P256_ORDER[P256_LIMBS] = {
- TOBN(0xf3b9cac2, 0xfc632551), TOBN(0xbce6faad, 0xa7179e84),
- TOBN(0xffffffff, 0xffffffff), TOBN(0xffffffff, 0x00000000),
-};
-
// Precomputed tables for the default generator
#include "p256-x86_64-table.h"
@@ -295,63 +289,19 @@
ecp_nistz256_point_add(r, r, &h);
}
-typedef union {
- P256_POINT p;
- P256_POINT_AFFINE a;
-} p256_point_union_t;
-
-static unsigned calc_first_wvalue(unsigned *index, const uint8_t p_str[33]) {
- static const unsigned kWindowSize = 7;
- static const unsigned kMask = (1 << (7 /* kWindowSize */ + 1)) - 1;
- *index = kWindowSize;
-
- unsigned wvalue = (p_str[0] << 1) & kMask;
- return booth_recode_w7(wvalue);
-}
-
-static unsigned calc_wvalue(unsigned *index, const uint8_t p_str[33]) {
- static const unsigned kWindowSize = 7;
- static const unsigned kMask = (1 << (7 /* kWindowSize */ + 1)) - 1;
-
- const unsigned off = (*index - 1) / 8;
- unsigned wvalue = p_str[off] | p_str[off + 1] << 8;
- wvalue = (wvalue >> ((*index - 1) % 8)) & kMask;
- *index += kWindowSize;
-
- return booth_recode_w7(wvalue);
-}
-
-static void mul_p_add_and_store(const EC_GROUP *group, EC_RAW_POINT *r,
- const EC_SCALAR *g_scalar,
- const EC_RAW_POINT *p_,
- const EC_SCALAR *p_scalar,
- p256_point_union_t *t, p256_point_union_t *p) {
- const int p_is_infinity = g_scalar == NULL;
- if (p_scalar != NULL) {
- P256_POINT *out = &t->p;
- if (p_is_infinity) {
- out = &p->p;
- }
-
- ecp_nistz256_windowed_mul(group, out, p_, p_scalar);
- if (!p_is_infinity) {
- ecp_nistz256_point_add(&p->p, &p->p, out);
- }
- }
-
- assert(group->field.width == P256_LIMBS);
- OPENSSL_memcpy(r->X.words, p->p.X, P256_LIMBS * sizeof(BN_ULONG));
- OPENSSL_memcpy(r->Y.words, p->p.Y, P256_LIMBS * sizeof(BN_ULONG));
- OPENSSL_memcpy(r->Z.words, p->p.Z, P256_LIMBS * sizeof(BN_ULONG));
-}
-
static void ecp_nistz256_points_mul(const EC_GROUP *group, EC_RAW_POINT *r,
const EC_SCALAR *g_scalar,
const EC_RAW_POINT *p_,
const EC_SCALAR *p_scalar) {
assert((p_ != NULL) == (p_scalar != NULL));
- alignas(32) p256_point_union_t t, p;
+ static const unsigned kWindowSize = 7;
+ static const unsigned kMask = (1 << (7 /* kWindowSize */ + 1)) - 1;
+
+ alignas(32) union {
+ P256_POINT p;
+ P256_POINT_AFFINE a;
+ } t, p;
if (g_scalar != NULL) {
uint8_t p_str[33];
@@ -359,8 +309,10 @@
p_str[32] = 0;
// First window
- unsigned index = 0;
- unsigned wvalue = calc_first_wvalue(&index, p_str);
+ unsigned wvalue = (p_str[0] << 1) & kMask;
+ unsigned index = kWindowSize;
+
+ wvalue = booth_recode_w7(wvalue);
const PRECOMP256_ROW *const precomputed_table =
(const PRECOMP256_ROW *)ecp_nistz256_precomputed;
@@ -376,7 +328,12 @@
copy_conditional(p.p.Z, ONE, is_not_zero(wvalue >> 1));
for (int i = 1; i < 37; i++) {
- wvalue = calc_wvalue(&index, p_str);
+ unsigned off = (index - 1) / 8;
+ wvalue = p_str[off] | p_str[off + 1] << 8;
+ wvalue = (wvalue >> ((index - 1) % 8)) & kMask;
+ index += kWindowSize;
+
+ wvalue = booth_recode_w7(wvalue);
ecp_nistz256_select_w7(&t.a, precomputed_table[i], wvalue >> 1);
@@ -387,60 +344,23 @@
}
}
- mul_p_add_and_store(group, r, g_scalar, p_, p_scalar, &t, &p);
-}
-
-static void ecp_nistz256_points_mul_public(const EC_GROUP *group,
- EC_RAW_POINT *r,
- const EC_SCALAR *g_scalar,
- const EC_RAW_POINT *p_,
- const EC_SCALAR *p_scalar) {
- assert(p_ != NULL && p_scalar != NULL && g_scalar != NULL);
-
- alignas(32) p256_point_union_t t, p;
- uint8_t p_str[33];
- OPENSSL_memcpy(p_str, g_scalar->bytes, 32);
- p_str[32] = 0;
-
- // First window
- unsigned index = 0;
- unsigned wvalue = calc_first_wvalue(&index, p_str);
-
- const PRECOMP256_ROW *const precomputed_table =
- (const PRECOMP256_ROW *)ecp_nistz256_precomputed;
-
- // Convert |p| from affine to Jacobian coordinates. We set Z to zero if |p|
- // is infinity and |ONE| otherwise. |p| was computed from the table, so it
- // is infinity iff |wvalue >> 1| is zero.
- if ((wvalue >> 1) != 0) {
- OPENSSL_memcpy(&p.a, &precomputed_table[0][(wvalue >> 1) - 1], sizeof(p.a));
- OPENSSL_memcpy(&p.p.Z, ONE, sizeof(p.p.Z));
- } else {
- OPENSSL_memset(&p.a, 0, sizeof(p.a));
- OPENSSL_memset(p.p.Z, 0, sizeof(p.p.Z));
- }
-
- if ((wvalue & 1) == 1) {
- ecp_nistz256_neg(p.p.Y, p.p.Y);
- }
-
- for (int i = 1; i < 37; i++) {
- wvalue = calc_wvalue(&index, p_str);
-
- if ((wvalue >> 1) == 0) {
- continue;
+ const int p_is_infinity = g_scalar == NULL;
+ if (p_scalar != NULL) {
+ P256_POINT *out = &t.p;
+ if (p_is_infinity) {
+ out = &p.p;
}
- OPENSSL_memcpy(&t.a, &precomputed_table[i][(wvalue >> 1) - 1], sizeof(p.a));
-
- if ((wvalue & 1) == 1) {
- ecp_nistz256_neg(t.a.Y, t.a.Y);
+ ecp_nistz256_windowed_mul(group, out, p_, p_scalar);
+ if (!p_is_infinity) {
+ ecp_nistz256_point_add(&p.p, &p.p, out);
}
-
- ecp_nistz256_point_add_affine(&p.p, &p.p, &t.a);
}
- mul_p_add_and_store(group, r, g_scalar, p_, p_scalar, &t, &p);
+ assert(group->field.width == P256_LIMBS);
+ OPENSSL_memcpy(r->X.words, p.p.X, P256_LIMBS * sizeof(BN_ULONG));
+ OPENSSL_memcpy(r->Y.words, p.p.Y, P256_LIMBS * sizeof(BN_ULONG));
+ OPENSSL_memcpy(r->Z.words, p.p.Z, P256_LIMBS * sizeof(BN_ULONG));
}
static int ecp_nistz256_get_affine(const EC_GROUP *group,
@@ -593,68 +513,6 @@
}
}
-static int ecp_nistz256_mont_inv_mod_ord_vartime(const EC_GROUP *group,
- EC_SCALAR *out,
- const EC_SCALAR *in) {
- if (!beeu_mod_inverse_vartime(out->words, in->words, P256_ORDER)) {
- return 0;
- }
-
- // The result should be returned in the Montgomery domain.
- ec_scalar_to_montgomery(group, out, out);
- return 1;
-}
-
-static int ecp_nistz256_cmp_x_coordinate(const EC_GROUP *group,
- const EC_POINT *p, const BIGNUM *r,
- BN_CTX *ctx) {
- if (ec_GFp_simple_is_at_infinity(group, &p->raw)) {
- OPENSSL_PUT_ERROR(EC, EC_R_POINT_AT_INFINITY);
- return 0;
- }
-
- BN_ULONG r_words[P256_LIMBS];
- if (!bn_copy_words(r_words, P256_LIMBS, r)) {
- return 0;
- }
-
- // We wish to compare X/Z^2 with r. This is equivalent to comparing X with
- // r*Z^2. Note that X and Z are represented in Montgomery form, while r is
- // not.
- BN_ULONG r_Z2[P256_LIMBS], Z2_mont[P256_LIMBS], X[P256_LIMBS];
- ecp_nistz256_mul_mont(Z2_mont, p->raw.Z.words, p->raw.Z.words);
- ecp_nistz256_mul_mont(r_Z2, r_words, Z2_mont);
- ecp_nistz256_from_mont(X, p->raw.X.words);
-
- if (OPENSSL_memcmp(r_Z2, X, sizeof(r_Z2)) == 0) {
- return 1;
- }
-
- // During signing the x coefficient is reduced modulo the group order.
- // Therefore there is a small possibility, less than 1/2^128, that group_order
- // < p.x < P. in that case we need not only to compare against |r| but also to
- // compare against r+group_order.
-
- // P_MINUS_ORDER is the difference between the field order (p) and the group
- // order (N). This value is not in the Montgomery domain.
- static const BN_ULONG P_MINUS_ORDER[P256_LIMBS] = {
- TOBN(0x0c46353d, 0x039cdaae), TOBN(0x43190553, 0x58e8617b),
- TOBN(0x00000000, 0x00000000), TOBN(0x00000000, 0x00000000)};
-
- if (bn_less_than_words(r_words, P_MINUS_ORDER, P256_LIMBS)) {
- // We can add in-place, ignoring the carry, because: r + group_order < p <
- // 2^256
- bn_add_words(r_words, r_words, P256_ORDER, P256_LIMBS);
- ecp_nistz256_mul_mont(r_Z2, r_words, Z2_mont);
- if (OPENSSL_memcmp(r_Z2, X, sizeof(r_Z2)) == 0) {
- return 1;
- }
- }
-
- OPENSSL_PUT_ERROR(ECDSA, ECDSA_R_BAD_SIGNATURE);
- return 0;
-}
-
DEFINE_METHOD_FUNCTION(EC_METHOD, EC_GFp_nistz256_method) {
out->group_init = ec_GFp_mont_group_init;
out->group_finish = ec_GFp_mont_group_finish;
@@ -663,14 +521,12 @@
out->add = ecp_nistz256_add;
out->dbl = ecp_nistz256_dbl;
out->mul = ecp_nistz256_points_mul;
- out->mul_public = ecp_nistz256_points_mul_public;
+ out->mul_public = ecp_nistz256_points_mul;
out->felem_mul = ec_GFp_mont_felem_mul;
out->felem_sqr = ec_GFp_mont_felem_sqr;
out->bignum_to_felem = ec_GFp_mont_bignum_to_felem;
out->felem_to_bignum = ec_GFp_mont_felem_to_bignum;
out->scalar_inv_montgomery = ecp_nistz256_inv_mod_ord;
- out->scalar_inv_montgomery_vartime = ecp_nistz256_mont_inv_mod_ord_vartime;
- out->cmp_x_coordinate = ecp_nistz256_cmp_x_coordinate;
};
#endif /* !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && \
diff --git a/crypto/fipsmodule/ec/p256-x86_64.h b/crypto/fipsmodule/ec/p256-x86_64.h
index 9de3240..21b461c 100644
--- a/crypto/fipsmodule/ec/p256-x86_64.h
+++ b/crypto/fipsmodule/ec/p256-x86_64.h
@@ -61,16 +61,6 @@
ecp_nistz256_mul_mont(res, in, ONE);
}
-// ecp_nistz256_to_mont sets |res| to |in|, converted to Montgomery domain
-// by multiplying with RR = 2^512 mod P precomputed for NIST P256 curve.
-static inline void ecp_nistz256_to_mont(BN_ULONG res[P256_LIMBS],
- const BN_ULONG in[P256_LIMBS]) {
- static const BN_ULONG RR[P256_LIMBS] = {
- TOBN(0x00000000, 0x00000003), TOBN(0xfffffffb, 0xffffffff),
- TOBN(0xffffffff, 0xfffffffe), TOBN(0x00000004, 0xfffffffd)};
- ecp_nistz256_mul_mont(res, in, RR);
-}
-
// P-256 scalar operations.
//
@@ -89,12 +79,6 @@
void ecp_nistz256_ord_sqr_mont(BN_ULONG res[P256_LIMBS],
const BN_ULONG a[P256_LIMBS], int rep);
-// beeu_mod_inverse_vartime sets out = a^-1 mod p using a Euclidean algorithm.
-// Assumption: 0 < a < p < 2^(256) and p is odd.
-int beeu_mod_inverse_vartime(BN_ULONG out[P256_LIMBS],
- const BN_ULONG a[P256_LIMBS],
- const BN_ULONG p[P256_LIMBS]);
-
// P-256 point operations.
//
diff --git a/crypto/fipsmodule/ec/p256-x86_64_test.cc b/crypto/fipsmodule/ec/p256-x86_64_test.cc
index 68749e2..8ed1dd4 100644
--- a/crypto/fipsmodule/ec/p256-x86_64_test.cc
+++ b/crypto/fipsmodule/ec/p256-x86_64_test.cc
@@ -24,11 +24,8 @@
#include <gtest/gtest.h>
#include <openssl/bn.h>
-#include <openssl/ec.h>
#include <openssl/mem.h>
-#include <openssl/nid.h>
-#include "internal.h"
#include "../bn/internal.h"
#include "../../internal.h"
#include "../../test/file_test.h"
@@ -90,59 +87,6 @@
}
}
-TEST(P256_X86_64Test, BEEU) {
- bssl::UniquePtr<EC_GROUP> group(
- EC_GROUP_new_by_curve_name(NID_X9_62_prime256v1));
- ASSERT_TRUE(group);
-
- BN_ULONG order_words[P256_LIMBS];
- ASSERT_TRUE(
- bn_copy_words(order_words, P256_LIMBS, EC_GROUP_get0_order(group.get())));
-
- BN_ULONG in[P256_LIMBS], out[P256_LIMBS];
- EC_SCALAR in_scalar, out_scalar, result;
- OPENSSL_memset(in, 0, sizeof(in));
-
- // Trying to find the inverse of zero should fail.
- ASSERT_FALSE(beeu_mod_inverse_vartime(out, in, order_words));
-
- // kOneMont is 1, in Montgomery form.
- static const BN_ULONG kOneMont[P256_LIMBS] = {
- TOBN(0xc46353d, 0x039cdaaf), TOBN(0x43190552, 0x58e8617b),
- 0, 0xffffffff,
- };
-
- for (BN_ULONG i = 1; i < 2000; i++) {
- SCOPED_TRACE(i);
-
- in[0] = i;
- if (i >= 1000) {
- in[1] = i << 8;
- in[2] = i << 32;
- in[3] = i << 48;
- } else {
- in[1] = in[2] = in[3] = 0;
- }
-
- EXPECT_TRUE(bn_less_than_words(in, order_words, P256_LIMBS));
- ASSERT_TRUE(beeu_mod_inverse_vartime(out, in, order_words));
- EXPECT_TRUE(bn_less_than_words(out, order_words, P256_LIMBS));
-
- // Calculate out*in and confirm that it equals one, modulo the order.
- OPENSSL_memcpy(in_scalar.bytes, in, sizeof(in));
- OPENSSL_memcpy(out_scalar.bytes, out, sizeof(out));
- ec_scalar_to_montgomery(group.get(), &in_scalar, &in_scalar);
- ec_scalar_to_montgomery(group.get(), &out_scalar, &out_scalar);
- ec_scalar_mul_montgomery(group.get(), &result, &in_scalar, &out_scalar);
-
- EXPECT_EQ(0, OPENSSL_memcmp(kOneMont, &result, sizeof(kOneMont)));
-
- // Invert the result and expect to get back to the original value.
- ASSERT_TRUE(beeu_mod_inverse_vartime(out, out, order_words));
- EXPECT_EQ(0, OPENSSL_memcmp(in, out, sizeof(in)));
- }
-}
-
static bool GetFieldElement(FileTest *t, BN_ULONG out[P256_LIMBS],
const char *name) {
std::vector<uint8_t> bytes;
diff --git a/crypto/fipsmodule/ec/scalar.c b/crypto/fipsmodule/ec/scalar.c
index 88678a9..1bd6b02 100644
--- a/crypto/fipsmodule/ec/scalar.c
+++ b/crypto/fipsmodule/ec/scalar.c
@@ -74,8 +74,3 @@
const EC_SCALAR *a) {
group->meth->scalar_inv_montgomery(group, r, a);
}
-
-int ec_scalar_inv_montgomery_vartime(const EC_GROUP *group, EC_SCALAR *r,
- const EC_SCALAR *a) {
- return group->meth->scalar_inv_montgomery_vartime(group, r, a);
-}
diff --git a/crypto/fipsmodule/ec/simple.c b/crypto/fipsmodule/ec/simple.c
index 854da1e..5c63711 100644
--- a/crypto/fipsmodule/ec/simple.c
+++ b/crypto/fipsmodule/ec/simple.c
@@ -570,55 +570,3 @@
// The points are equal.
return 0;
}
-
-int ec_GFp_simple_mont_inv_mod_ord_vartime(const EC_GROUP *group,
- EC_SCALAR *out,
- const EC_SCALAR *in) {
- // This implementation (in fact) runs in constant time,
- // even though for this interface it is not mandatory.
-
- // out = in^-1 in the Montgomery domain. This is
- // |ec_scalar_to_montgomery| followed by |ec_scalar_inv_montgomery|, but
- // |ec_scalar_inv_montgomery| followed by |ec_scalar_from_montgomery| is
- // equivalent and slightly more efficient.
- ec_scalar_inv_montgomery(group, out, in);
- ec_scalar_from_montgomery(group, out, out);
- return 1;
-}
-
-// Compares the x (affine) coordinate of the point p with x.
-// Return 1 on success 0 otherwise
-// Assumption: the caller starts the BN_CTX.
-int ec_GFp_simple_cmp_x_coordinate(const EC_GROUP *group, const EC_POINT *p,
- const BIGNUM *r, BN_CTX *ctx) {
- int ret = 0;
- BN_CTX_start(ctx);
-
- BIGNUM *X = BN_CTX_get(ctx);
- if (X == NULL) {
- OPENSSL_PUT_ERROR(ECDSA, ERR_R_BN_LIB);
- goto out;
- }
-
- if (!EC_POINT_get_affine_coordinates_GFp(group, p, X, NULL, ctx)) {
- OPENSSL_PUT_ERROR(ECDSA, ERR_R_EC_LIB);
- goto out;
- }
-
- if (!ec_field_element_to_scalar(group, X)) {
- OPENSSL_PUT_ERROR(ECDSA, ERR_R_BN_LIB);
- goto out;
- }
-
- // The signature is correct iff |X| is equal to |r|.
- if (BN_ucmp(X, r) != 0) {
- OPENSSL_PUT_ERROR(ECDSA, ECDSA_R_BAD_SIGNATURE);
- goto out;
- }
-
- ret = 1;
-
-out:
- BN_CTX_end(ctx);
- return ret;
-}
diff --git a/crypto/fipsmodule/ecdsa/ecdsa.c b/crypto/fipsmodule/ecdsa/ecdsa.c
index 7ec4565..f3ce214 100644
--- a/crypto/fipsmodule/ecdsa/ecdsa.c
+++ b/crypto/fipsmodule/ecdsa/ecdsa.c
@@ -98,6 +98,40 @@
order->width);
}
+// field_element_to_scalar reduces |r| modulo |group->order|. |r| must
+// previously have been reduced modulo |group->field|.
+static int field_element_to_scalar(const EC_GROUP *group, BIGNUM *r) {
+ // We must have p < 2×order, assuming p is not tiny (p >= 17). Thus rather we
+ // can reduce by performing at most one subtraction.
+ //
+ // Proof: We only work with prime order curves, so the number of points on
+ // the curve is the order. Thus Hasse's theorem gives:
+ //
+ // |order - (p + 1)| <= 2×sqrt(p)
+ // p + 1 - order <= 2×sqrt(p)
+ // p + 1 - 2×sqrt(p) <= order
+ // p + 1 - 2×(p/4) < order (p/4 > sqrt(p) for p >= 17)
+ // p/2 < p/2 + 1 < order
+ // p < 2×order
+ //
+ // Additionally, one can manually check this property for built-in curves. It
+ // is enforced for legacy custom curves in |EC_GROUP_set_generator|.
+ //
+ // TODO(davidben): Introduce |EC_FIELD_ELEMENT|, make this a function from
+ // |EC_FIELD_ELEMENT| to |EC_SCALAR|, and cut out the |BIGNUM|. Does this need
+ // to be constant-time for signing? |r| is the x-coordinate for kG, which is
+ // public unless k was rerolled because |s| was zero.
+ assert(!BN_is_negative(r));
+ assert(BN_cmp(r, &group->field) < 0);
+ if (BN_cmp(r, &group->order) >= 0 &&
+ !BN_sub(r, r, &group->order)) {
+ return 0;
+ }
+ assert(!BN_is_negative(r));
+ assert(BN_cmp(r, &group->order) < 0);
+ return 1;
+}
+
ECDSA_SIG *ECDSA_SIG_new(void) {
ECDSA_SIG *sig = OPENSSL_malloc(sizeof(ECDSA_SIG));
if (sig == NULL) {
@@ -159,6 +193,12 @@
}
int ret = 0;
EC_POINT *point = NULL;
+ BN_CTX_start(ctx);
+ BIGNUM *X = BN_CTX_get(ctx);
+ if (X == NULL) {
+ OPENSSL_PUT_ERROR(ECDSA, ERR_R_BN_LIB);
+ goto err;
+ }
EC_SCALAR r, s, u1, u2, s_inv_mont, m;
if (BN_is_zero(sig->r) ||
@@ -170,7 +210,11 @@
}
// s_inv_mont = s^-1 in the Montgomery domain. This is
- ec_scalar_inv_montgomery_vartime(group, &s_inv_mont, &s);
+ // |ec_scalar_to_montgomery| followed by |ec_scalar_inv_montgomery|, but
+ // |ec_scalar_inv_montgomery| followed by |ec_scalar_from_montgomery| is
+ // equivalent and slightly more efficient.
+ ec_scalar_inv_montgomery(group, &s_inv_mont, &s);
+ ec_scalar_from_montgomery(group, &s_inv_mont, &s_inv_mont);
// u1 = m * s^-1 mod order
// u2 = r * s^-1 mod order
@@ -190,10 +234,24 @@
OPENSSL_PUT_ERROR(ECDSA, ERR_R_EC_LIB);
goto err;
}
+ if (!EC_POINT_get_affine_coordinates_GFp(group, point, X, NULL, ctx)) {
+ OPENSSL_PUT_ERROR(ECDSA, ERR_R_EC_LIB);
+ goto err;
+ }
+ if (!field_element_to_scalar(group, X)) {
+ OPENSSL_PUT_ERROR(ECDSA, ERR_R_BN_LIB);
+ goto err;
+ }
+ // The signature is correct iff |X| is equal to |sig->r|.
+ if (BN_ucmp(X, sig->r) != 0) {
+ OPENSSL_PUT_ERROR(ECDSA, ECDSA_R_BAD_SIGNATURE);
+ goto err;
+ }
- ret = ec_cmp_x_coordinate(group, point, sig->r, ctx);
+ ret = 1;
err:
+ BN_CTX_end(ctx);
BN_CTX_free(ctx);
EC_POINT_free(point);
return ret;
@@ -262,7 +320,7 @@
goto err;
}
- if (!ec_field_element_to_scalar(group, r)) {
+ if (!field_element_to_scalar(group, r)) {
goto err;
}
} while (BN_is_zero(r));
diff --git a/third_party/fiat/p256.c b/third_party/fiat/p256.c
index b6f3f49..2d40ea0 100644
--- a/third_party/fiat/p256.c
+++ b/third_party/fiat/p256.c
@@ -1836,8 +1836,6 @@
out->bignum_to_felem = ec_GFp_mont_bignum_to_felem;
out->felem_to_bignum = ec_GFp_mont_felem_to_bignum;
out->scalar_inv_montgomery = ec_simple_scalar_inv_montgomery;
- out->scalar_inv_montgomery_vartime = ec_GFp_simple_mont_inv_mod_ord_vartime;
- out->cmp_x_coordinate = ec_GFp_simple_cmp_x_coordinate;
};
#undef BORINGSSL_NISTP256_64BIT