Revert "Revert "Speed up ECDSA verify on x86-64.""
This reverts commit e907ed4c4b92cd7cea250240e32695c73dacf7eb. CPUID
checks have been added so hopefully this time sticks.
Change-Id: I5e0e5b87427c1230132681f936b3c70bac8263b8
Reviewed-on: https://boringssl-review.googlesource.com/c/32924
Commit-Queue: David Benjamin <davidben@google.com>
Reviewed-by: David Benjamin <davidben@google.com>
CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
diff --git a/crypto/fipsmodule/CMakeLists.txt b/crypto/fipsmodule/CMakeLists.txt
index e6c8cc6..463febb 100644
--- a/crypto/fipsmodule/CMakeLists.txt
+++ b/crypto/fipsmodule/CMakeLists.txt
@@ -11,6 +11,7 @@
ghash-x86_64.${ASM_EXT}
md5-x86_64.${ASM_EXT}
p256-x86_64-asm.${ASM_EXT}
+ p256_beeu-x86_64-asm.${ASM_EXT}
rdrand-x86_64.${ASM_EXT}
rsaz-avx2.${ASM_EXT}
sha1-x86_64.${ASM_EXT}
@@ -100,6 +101,7 @@
perlasm(md5-586.${ASM_EXT} md5/asm/md5-586.pl)
perlasm(md5-x86_64.${ASM_EXT} md5/asm/md5-x86_64.pl)
perlasm(p256-x86_64-asm.${ASM_EXT} ec/asm/p256-x86_64-asm.pl)
+perlasm(p256_beeu-x86_64-asm.${ASM_EXT} ec/asm/p256_beeu-x86_64-asm.pl)
perlasm(rdrand-x86_64.${ASM_EXT} rand/asm/rdrand-x86_64.pl)
perlasm(rsaz-avx2.${ASM_EXT} bn/asm/rsaz-avx2.pl)
perlasm(sha1-586.${ASM_EXT} sha/asm/sha1-586.pl)
diff --git a/crypto/fipsmodule/ec/asm/p256_beeu-x86_64-asm.pl b/crypto/fipsmodule/ec/asm/p256_beeu-x86_64-asm.pl
new file mode 100644
index 0000000..12b9f5a
--- /dev/null
+++ b/crypto/fipsmodule/ec/asm/p256_beeu-x86_64-asm.pl
@@ -0,0 +1,405 @@
+# Copyright (c) 2018, Amazon Inc.
+#
+# Permission to use, copy, modify, and/or distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+#
+# Written by Nir Drucker, and Shay Gueron
+# AWS Cryptographic Algorithms Group
+# (ndrucker@amazon.com, gueron@amazon.com)
+# based on BN_mod_inverse_odd
+
+$flavour = shift;
+$output = shift;
+if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
+
+$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../../perlasm/x86_64-xlate.pl" and -f $xlate) or
+die "can't locate x86_64-xlate.pl";
+
+open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
+*STDOUT=*OUT;
+
+#############################################################################
+# extern int beeu_mod_inverse_vartime(BN_ULONG out[P256_LIMBS],
+# BN_ULONG a[P256_LIMBS],
+# BN_ULONG n[P256_LIMBS]);
+#
+# (Binary Extended Euclidean Algorithm.
+# See https://en.wikipedia.org/wiki/Binary_GCD_algorithm)
+#
+# Assumption 1: n is odd for the BEEU
+# Assumption 2: 1 < a < n < 2^256
+
+$out = "%rdi";
+$a = "%rsi";
+$n = "%rdx";
+
+# X/Y will hold the inverse parameter
+# Assumption: X,Y<2^(256)
+$x0 = "%r8";
+$x1 = "%r9";
+$x2 = "%r10";
+$x3 = "%r11";
+# borrow from out (out is needed only at the end)
+$x4 = "%rdi";
+$y0 = "%r12";
+$y1 = "%r13";
+$y2 = "%r14";
+$y3 = "%r15";
+$y4 = "%rbp";
+$shift = "%rcx";
+$t0 = "%rax";
+$t1 = "%rbx";
+$t2 = "%rsi";
+# borrow
+$t3 = "%rcx";
+
+$T0 = "%xmm0";
+$T1 = "%xmm1";
+
+# Offsets on the stack
+$out_rsp = 0;
+$shift_rsp = $out_rsp+0x8;
+$a_rsp0 = $shift_rsp+0x8;
+$a_rsp1 = $a_rsp0+0x8;
+$a_rsp2 = $a_rsp1+0x8;
+$a_rsp3 = $a_rsp2+0x8;
+$b_rsp0 = $a_rsp3+0x8;
+$b_rsp1 = $b_rsp0+0x8;
+$b_rsp2 = $b_rsp1+0x8;
+$b_rsp3 = $b_rsp2+0x8;
+
+# Borrow when a_rsp/b_rsp are no longer needed.
+$y_rsp0 = $a_rsp0;
+$y_rsp1 = $y_rsp0+0x8;
+$y_rsp2 = $y_rsp1+0x8;
+$y_rsp3 = $y_rsp2+0x8;
+$y_rsp4 = $y_rsp3+0x8;
+$last_rsp_offset = $b_rsp3+0x8;
+
+sub TEST_B_ZERO {
+ return <<___;
+ xorq $t1, $t1
+ or $b_rsp0(%rsp), $t1
+ or $b_rsp1(%rsp), $t1
+ or $b_rsp2(%rsp), $t1
+ or $b_rsp3(%rsp), $t1
+ jz .Lbeeu_loop_end
+___
+}
+
+$g_next_label = 0;
+
+sub SHIFT1 {
+ my ($var0, $var1, $var2, $var3, $var4) = @_;
+ my $label = ".Lshift1_${g_next_label}";
+ $g_next_label++;
+
+ return <<___;
+ # Ensure X is even and divide by two.
+ movq \$1, $t1
+ andq $var0, $t1
+ jz $label
+ add 0*8($n), $var0
+ adc 1*8($n), $var1
+ adc 2*8($n), $var2
+ adc 3*8($n), $var3
+ adc \$0, $var4
+
+$label:
+ shrdq \$1, $var1, $var0
+ shrdq \$1, $var2, $var1
+ shrdq \$1, $var3, $var2
+ shrdq \$1, $var4, $var3
+ shrq \$1, $var4
+___
+}
+
+sub SHIFT256 {
+ my ($var) = @_;
+ return <<___;
+ # Copy shifted values.
+ # Remember not to override t3=rcx
+ movq 1*8+$var(%rsp), $t0
+ movq 2*8+$var(%rsp), $t1
+ movq 3*8+$var(%rsp), $t2
+
+ shrdq %cl, $t0, 0*8+$var(%rsp)
+ shrdq %cl, $t1, 1*8+$var(%rsp)
+ shrdq %cl, $t2, 2*8+$var(%rsp)
+
+ shrq %cl, $t2
+ mov $t2, 3*8+$var(%rsp)
+___
+}
+
+$code.=<<___;
+.text
+
+.type beeu_mod_inverse_vartime,\@function
+.hidden beeu_mod_inverse_vartime
+.globl beeu_mod_inverse_vartime
+.align 32
+beeu_mod_inverse_vartime:
+.cfi_startproc
+ push %rbp
+.cfi_push rbp
+ movq %rsp, %rbp
+.cfi_def_cfa_register rbp
+
+ push %r12
+.cfi_push r12
+ push %r13
+.cfi_push r13
+ push %r14
+.cfi_push r14
+ push %r15
+.cfi_push r15
+ push %rbx
+.cfi_push rbx
+ push %rsi
+.cfi_push rsi
+
+ sub \$$last_rsp_offset, %rsp
+ movq $out, $out_rsp(%rsp)
+
+ # X=1, Y=0
+ movq \$1, $x0
+ xorq $x1, $x1
+ xorq $x2, $x2
+ xorq $x3, $x3
+ xorq $x4, $x4
+
+ xorq $y0, $y0
+ xorq $y1, $y1
+ xorq $y2, $y2
+ xorq $y3, $y3
+ xorq $y4, $y4
+
+ # Copy a/n into B/A on the stack.
+ vmovdqu 0*8($a), $T0
+ vmovdqu 2*8($a), $T1
+ vmovdqu $T0, $b_rsp0(%rsp)
+ vmovdqu $T1, $b_rsp2(%rsp)
+
+ vmovdqu 0*8($n), $T0
+ vmovdqu 2*8($n), $T1
+ vmovdqu $T0, $a_rsp0(%rsp)
+ vmovdqu $T1, $a_rsp2(%rsp)
+
+.Lbeeu_loop:
+ ${\TEST_B_ZERO}
+
+ # 0 < B < |n|,
+ # 0 < A <= |n|,
+ # (1) X*a == B (mod |n|),
+ # (2) (-1)*Y*a == A (mod |n|)
+
+ # Now divide B by the maximum possible power of two in the
+ # integers, and divide X by the same value mod |n|. When we're
+ # done, (1) still holds.
+ movq \$1, $shift
+
+ # Note that B > 0
+.Lbeeu_shift_loop_XB:
+ movq $shift, $t1
+ andq $b_rsp0(%rsp), $t1
+ jnz .Lbeeu_shift_loop_end_XB
+
+ ${\SHIFT1($x0, $x1, $x2, $x3, $x4)}
+ shl \$1, $shift
+
+	# Test wraparound of the shift parameter. The probability of having 32 zeroes
+	# in a row is small. Therefore, having the value below equal to \$0x8000000 or
+	# \$0x8000 does not affect the performance. We choose 0x8000000 because it
+	# is the maximal immediate value possible.
+ cmp \$0x8000000, $shift
+ jne .Lbeeu_shift_loop_XB
+
+.Lbeeu_shift_loop_end_XB:
+ bsf $shift, $shift
+ test $shift, $shift
+ jz .Lbeeu_no_shift_XB
+
+ ${\SHIFT256($b_rsp0)}
+
+.Lbeeu_no_shift_XB:
+ # Same for A and Y. Afterwards, (2) still holds.
+ movq \$1, $shift
+
+ # Note that A > 0
+.Lbeeu_shift_loop_YA:
+ movq $shift, $t1
+ andq $a_rsp0(%rsp), $t1
+ jnz .Lbeeu_shift_loop_end_YA
+
+ ${\SHIFT1($y0, $y1, $y2, $y3, $y4)}
+ shl \$1, $shift
+
+	# Test wraparound of the shift parameter. The probability of having 32 zeroes
+	# in a row is small. Therefore, having the value below equal to \$0x8000000 or
+	# \$0x8000 does not affect the performance. We choose 0x8000000 because it
+	# is the maximal immediate value possible.
+ cmp \$0x8000000, $shift
+ jne .Lbeeu_shift_loop_YA
+
+.Lbeeu_shift_loop_end_YA:
+ bsf $shift, $shift
+ test $shift, $shift
+ jz .Lbeeu_no_shift_YA
+
+ ${\SHIFT256($a_rsp0)}
+
+.Lbeeu_no_shift_YA:
+ # T = B-A (A,B < 2^256)
+ mov $b_rsp0(%rsp), $t0
+ mov $b_rsp1(%rsp), $t1
+ mov $b_rsp2(%rsp), $t2
+ mov $b_rsp3(%rsp), $t3
+ sub $a_rsp0(%rsp), $t0
+ sbb $a_rsp1(%rsp), $t1
+ sbb $a_rsp2(%rsp), $t2
+ sbb $a_rsp3(%rsp), $t3 # borrow from shift
+ jnc .Lbeeu_B_bigger_than_A
+
+ # A = A - B
+ mov $a_rsp0(%rsp), $t0
+ mov $a_rsp1(%rsp), $t1
+ mov $a_rsp2(%rsp), $t2
+ mov $a_rsp3(%rsp), $t3
+ sub $b_rsp0(%rsp), $t0
+ sbb $b_rsp1(%rsp), $t1
+ sbb $b_rsp2(%rsp), $t2
+ sbb $b_rsp3(%rsp), $t3
+ mov $t0, $a_rsp0(%rsp)
+ mov $t1, $a_rsp1(%rsp)
+ mov $t2, $a_rsp2(%rsp)
+ mov $t3, $a_rsp3(%rsp)
+
+ # Y = Y + X
+ add $x0, $y0
+ adc $x1, $y1
+ adc $x2, $y2
+ adc $x3, $y3
+ adc $x4, $y4
+ jmp .Lbeeu_loop
+
+.Lbeeu_B_bigger_than_A:
+ # B = T = B - A
+ mov $t0, $b_rsp0(%rsp)
+ mov $t1, $b_rsp1(%rsp)
+ mov $t2, $b_rsp2(%rsp)
+ mov $t3, $b_rsp3(%rsp)
+
+ # X = Y + X
+ add $y0, $x0
+ adc $y1, $x1
+ adc $y2, $x2
+ adc $y3, $x3
+ adc $y4, $x4
+
+ jmp .Lbeeu_loop
+
+.Lbeeu_loop_end:
+	# Euclid's algorithm loop ends when A == beeu(a,n);
+	# therefore (-1)*Y*a == A (mod |n|), Y > 0.
+
+ # Verify that A = 1 ==> (-1)*Y*a = A = 1 (mod |n|)
+ mov $a_rsp0(%rsp), $t1
+ sub \$1, $t1
+ or $a_rsp1(%rsp), $t1
+ or $a_rsp2(%rsp), $t1
+ or $a_rsp3(%rsp), $t1
+ # If not, fail.
+ jnz .Lbeeu_err
+
+ # From this point on, we no longer need X
+ # Therefore we use it as a temporary storage.
+ # X = n
+ movq 0*8($n), $x0
+ movq 1*8($n), $x1
+ movq 2*8($n), $x2
+ movq 3*8($n), $x3
+ xorq $x4, $x4
+
+.Lbeeu_reduction_loop:
+ movq $y0, $y_rsp0(%rsp)
+ movq $y1, $y_rsp1(%rsp)
+ movq $y2, $y_rsp2(%rsp)
+ movq $y3, $y_rsp3(%rsp)
+ movq $y4, $y_rsp4(%rsp)
+
+ # If Y>n ==> Y=Y-n
+ sub $x0, $y0
+ sbb $x1, $y1
+ sbb $x2, $y2
+ sbb $x3, $y3
+ sbb \$0, $y4
+
+ # Choose old Y or new Y
+ cmovc $y_rsp0(%rsp), $y0
+ cmovc $y_rsp1(%rsp), $y1
+ cmovc $y_rsp2(%rsp), $y2
+ cmovc $y_rsp3(%rsp), $y3
+ jnc .Lbeeu_reduction_loop
+
+ # X = n - Y (n, Y < 2^256), (Cancel the (-1))
+ sub $y0, $x0
+ sbb $y1, $x1
+ sbb $y2, $x2
+ sbb $y3, $x3
+
+.Lbeeu_save:
+ # Save the inverse(<2^256) to out.
+ mov $out_rsp(%rsp), $out
+
+ movq $x0, 0*8($out)
+ movq $x1, 1*8($out)
+ movq $x2, 2*8($out)
+ movq $x3, 3*8($out)
+
+ # Return 1.
+ movq \$1, %rax
+ jmp .Lbeeu_finish
+
+.Lbeeu_err:
+ # Return 0.
+ xorq %rax, %rax
+
+.Lbeeu_finish:
+ add \$$last_rsp_offset, %rsp
+ pop %rsi
+.cfi_pop rsi
+ pop %rbx
+.cfi_pop rbx
+ pop %r15
+.cfi_pop r15
+ pop %r14
+.cfi_pop r14
+ pop %r13
+.cfi_pop r13
+ pop %r12
+.cfi_pop r12
+ pop %rbp
+.cfi_pop rbp
+.cfi_def_cfa rsp, 8
+.cfi_endproc
+ ret
+
+.size beeu_mod_inverse_vartime, .-beeu_mod_inverse_vartime
+___
+
+print $code;
+close STDOUT;
diff --git a/crypto/fipsmodule/ec/ec.c b/crypto/fipsmodule/ec/ec.c
index 134d051..4c557a1 100644
--- a/crypto/fipsmodule/ec/ec.c
+++ b/crypto/fipsmodule/ec/ec.c
@@ -911,6 +911,43 @@
return 1;
}
+int ec_cmp_x_coordinate(const EC_GROUP *group, const EC_POINT *p,
+ const BIGNUM *r, BN_CTX *ctx) {
+ return group->meth->cmp_x_coordinate(group, p, r, ctx);
+}
+
+int ec_field_element_to_scalar(const EC_GROUP *group, BIGNUM *r) {
+  // We must have p < 2×order, assuming p is not tiny (p >= 17). Thus we can
+  // reduce by performing at most one subtraction.
+ //
+ // Proof: We only work with prime order curves, so the number of points on
+ // the curve is the order. Thus Hasse's theorem gives:
+ //
+ // |order - (p + 1)| <= 2×sqrt(p)
+ // p + 1 - order <= 2×sqrt(p)
+ // p + 1 - 2×sqrt(p) <= order
+ // p + 1 - 2×(p/4) < order (p/4 > sqrt(p) for p >= 17)
+ // p/2 < p/2 + 1 < order
+ // p < 2×order
+ //
+ // Additionally, one can manually check this property for built-in curves. It
+ // is enforced for legacy custom curves in |EC_GROUP_set_generator|.
+ //
+ // TODO(davidben): Introduce |EC_FIELD_ELEMENT|, make this a function from
+ // |EC_FIELD_ELEMENT| to |EC_SCALAR|, and cut out the |BIGNUM|. Does this need
+ // to be constant-time for signing? |r| is the x-coordinate for kG, which is
+ // public unless k was rerolled because |s| was zero.
+ assert(!BN_is_negative(r));
+ assert(BN_cmp(r, &group->field) < 0);
+ if (BN_cmp(r, &group->order) >= 0 &&
+ !BN_sub(r, r, &group->order)) {
+ return 0;
+ }
+ assert(!BN_is_negative(r));
+ assert(BN_cmp(r, &group->order) < 0);
+ return 1;
+}
+
void EC_GROUP_set_asn1_flag(EC_GROUP *group, int flag) {}
const EC_METHOD *EC_GROUP_method_of(const EC_GROUP *group) {
diff --git a/crypto/fipsmodule/ec/ec_montgomery.c b/crypto/fipsmodule/ec/ec_montgomery.c
index 8f277b0..90bd8ce 100644
--- a/crypto/fipsmodule/ec/ec_montgomery.c
+++ b/crypto/fipsmodule/ec/ec_montgomery.c
@@ -439,4 +439,6 @@
out->bignum_to_felem = ec_GFp_mont_bignum_to_felem;
out->felem_to_bignum = ec_GFp_mont_felem_to_bignum;
out->scalar_inv_montgomery = ec_simple_scalar_inv_montgomery;
+ out->scalar_inv_montgomery_vartime = ec_GFp_simple_mont_inv_mod_ord_vartime;
+ out->cmp_x_coordinate = ec_GFp_simple_cmp_x_coordinate;
}
diff --git a/crypto/fipsmodule/ec/internal.h b/crypto/fipsmodule/ec/internal.h
index e62ceb8..0588fb5 100644
--- a/crypto/fipsmodule/ec/internal.h
+++ b/crypto/fipsmodule/ec/internal.h
@@ -167,11 +167,21 @@
int (*felem_to_bignum)(const EC_GROUP *group, BIGNUM *out,
const EC_FELEM *in);
- // scalar_inv_mont sets |out| to |in|^-1, where both input and output are in
- // Montgomery form.
+ // scalar_inv_montgomery sets |out| to |in|^-1, where both input and output
+ // are in Montgomery form.
void (*scalar_inv_montgomery)(const EC_GROUP *group, EC_SCALAR *out,
const EC_SCALAR *in);
+ // scalar_inv_montgomery_vartime performs the same computation as
+ // |scalar_inv_montgomery|. It further assumes that the inputs are public so
+ // there is no concern about leaking their values through timing.
+ int (*scalar_inv_montgomery_vartime)(const EC_GROUP *group, EC_SCALAR *out,
+ const EC_SCALAR *in);
+
+ // cmp_x_coordinate compares the x (affine) coordinate of |p|, mod the group
+ // order, with |r|. It returns one if they are equal and zero otherwise.
+ int (*cmp_x_coordinate)(const EC_GROUP *group, const EC_POINT *p,
+ const BIGNUM *r, BN_CTX *ctx);
} /* EC_METHOD */;
const EC_METHOD *EC_GFp_mont_method(void);
@@ -277,6 +287,11 @@
void ec_scalar_inv_montgomery(const EC_GROUP *group, EC_SCALAR *r,
const EC_SCALAR *a);
+// ec_scalar_inv_montgomery_vartime performs the same actions as
+// |ec_scalar_inv_montgomery|, but in variable time.
+int ec_scalar_inv_montgomery_vartime(const EC_GROUP *group, EC_SCALAR *r,
+ const EC_SCALAR *a);
+
// ec_point_mul_scalar sets |r| to generator * |g_scalar| + |p| *
// |p_scalar|. Unlike other functions which take |EC_SCALAR|, |g_scalar| and
// |p_scalar| need not be fully reduced. They need only contain as many bits as
@@ -292,6 +307,16 @@
const EC_GROUP *group, EC_POINT *r, const EC_SCALAR *g_scalar,
const EC_POINT *p, const EC_SCALAR *p_scalar, BN_CTX *ctx);
+// ec_cmp_x_coordinate compares the x (affine) coordinate of |p| with |r|. It
+// returns one if they are equal and zero otherwise. The |ctx| must have been
+// started by the caller.
+int ec_cmp_x_coordinate(const EC_GROUP *group, const EC_POINT *p,
+ const BIGNUM *r, BN_CTX *ctx);
+
+// ec_field_element_to_scalar reduces |r| modulo |group->order|. |r| must
+// previously have been reduced modulo |group->field|.
+int ec_field_element_to_scalar(const EC_GROUP *group, BIGNUM *r);
+
void ec_GFp_mont_mul(const EC_GROUP *group, EC_RAW_POINT *r,
const EC_SCALAR *g_scalar, const EC_RAW_POINT *p,
const EC_SCALAR *p_scalar);
@@ -336,6 +361,14 @@
void ec_simple_scalar_inv_montgomery(const EC_GROUP *group, EC_SCALAR *r,
const EC_SCALAR *a);
+int ec_GFp_simple_mont_inv_mod_ord_vartime(const EC_GROUP *group, EC_SCALAR *r,
+ const EC_SCALAR *a);
+
+// ec_GFp_simple_cmp_x_coordinate compares the x (affine) coordinate of |p|,
+// mod the group order, with |r|. It returns one if equal and zero otherwise.
+int ec_GFp_simple_cmp_x_coordinate(const EC_GROUP *group, const EC_POINT *p,
+ const BIGNUM *r, BN_CTX *ctx);
+
// method functions in montgomery.c
int ec_GFp_mont_group_init(EC_GROUP *);
int ec_GFp_mont_group_set_curve(EC_GROUP *, const BIGNUM *p, const BIGNUM *a,
diff --git a/crypto/fipsmodule/ec/p224-64.c b/crypto/fipsmodule/ec/p224-64.c
index ebaca85..27bfc36 100644
--- a/crypto/fipsmodule/ec/p224-64.c
+++ b/crypto/fipsmodule/ec/p224-64.c
@@ -1136,6 +1136,8 @@
out->bignum_to_felem = ec_GFp_nistp224_bignum_to_felem;
out->felem_to_bignum = ec_GFp_nistp224_felem_to_bignum;
out->scalar_inv_montgomery = ec_simple_scalar_inv_montgomery;
+ out->scalar_inv_montgomery_vartime = ec_GFp_simple_mont_inv_mod_ord_vartime;
+ out->cmp_x_coordinate = ec_GFp_simple_cmp_x_coordinate;
};
#endif // BORINGSSL_HAS_UINT128 && !SMALL
diff --git a/crypto/fipsmodule/ec/p256-x86_64.c b/crypto/fipsmodule/ec/p256-x86_64.c
index 73d36b6..4008aa4 100644
--- a/crypto/fipsmodule/ec/p256-x86_64.c
+++ b/crypto/fipsmodule/ec/p256-x86_64.c
@@ -44,6 +44,12 @@
TOBN(0xffffffff, 0xffffffff), TOBN(0x00000000, 0xfffffffe),
};
+// P256_ORDER is the order of the P-256 group, not in Montgomery form.
+static const BN_ULONG P256_ORDER[P256_LIMBS] = {
+ TOBN(0xf3b9cac2, 0xfc632551), TOBN(0xbce6faad, 0xa7179e84),
+ TOBN(0xffffffff, 0xffffffff), TOBN(0xffffffff, 0x00000000),
+};
+
// Precomputed tables for the default generator
#include "p256-x86_64-table.h"
@@ -289,19 +295,63 @@
ecp_nistz256_point_add(r, r, &h);
}
+typedef union {
+ P256_POINT p;
+ P256_POINT_AFFINE a;
+} p256_point_union_t;
+
+static unsigned calc_first_wvalue(unsigned *index, const uint8_t p_str[33]) {
+ static const unsigned kWindowSize = 7;
+ static const unsigned kMask = (1 << (7 /* kWindowSize */ + 1)) - 1;
+ *index = kWindowSize;
+
+ unsigned wvalue = (p_str[0] << 1) & kMask;
+ return booth_recode_w7(wvalue);
+}
+
+static unsigned calc_wvalue(unsigned *index, const uint8_t p_str[33]) {
+ static const unsigned kWindowSize = 7;
+ static const unsigned kMask = (1 << (7 /* kWindowSize */ + 1)) - 1;
+
+ const unsigned off = (*index - 1) / 8;
+ unsigned wvalue = p_str[off] | p_str[off + 1] << 8;
+ wvalue = (wvalue >> ((*index - 1) % 8)) & kMask;
+ *index += kWindowSize;
+
+ return booth_recode_w7(wvalue);
+}
+
+static void mul_p_add_and_store(const EC_GROUP *group, EC_RAW_POINT *r,
+ const EC_SCALAR *g_scalar,
+ const EC_RAW_POINT *p_,
+ const EC_SCALAR *p_scalar,
+ p256_point_union_t *t, p256_point_union_t *p) {
+ const int p_is_infinity = g_scalar == NULL;
+ if (p_scalar != NULL) {
+ P256_POINT *out = &t->p;
+ if (p_is_infinity) {
+ out = &p->p;
+ }
+
+ ecp_nistz256_windowed_mul(group, out, p_, p_scalar);
+ if (!p_is_infinity) {
+ ecp_nistz256_point_add(&p->p, &p->p, out);
+ }
+ }
+
+ assert(group->field.width == P256_LIMBS);
+ OPENSSL_memcpy(r->X.words, p->p.X, P256_LIMBS * sizeof(BN_ULONG));
+ OPENSSL_memcpy(r->Y.words, p->p.Y, P256_LIMBS * sizeof(BN_ULONG));
+ OPENSSL_memcpy(r->Z.words, p->p.Z, P256_LIMBS * sizeof(BN_ULONG));
+}
+
static void ecp_nistz256_points_mul(const EC_GROUP *group, EC_RAW_POINT *r,
const EC_SCALAR *g_scalar,
const EC_RAW_POINT *p_,
const EC_SCALAR *p_scalar) {
assert((p_ != NULL) == (p_scalar != NULL));
- static const unsigned kWindowSize = 7;
- static const unsigned kMask = (1 << (7 /* kWindowSize */ + 1)) - 1;
-
- alignas(32) union {
- P256_POINT p;
- P256_POINT_AFFINE a;
- } t, p;
+ alignas(32) p256_point_union_t t, p;
if (g_scalar != NULL) {
uint8_t p_str[33];
@@ -309,10 +359,8 @@
p_str[32] = 0;
// First window
- unsigned wvalue = (p_str[0] << 1) & kMask;
- unsigned index = kWindowSize;
-
- wvalue = booth_recode_w7(wvalue);
+ unsigned index = 0;
+ unsigned wvalue = calc_first_wvalue(&index, p_str);
const PRECOMP256_ROW *const precomputed_table =
(const PRECOMP256_ROW *)ecp_nistz256_precomputed;
@@ -328,12 +376,7 @@
copy_conditional(p.p.Z, ONE, is_not_zero(wvalue >> 1));
for (int i = 1; i < 37; i++) {
- unsigned off = (index - 1) / 8;
- wvalue = p_str[off] | p_str[off + 1] << 8;
- wvalue = (wvalue >> ((index - 1) % 8)) & kMask;
- index += kWindowSize;
-
- wvalue = booth_recode_w7(wvalue);
+ wvalue = calc_wvalue(&index, p_str);
ecp_nistz256_select_w7(&t.a, precomputed_table[i], wvalue >> 1);
@@ -344,23 +387,60 @@
}
}
- const int p_is_infinity = g_scalar == NULL;
- if (p_scalar != NULL) {
- P256_POINT *out = &t.p;
- if (p_is_infinity) {
- out = &p.p;
- }
+ mul_p_add_and_store(group, r, g_scalar, p_, p_scalar, &t, &p);
+}
- ecp_nistz256_windowed_mul(group, out, p_, p_scalar);
- if (!p_is_infinity) {
- ecp_nistz256_point_add(&p.p, &p.p, out);
- }
+static void ecp_nistz256_points_mul_public(const EC_GROUP *group,
+ EC_RAW_POINT *r,
+ const EC_SCALAR *g_scalar,
+ const EC_RAW_POINT *p_,
+ const EC_SCALAR *p_scalar) {
+ assert(p_ != NULL && p_scalar != NULL && g_scalar != NULL);
+
+ alignas(32) p256_point_union_t t, p;
+ uint8_t p_str[33];
+ OPENSSL_memcpy(p_str, g_scalar->bytes, 32);
+ p_str[32] = 0;
+
+ // First window
+ unsigned index = 0;
+ unsigned wvalue = calc_first_wvalue(&index, p_str);
+
+ const PRECOMP256_ROW *const precomputed_table =
+ (const PRECOMP256_ROW *)ecp_nistz256_precomputed;
+
+ // Convert |p| from affine to Jacobian coordinates. We set Z to zero if |p|
+ // is infinity and |ONE| otherwise. |p| was computed from the table, so it
+ // is infinity iff |wvalue >> 1| is zero.
+ if ((wvalue >> 1) != 0) {
+ OPENSSL_memcpy(&p.a, &precomputed_table[0][(wvalue >> 1) - 1], sizeof(p.a));
+ OPENSSL_memcpy(&p.p.Z, ONE, sizeof(p.p.Z));
+ } else {
+ OPENSSL_memset(&p.a, 0, sizeof(p.a));
+ OPENSSL_memset(p.p.Z, 0, sizeof(p.p.Z));
}
- assert(group->field.width == P256_LIMBS);
- OPENSSL_memcpy(r->X.words, p.p.X, P256_LIMBS * sizeof(BN_ULONG));
- OPENSSL_memcpy(r->Y.words, p.p.Y, P256_LIMBS * sizeof(BN_ULONG));
- OPENSSL_memcpy(r->Z.words, p.p.Z, P256_LIMBS * sizeof(BN_ULONG));
+ if ((wvalue & 1) == 1) {
+ ecp_nistz256_neg(p.p.Y, p.p.Y);
+ }
+
+ for (int i = 1; i < 37; i++) {
+ wvalue = calc_wvalue(&index, p_str);
+
+ if ((wvalue >> 1) == 0) {
+ continue;
+ }
+
+ OPENSSL_memcpy(&t.a, &precomputed_table[i][(wvalue >> 1) - 1], sizeof(p.a));
+
+ if ((wvalue & 1) == 1) {
+ ecp_nistz256_neg(t.a.Y, t.a.Y);
+ }
+
+ ecp_nistz256_point_add_affine(&p.p, &p.p, &t.a);
+ }
+
+ mul_p_add_and_store(group, r, g_scalar, p_, p_scalar, &t, &p);
}
static int ecp_nistz256_get_affine(const EC_GROUP *group,
@@ -513,6 +593,73 @@
}
}
+static int ecp_nistz256_mont_inv_mod_ord_vartime(const EC_GROUP *group,
+ EC_SCALAR *out,
+ const EC_SCALAR *in) {
+ if ((OPENSSL_ia32cap_P[1] & (1 << 28)) == 0) {
+ // No AVX support; fallback to generic code.
+ return ec_GFp_simple_mont_inv_mod_ord_vartime(group, out, in);
+ }
+
+ if (!beeu_mod_inverse_vartime(out->words, in->words, P256_ORDER)) {
+ return 0;
+ }
+
+ // The result should be returned in the Montgomery domain.
+ ec_scalar_to_montgomery(group, out, out);
+ return 1;
+}
+
+static int ecp_nistz256_cmp_x_coordinate(const EC_GROUP *group,
+ const EC_POINT *p, const BIGNUM *r,
+ BN_CTX *ctx) {
+ if (ec_GFp_simple_is_at_infinity(group, &p->raw)) {
+ OPENSSL_PUT_ERROR(EC, EC_R_POINT_AT_INFINITY);
+ return 0;
+ }
+
+ BN_ULONG r_words[P256_LIMBS];
+ if (!bn_copy_words(r_words, P256_LIMBS, r)) {
+ return 0;
+ }
+
+ // We wish to compare X/Z^2 with r. This is equivalent to comparing X with
+ // r*Z^2. Note that X and Z are represented in Montgomery form, while r is
+ // not.
+ BN_ULONG r_Z2[P256_LIMBS], Z2_mont[P256_LIMBS], X[P256_LIMBS];
+ ecp_nistz256_mul_mont(Z2_mont, p->raw.Z.words, p->raw.Z.words);
+ ecp_nistz256_mul_mont(r_Z2, r_words, Z2_mont);
+ ecp_nistz256_from_mont(X, p->raw.X.words);
+
+ if (OPENSSL_memcmp(r_Z2, X, sizeof(r_Z2)) == 0) {
+ return 1;
+ }
+
+  // During signing the x coordinate is reduced modulo the group order.
+  // Therefore there is a small possibility, less than 1/2^128, that group_order
+  // < p.x < P. In that case, we need not only to compare against |r| but also
+  // against r+group_order.
+
+ // P_MINUS_ORDER is the difference between the field order (p) and the group
+ // order (N). This value is not in the Montgomery domain.
+ static const BN_ULONG P_MINUS_ORDER[P256_LIMBS] = {
+ TOBN(0x0c46353d, 0x039cdaae), TOBN(0x43190553, 0x58e8617b),
+ TOBN(0x00000000, 0x00000000), TOBN(0x00000000, 0x00000000)};
+
+ if (bn_less_than_words(r_words, P_MINUS_ORDER, P256_LIMBS)) {
+ // We can add in-place, ignoring the carry, because: r + group_order < p <
+ // 2^256
+ bn_add_words(r_words, r_words, P256_ORDER, P256_LIMBS);
+ ecp_nistz256_mul_mont(r_Z2, r_words, Z2_mont);
+ if (OPENSSL_memcmp(r_Z2, X, sizeof(r_Z2)) == 0) {
+ return 1;
+ }
+ }
+
+ OPENSSL_PUT_ERROR(ECDSA, ECDSA_R_BAD_SIGNATURE);
+ return 0;
+}
+
DEFINE_METHOD_FUNCTION(EC_METHOD, EC_GFp_nistz256_method) {
out->group_init = ec_GFp_mont_group_init;
out->group_finish = ec_GFp_mont_group_finish;
@@ -521,12 +668,14 @@
out->add = ecp_nistz256_add;
out->dbl = ecp_nistz256_dbl;
out->mul = ecp_nistz256_points_mul;
- out->mul_public = ecp_nistz256_points_mul;
+ out->mul_public = ecp_nistz256_points_mul_public;
out->felem_mul = ec_GFp_mont_felem_mul;
out->felem_sqr = ec_GFp_mont_felem_sqr;
out->bignum_to_felem = ec_GFp_mont_bignum_to_felem;
out->felem_to_bignum = ec_GFp_mont_felem_to_bignum;
out->scalar_inv_montgomery = ecp_nistz256_inv_mod_ord;
+ out->scalar_inv_montgomery_vartime = ecp_nistz256_mont_inv_mod_ord_vartime;
+ out->cmp_x_coordinate = ecp_nistz256_cmp_x_coordinate;
};
#endif /* !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && \
diff --git a/crypto/fipsmodule/ec/p256-x86_64.h b/crypto/fipsmodule/ec/p256-x86_64.h
index 21b461c..9de3240 100644
--- a/crypto/fipsmodule/ec/p256-x86_64.h
+++ b/crypto/fipsmodule/ec/p256-x86_64.h
@@ -61,6 +61,16 @@
ecp_nistz256_mul_mont(res, in, ONE);
}
+// ecp_nistz256_to_mont sets |res| to |in|, converted to the Montgomery domain
+// by multiplying with RR = 2^512 mod P, precomputed for the NIST P-256 curve.
+static inline void ecp_nistz256_to_mont(BN_ULONG res[P256_LIMBS],
+ const BN_ULONG in[P256_LIMBS]) {
+ static const BN_ULONG RR[P256_LIMBS] = {
+ TOBN(0x00000000, 0x00000003), TOBN(0xfffffffb, 0xffffffff),
+ TOBN(0xffffffff, 0xfffffffe), TOBN(0x00000004, 0xfffffffd)};
+ ecp_nistz256_mul_mont(res, in, RR);
+}
+
// P-256 scalar operations.
//
@@ -79,6 +89,12 @@
void ecp_nistz256_ord_sqr_mont(BN_ULONG res[P256_LIMBS],
const BN_ULONG a[P256_LIMBS], int rep);
+// beeu_mod_inverse_vartime sets out = a^-1 mod p using a Euclidean algorithm.
+// Assumption: 0 < a < p < 2^(256) and p is odd.
+int beeu_mod_inverse_vartime(BN_ULONG out[P256_LIMBS],
+ const BN_ULONG a[P256_LIMBS],
+ const BN_ULONG p[P256_LIMBS]);
+
// P-256 point operations.
//
diff --git a/crypto/fipsmodule/ec/p256-x86_64_test.cc b/crypto/fipsmodule/ec/p256-x86_64_test.cc
index 8ed1dd4..ab93dfb 100644
--- a/crypto/fipsmodule/ec/p256-x86_64_test.cc
+++ b/crypto/fipsmodule/ec/p256-x86_64_test.cc
@@ -24,8 +24,12 @@
#include <gtest/gtest.h>
#include <openssl/bn.h>
+#include <openssl/cpu.h>
+#include <openssl/ec.h>
#include <openssl/mem.h>
+#include <openssl/nid.h>
+#include "internal.h"
#include "../bn/internal.h"
#include "../../internal.h"
#include "../../test/file_test.h"
@@ -87,6 +91,64 @@
}
}
+TEST(P256_X86_64Test, BEEU) {
+ if ((OPENSSL_ia32cap_P[1] & (1 << 28)) == 0) {
+ // No AVX support; cannot run the BEEU code.
+ return;
+ }
+
+ bssl::UniquePtr<EC_GROUP> group(
+ EC_GROUP_new_by_curve_name(NID_X9_62_prime256v1));
+ ASSERT_TRUE(group);
+
+ BN_ULONG order_words[P256_LIMBS];
+ ASSERT_TRUE(
+ bn_copy_words(order_words, P256_LIMBS, EC_GROUP_get0_order(group.get())));
+
+ BN_ULONG in[P256_LIMBS], out[P256_LIMBS];
+ EC_SCALAR in_scalar, out_scalar, result;
+ OPENSSL_memset(in, 0, sizeof(in));
+
+ // Trying to find the inverse of zero should fail.
+ ASSERT_FALSE(beeu_mod_inverse_vartime(out, in, order_words));
+
+ // kOneMont is 1, in Montgomery form.
+ static const BN_ULONG kOneMont[P256_LIMBS] = {
+ TOBN(0xc46353d, 0x039cdaaf), TOBN(0x43190552, 0x58e8617b),
+ 0, 0xffffffff,
+ };
+
+ for (BN_ULONG i = 1; i < 2000; i++) {
+ SCOPED_TRACE(i);
+
+ in[0] = i;
+ if (i >= 1000) {
+ in[1] = i << 8;
+ in[2] = i << 32;
+ in[3] = i << 48;
+ } else {
+ in[1] = in[2] = in[3] = 0;
+ }
+
+ EXPECT_TRUE(bn_less_than_words(in, order_words, P256_LIMBS));
+ ASSERT_TRUE(beeu_mod_inverse_vartime(out, in, order_words));
+ EXPECT_TRUE(bn_less_than_words(out, order_words, P256_LIMBS));
+
+ // Calculate out*in and confirm that it equals one, modulo the order.
+ OPENSSL_memcpy(in_scalar.bytes, in, sizeof(in));
+ OPENSSL_memcpy(out_scalar.bytes, out, sizeof(out));
+ ec_scalar_to_montgomery(group.get(), &in_scalar, &in_scalar);
+ ec_scalar_to_montgomery(group.get(), &out_scalar, &out_scalar);
+ ec_scalar_mul_montgomery(group.get(), &result, &in_scalar, &out_scalar);
+
+ EXPECT_EQ(0, OPENSSL_memcmp(kOneMont, &result, sizeof(kOneMont)));
+
+ // Invert the result and expect to get back to the original value.
+ ASSERT_TRUE(beeu_mod_inverse_vartime(out, out, order_words));
+ EXPECT_EQ(0, OPENSSL_memcmp(in, out, sizeof(in)));
+ }
+}
+
static bool GetFieldElement(FileTest *t, BN_ULONG out[P256_LIMBS],
const char *name) {
std::vector<uint8_t> bytes;
diff --git a/crypto/fipsmodule/ec/scalar.c b/crypto/fipsmodule/ec/scalar.c
index 1bd6b02..88678a9 100644
--- a/crypto/fipsmodule/ec/scalar.c
+++ b/crypto/fipsmodule/ec/scalar.c
@@ -74,3 +74,8 @@
const EC_SCALAR *a) {
group->meth->scalar_inv_montgomery(group, r, a);
}
+
+int ec_scalar_inv_montgomery_vartime(const EC_GROUP *group, EC_SCALAR *r,
+ const EC_SCALAR *a) {
+ return group->meth->scalar_inv_montgomery_vartime(group, r, a);
+}
diff --git a/crypto/fipsmodule/ec/simple.c b/crypto/fipsmodule/ec/simple.c
index bf4aa4f..f875f22 100644
--- a/crypto/fipsmodule/ec/simple.c
+++ b/crypto/fipsmodule/ec/simple.c
@@ -354,3 +354,55 @@
// The points are equal.
return 0;
}
+
+int ec_GFp_simple_mont_inv_mod_ord_vartime(const EC_GROUP *group,
+ EC_SCALAR *out,
+ const EC_SCALAR *in) {
+  // This implementation happens to run in constant time,
+  // even though constant time is not required for this interface.
+
+ // out = in^-1 in the Montgomery domain. This is
+ // |ec_scalar_to_montgomery| followed by |ec_scalar_inv_montgomery|, but
+ // |ec_scalar_inv_montgomery| followed by |ec_scalar_from_montgomery| is
+ // equivalent and slightly more efficient.
+ ec_scalar_inv_montgomery(group, out, in);
+ ec_scalar_from_montgomery(group, out, out);
+ return 1;
+}
+
+// Compares the x (affine) coordinate of the point p with r.
+// Returns 1 if they are equal, 0 otherwise.
+// Assumption: the caller starts the BN_CTX.
+int ec_GFp_simple_cmp_x_coordinate(const EC_GROUP *group, const EC_POINT *p,
+ const BIGNUM *r, BN_CTX *ctx) {
+ int ret = 0;
+ BN_CTX_start(ctx);
+
+ BIGNUM *X = BN_CTX_get(ctx);
+ if (X == NULL) {
+ OPENSSL_PUT_ERROR(ECDSA, ERR_R_BN_LIB);
+ goto out;
+ }
+
+ if (!EC_POINT_get_affine_coordinates_GFp(group, p, X, NULL, ctx)) {
+ OPENSSL_PUT_ERROR(ECDSA, ERR_R_EC_LIB);
+ goto out;
+ }
+
+ if (!ec_field_element_to_scalar(group, X)) {
+ OPENSSL_PUT_ERROR(ECDSA, ERR_R_BN_LIB);
+ goto out;
+ }
+
+ // The signature is correct iff |X| is equal to |r|.
+ if (BN_ucmp(X, r) != 0) {
+ OPENSSL_PUT_ERROR(ECDSA, ECDSA_R_BAD_SIGNATURE);
+ goto out;
+ }
+
+ ret = 1;
+
+out:
+ BN_CTX_end(ctx);
+ return ret;
+}
diff --git a/crypto/fipsmodule/ecdsa/ecdsa.c b/crypto/fipsmodule/ecdsa/ecdsa.c
index f3ce214..7ec4565 100644
--- a/crypto/fipsmodule/ecdsa/ecdsa.c
+++ b/crypto/fipsmodule/ecdsa/ecdsa.c
@@ -98,40 +98,6 @@
order->width);
}
-// field_element_to_scalar reduces |r| modulo |group->order|. |r| must
-// previously have been reduced modulo |group->field|.
-static int field_element_to_scalar(const EC_GROUP *group, BIGNUM *r) {
- // We must have p < 2×order, assuming p is not tiny (p >= 17). Thus rather we
- // can reduce by performing at most one subtraction.
- //
- // Proof: We only work with prime order curves, so the number of points on
- // the curve is the order. Thus Hasse's theorem gives:
- //
- // |order - (p + 1)| <= 2×sqrt(p)
- // p + 1 - order <= 2×sqrt(p)
- // p + 1 - 2×sqrt(p) <= order
- // p + 1 - 2×(p/4) < order (p/4 > sqrt(p) for p >= 17)
- // p/2 < p/2 + 1 < order
- // p < 2×order
- //
- // Additionally, one can manually check this property for built-in curves. It
- // is enforced for legacy custom curves in |EC_GROUP_set_generator|.
- //
- // TODO(davidben): Introduce |EC_FIELD_ELEMENT|, make this a function from
- // |EC_FIELD_ELEMENT| to |EC_SCALAR|, and cut out the |BIGNUM|. Does this need
- // to be constant-time for signing? |r| is the x-coordinate for kG, which is
- // public unless k was rerolled because |s| was zero.
- assert(!BN_is_negative(r));
- assert(BN_cmp(r, &group->field) < 0);
- if (BN_cmp(r, &group->order) >= 0 &&
- !BN_sub(r, r, &group->order)) {
- return 0;
- }
- assert(!BN_is_negative(r));
- assert(BN_cmp(r, &group->order) < 0);
- return 1;
-}
-
ECDSA_SIG *ECDSA_SIG_new(void) {
ECDSA_SIG *sig = OPENSSL_malloc(sizeof(ECDSA_SIG));
if (sig == NULL) {
@@ -193,12 +159,6 @@
}
int ret = 0;
EC_POINT *point = NULL;
- BN_CTX_start(ctx);
- BIGNUM *X = BN_CTX_get(ctx);
- if (X == NULL) {
- OPENSSL_PUT_ERROR(ECDSA, ERR_R_BN_LIB);
- goto err;
- }
EC_SCALAR r, s, u1, u2, s_inv_mont, m;
if (BN_is_zero(sig->r) ||
@@ -210,11 +170,7 @@
}
// s_inv_mont = s^-1 in the Montgomery domain. This is
- // |ec_scalar_to_montgomery| followed by |ec_scalar_inv_montgomery|, but
- // |ec_scalar_inv_montgomery| followed by |ec_scalar_from_montgomery| is
- // equivalent and slightly more efficient.
- ec_scalar_inv_montgomery(group, &s_inv_mont, &s);
- ec_scalar_from_montgomery(group, &s_inv_mont, &s_inv_mont);
+ ec_scalar_inv_montgomery_vartime(group, &s_inv_mont, &s);
// u1 = m * s^-1 mod order
// u2 = r * s^-1 mod order
@@ -234,24 +190,10 @@
OPENSSL_PUT_ERROR(ECDSA, ERR_R_EC_LIB);
goto err;
}
- if (!EC_POINT_get_affine_coordinates_GFp(group, point, X, NULL, ctx)) {
- OPENSSL_PUT_ERROR(ECDSA, ERR_R_EC_LIB);
- goto err;
- }
- if (!field_element_to_scalar(group, X)) {
- OPENSSL_PUT_ERROR(ECDSA, ERR_R_BN_LIB);
- goto err;
- }
- // The signature is correct iff |X| is equal to |sig->r|.
- if (BN_ucmp(X, sig->r) != 0) {
- OPENSSL_PUT_ERROR(ECDSA, ECDSA_R_BAD_SIGNATURE);
- goto err;
- }
- ret = 1;
+ ret = ec_cmp_x_coordinate(group, point, sig->r, ctx);
err:
- BN_CTX_end(ctx);
BN_CTX_free(ctx);
EC_POINT_free(point);
return ret;
@@ -320,7 +262,7 @@
goto err;
}
- if (!field_element_to_scalar(group, r)) {
+ if (!ec_field_element_to_scalar(group, r)) {
goto err;
}
} while (BN_is_zero(r));
diff --git a/third_party/fiat/p256.c b/third_party/fiat/p256.c
index 2d40ea0..b6f3f49 100644
--- a/third_party/fiat/p256.c
+++ b/third_party/fiat/p256.c
@@ -1836,6 +1836,8 @@
out->bignum_to_felem = ec_GFp_mont_bignum_to_felem;
out->felem_to_bignum = ec_GFp_mont_felem_to_bignum;
out->scalar_inv_montgomery = ec_simple_scalar_inv_montgomery;
+ out->scalar_inv_montgomery_vartime = ec_GFp_simple_mont_inv_mod_ord_vartime;
+ out->cmp_x_coordinate = ec_GFp_simple_cmp_x_coordinate;
};
#undef BORINGSSL_NISTP256_64BIT