Ensure result affine coordinates in nistz256 are fully reduced. Revert 3f3358ac150465fafffaf1c51c2928dd2b2018a9. Add documentation clarifying the misunderstanding that lead to the mistake, and make use of the recently-added |bn_set_words|. Change-Id: I58814bace3db3b0b44e2dfe09c44918a4710c621 Reviewed-on: https://boringssl-review.googlesource.com/8831 Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/ec/p256-x86_64.c b/crypto/ec/p256-x86_64.c index 470f450..3f509db 100644 --- a/crypto/ec/p256-x86_64.c +++ b/crypto/ec/p256-x86_64.c
@@ -54,20 +54,28 @@ typedef P256_POINT_AFFINE PRECOMP256_ROW[64]; -/* Functions implemented in assembly */ +/* Arithmetic on field elements using Almost Montgomery Multiplication. The + * "almost" means, in particular, that the inputs and outputs of these + * functions are in the range [0, 2**BN_BITS2), not [0, P). Only + * |ecp_nistz256_from_mont| outputs a fully reduced value in [0, P). Almost + * Montgomery Arithmetic is described clearly in "Efficient Software + * Implementations of Modular Exponentiation" by Shay Gueron. */ -/* Modular neg: res = -a mod P */ +/* Modular neg: res = -a mod P, where res is not fully reduced. */ void ecp_nistz256_neg(BN_ULONG res[P256_LIMBS], const BN_ULONG a[P256_LIMBS]); -/* Montgomery mul: res = a*b*2^-256 mod P */ +/* Montgomery mul: res = a*b*2^-256 mod P, where res is not fully reduced. */ void ecp_nistz256_mul_mont(BN_ULONG res[P256_LIMBS], const BN_ULONG a[P256_LIMBS], const BN_ULONG b[P256_LIMBS]); -/* Montgomery sqr: res = a*a*2^-256 mod P */ +/* Montgomery sqr: res = a*a*2^-256 mod P, where res is not fully reduced. */ void ecp_nistz256_sqr_mont(BN_ULONG res[P256_LIMBS], const BN_ULONG a[P256_LIMBS]); -/* Convert a number from Montgomery domain, by multiplying with 1 */ +/* Convert a number from Montgomery domain, by multiplying with 1, where res + * will be fully reduced mod P. */ void ecp_nistz256_from_mont(BN_ULONG res[P256_LIMBS], const BN_ULONG in[P256_LIMBS]); + + /* Functions that perform constant time access to the precomputed tables */ void ecp_nistz256_select_w5(P256_POINT *val, const P256_POINT *in_t, int index); void ecp_nistz256_select_w7(P256_POINT_AFFINE *val, @@ -519,33 +527,30 @@ ecp_nistz256_mod_inverse(z_inv3, point_z); ecp_nistz256_sqr_mont(z_inv2, z_inv3); - /* Instead of using |ecp_nistz256_from_mont| to convert the |x| coordinate - * and then calling |ecp_nistz256_from_mont| again to convert the |y| - * coordinate below, convert the common factor |z_inv2| once now, saving one - * reduction. */ - ecp_nistz256_from_mont(z_inv2, z_inv2); + /* Unlike the |BN_mod_mul_montgomery|-based implementation, we cannot factor + * out the two calls to |ecp_nistz256_from_mont| into one call, because + * |ecp_nistz256_from_mont| must be the last operation to ensure that the + * result is fully reduced mod P. */ if (x != NULL) { - if (bn_wexpand(x, P256_LIMBS) == NULL) { + BN_ULONG x_aff[P256_LIMBS]; + ecp_nistz256_mul_mont(x_aff, z_inv2, point_x); + ecp_nistz256_from_mont(x_aff, x_aff); + if (!bn_set_words(x, x_aff, P256_LIMBS)) { OPENSSL_PUT_ERROR(EC, ERR_R_MALLOC_FAILURE); return 0; } - x->top = P256_LIMBS; - x->neg = 0; - ecp_nistz256_mul_mont(x->d, z_inv2, point_x); - bn_correct_top(x); } if (y != NULL) { + BN_ULONG y_aff[P256_LIMBS]; ecp_nistz256_mul_mont(z_inv3, z_inv3, z_inv2); - if (bn_wexpand(y, P256_LIMBS) == NULL) { + ecp_nistz256_mul_mont(y_aff, z_inv3, point_y); + ecp_nistz256_from_mont(y_aff, y_aff); + if (!bn_set_words(y, y_aff, P256_LIMBS)) { OPENSSL_PUT_ERROR(EC, ERR_R_MALLOC_FAILURE); return 0; } - y->top = P256_LIMBS; - y->neg = 0; - ecp_nistz256_mul_mont(y->d, z_inv3, point_y); - bn_correct_top(y); } return 1;