Make ECDSA signing 10% faster and plug some timing leaks.

None of the asymmetric crypto we inherented from OpenSSL is
constant-time because of BIGNUM. BIGNUM chops leading zeros off the
front of everything, so we end up leaking information about the first
word, in theory. BIGNUM functions additionally tend to take the full
range of inputs and then call into BN_nnmod at various points.

All our secret values should be acted on in constant-time, but k in
ECDSA is a particularly sensitive value. So, ecdsa_sign_setup, in an
attempt to mitigate the BIGNUM leaks, would add a couple copies of the
order.

This does not work at all. k is used to compute two values: k^-1 and kG.
The first operation when computing k^-1 is to call BN_nnmod if k is out
of range. The entry point to our tuned constant-time curve
implementations is to call BN_nnmod if the scalar has too many bits,
which this causes. The result is both corrections are immediately undone
but cause us to do more variable-time work in the meantime.

Replace all these computations around k with the word-based functions
added in the various preceding CLs. In doing so, replace the BN_mod_mul
calls (which internally call BN_nnmod) with Montgomery reduction. We can
avoid taking k^-1 out of Montgomery form, which combines nicely with
Brian Smith's trick in 3426d1011946b26ff1bb2fd98a081ba4753c9cc8. Along
the way, we avoid some unnecessary mallocs.

BIGNUM still affects the private key itself, as well as the EC_POINTs.
But this should hopefully be much better now. Also it's 10% faster:

Before:
Did 15000 ECDSA P-224 signing operations in 1069117us (14030.3 ops/sec)
Did 18000 ECDSA P-256 signing operations in 1053908us (17079.3 ops/sec)
Did 1078 ECDSA P-384 signing operations in 1087853us (990.9 ops/sec)
Did 473 ECDSA P-521 signing operations in 1069835us (442.1 ops/sec)

After:
Did 16000 ECDSA P-224 signing operations in 1064799us (15026.3 ops/sec)
Did 19000 ECDSA P-256 signing operations in 1007839us (18852.2 ops/sec)
Did 1078 ECDSA P-384 signing operations in 1079413us (998.7 ops/sec)
Did 484 ECDSA P-521 signing operations in 1083616us (446.7 ops/sec)

Change-Id: I2a25e90fc99dac13c0616d0ea45e125a4bd8cca1
Reviewed-on: https://boringssl-review.googlesource.com/23075
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/err/ec.errordata b/crypto/err/ec.errordata
index fae9480..de8ee6c 100644
--- a/crypto/err/ec.errordata
+++ b/crypto/err/ec.errordata
@@ -17,6 +17,7 @@
 EC,111,INVALID_FORM
 EC,112,INVALID_GROUP_ORDER
 EC,113,INVALID_PRIVATE_KEY
+EC,133,INVALID_SCALAR
 EC,114,MISSING_PARAMETERS
 EC,115,MISSING_PRIVATE_KEY
 EC,116,NON_NAMED_CURVE
diff --git a/crypto/fipsmodule/bn/internal.h b/crypto/fipsmodule/bn/internal.h
index 8fa8ed2..75efbfa 100644
--- a/crypto/fipsmodule/bn/internal.h
+++ b/crypto/fipsmodule/bn/internal.h
@@ -277,6 +277,17 @@
 int bn_in_range_words(const BN_ULONG *a, BN_ULONG min_inclusive,
                       const BN_ULONG *max_exclusive, size_t len);
 
+// bn_rand_range_words sets |out| to a uniformly distributed random number from
+// |min_inclusive| to |max_exclusive|. Both |out| and |max_exclusive| are |len|
+// words long.
+//
+// This function runs in time independent of the result, but |min_inclusive| and
+// |max_exclusive| are public data. (Information about the range is unavoidably
+// leaked by how many iterations it took to select a number.)
+int bn_rand_range_words(BN_ULONG *out, BN_ULONG min_inclusive,
+                        const BN_ULONG *max_exclusive, size_t len,
+                        const uint8_t additional_data[32]);
+
 int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                 const BN_ULONG *np, const BN_ULONG *n0, int num);
 
diff --git a/crypto/fipsmodule/bn/random.c b/crypto/fipsmodule/bn/random.c
index e081a11..61499af 100644
--- a/crypto/fipsmodule/bn/random.c
+++ b/crypto/fipsmodule/bn/random.c
@@ -113,7 +113,6 @@
 #include <openssl/err.h>
 #include <openssl/mem.h>
 #include <openssl/rand.h>
-#include <openssl/sha.h>
 #include <openssl/type_check.h>
 
 #include "internal.h"
@@ -225,16 +224,9 @@
          !bn_less_than_word(a, len, min_inclusive);
 }
 
-// bn_rand_range_words sets |out| to a uniformly distributed random number from
-// |min_inclusive| to |max_exclusive|. Both |out| and |max_exclusive| are |len|
-// words long.
-//
-// This function runs in time independent of the result, but |min_inclusive| and
-// |max_exclusive| are public data. (Information about the range is unavoidably
-// leaked by how many iterations it took to select a number.)
-static int bn_rand_range_words(BN_ULONG *out, BN_ULONG min_inclusive,
-                               const BN_ULONG *max_exclusive, size_t len,
-                               const uint8_t additional_data[32]) {
+int bn_rand_range_words(BN_ULONG *out, BN_ULONG min_inclusive,
+                        const BN_ULONG *max_exclusive, size_t len,
+                        const uint8_t additional_data[32]) {
   // This function implements the equivalent of steps 4 through 7 of FIPS 186-4
   // appendices B.4.2 and B.5.2. When called in those contexts, |max_exclusive|
   // is n and |min_inclusive| is one.
@@ -284,30 +276,20 @@
   return 1;
 }
 
-static int bn_rand_range_with_additional_data(
-    BIGNUM *r, BN_ULONG min_inclusive, const BIGNUM *max_exclusive,
-    const uint8_t additional_data[32]) {
+int BN_rand_range_ex(BIGNUM *r, BN_ULONG min_inclusive,
+                     const BIGNUM *max_exclusive) {
   if (!bn_wexpand(r, max_exclusive->top) ||
       !bn_rand_range_words(r->d, min_inclusive, max_exclusive->d,
-                           max_exclusive->top, additional_data)) {
+                           max_exclusive->top, kDefaultAdditionalData)) {
     return 0;
   }
 
-  // TODO(davidben): |bn_correct_top| is not constant-time. Make
-  // |BN_generate_dsa_nonce| call |bn_rand_range_words| directly and then inline
-  // this function into |BN_rand_range_ex|.
   r->neg = 0;
   r->top = max_exclusive->top;
   bn_correct_top(r);
   return 1;
 }
 
-int BN_rand_range_ex(BIGNUM *r, BN_ULONG min_inclusive,
-                     const BIGNUM *max_exclusive) {
-  return bn_rand_range_with_additional_data(r, min_inclusive, max_exclusive,
-                                            kDefaultAdditionalData);
-}
-
 int BN_rand_range(BIGNUM *r, const BIGNUM *range) {
   return BN_rand_range_ex(r, 0, range);
 }
@@ -315,35 +297,3 @@
 int BN_pseudo_rand_range(BIGNUM *r, const BIGNUM *range) {
   return BN_rand_range(r, range);
 }
-
-int BN_generate_dsa_nonce(BIGNUM *out, const BIGNUM *range, const BIGNUM *priv,
-                          const uint8_t *message, size_t message_len,
-                          BN_CTX *ctx) {
-  // We copy |priv| into a local buffer to avoid furthur exposing its
-  // length.
-  uint8_t private_bytes[96];
-  size_t todo = sizeof(priv->d[0]) * priv->top;
-  if (todo > sizeof(private_bytes)) {
-    // No reasonable DSA or ECDSA key should have a private key
-    // this large and we don't handle this case in order to avoid
-    // leaking the length of the private key.
-    OPENSSL_PUT_ERROR(BN, BN_R_PRIVATE_KEY_TOO_LARGE);
-    return 0;
-  }
-  OPENSSL_memcpy(private_bytes, priv->d, todo);
-  OPENSSL_memset(private_bytes + todo, 0, sizeof(private_bytes) - todo);
-
-  // Pass a SHA512 hash of the private key and message as additional data into
-  // the RBG. This is a hardening measure against entropy failure.
-  OPENSSL_COMPILE_ASSERT(SHA512_DIGEST_LENGTH >= 32,
-                         additional_data_is_too_large_for_sha512);
-  SHA512_CTX sha;
-  uint8_t digest[SHA512_DIGEST_LENGTH];
-  SHA512_Init(&sha);
-  SHA512_Update(&sha, private_bytes, sizeof(private_bytes));
-  SHA512_Update(&sha, message, message_len);
-  SHA512_Final(digest, &sha);
-
-  // Select a value k from [1, range-1], following FIPS 186-4 appendix B.5.2.
-  return bn_rand_range_with_additional_data(out, 1, range, digest);
-}
diff --git a/crypto/fipsmodule/ec/ec.c b/crypto/fipsmodule/ec/ec.c
index 14c7450..bcf5afc 100644
--- a/crypto/fipsmodule/ec/ec.c
+++ b/crypto/fipsmodule/ec/ec.c
@@ -77,6 +77,7 @@
 
 #include "internal.h"
 #include "../../internal.h"
+#include "../bn/internal.h"
 #include "../delocate.h"
 
 
@@ -829,7 +830,53 @@
   // nothing to multiply. But, nobody should be calling this function with
   // nothing to multiply in the first place.
   if ((g_scalar == NULL && p_scalar == NULL) ||
-      ((p == NULL) != (p_scalar == NULL)))  {
+      (p == NULL) != (p_scalar == NULL))  {
+    OPENSSL_PUT_ERROR(EC, ERR_R_PASSED_NULL_PARAMETER);
+    return 0;
+  }
+
+  // We cannot easily process arbitrary scalars in constant-time, and there is
+  // no need to do so. Require that scalars be the same size as the order.
+  //
+  // One could require they be fully reduced, but some consumers try to check
+  // that |order| * |pubkey| is the identity. This comes from following NIST SP
+  // 800-56A section 5.6.2.3.2. (Though all our curves have cofactor one, so
+  // this check isn't useful.)
+  int ret = 0;
+  EC_SCALAR g_scalar_storage, p_scalar_storage;
+  EC_SCALAR *g_scalar_arg = NULL, *p_scalar_arg = NULL;
+  unsigned order_bits = BN_num_bits(&group->order);
+  if (g_scalar != NULL) {
+    if (BN_is_negative(g_scalar) || BN_num_bits(g_scalar) > order_bits ||
+        !ec_bignum_to_scalar(group, &g_scalar_storage, g_scalar)) {
+      OPENSSL_PUT_ERROR(EC, EC_R_INVALID_SCALAR);
+      goto err;
+    }
+    g_scalar_arg = &g_scalar_storage;
+  }
+
+  if (p_scalar != NULL) {
+    if (BN_is_negative(p_scalar) || BN_num_bits(p_scalar) > order_bits ||
+        !ec_bignum_to_scalar(group, &p_scalar_storage, p_scalar)) {
+      OPENSSL_PUT_ERROR(EC, EC_R_INVALID_SCALAR);
+      goto err;
+    }
+    p_scalar_arg = &p_scalar_storage;
+  }
+
+  ret = ec_point_mul_scalar(group, r, g_scalar_arg, p, p_scalar_arg, ctx);
+
+err:
+  OPENSSL_cleanse(&g_scalar_storage, sizeof(g_scalar_storage));
+  OPENSSL_cleanse(&p_scalar_storage, sizeof(p_scalar_storage));
+  return ret;
+}
+
+int ec_point_mul_scalar(const EC_GROUP *group, EC_POINT *r,
+                        const EC_SCALAR *g_scalar, const EC_POINT *p,
+                        const EC_SCALAR *p_scalar, BN_CTX *ctx) {
+  if ((g_scalar == NULL && p_scalar == NULL) ||
+      (p == NULL) != (p_scalar == NULL))  {
     OPENSSL_PUT_ERROR(EC, ERR_R_PASSED_NULL_PARAMETER);
     return 0;
   }
@@ -884,3 +931,20 @@
 
   return OPENSSL_NUM_BUILT_IN_CURVES;
 }
+
+int ec_bignum_to_scalar(const EC_GROUP *group, EC_SCALAR *out,
+                        const BIGNUM *in) {
+  if (BN_is_negative(in) || in->top > group->order.top) {
+    OPENSSL_PUT_ERROR(EC, EC_R_INVALID_SCALAR);
+    return 0;
+  }
+  OPENSSL_memset(out->words, 0, group->order.top * sizeof(BN_ULONG));
+  OPENSSL_memcpy(out->words, in->d, in->top * sizeof(BN_ULONG));
+  return 1;
+}
+
+int ec_random_nonzero_scalar(const EC_GROUP *group, EC_SCALAR *out,
+                             const uint8_t additional_data[32]) {
+  return bn_rand_range_words(out->words, 1, group->order.d, group->order.top,
+                             additional_data);
+}
diff --git a/crypto/fipsmodule/ec/internal.h b/crypto/fipsmodule/ec/internal.h
index 8a78497..7374e8b 100644
--- a/crypto/fipsmodule/ec/internal.h
+++ b/crypto/fipsmodule/ec/internal.h
@@ -91,6 +91,16 @@
 OPENSSL_COMPILE_ASSERT(EC_MAX_SCALAR_WORDS <= BN_SMALL_MAX_WORDS,
                        bn_small_functions_applicable);
 
+// An EC_SCALAR is a |BN_num_bits(order)|-bit integer. Only the first
+// |order->top| words are used. An |EC_SCALAR| is specific to an |EC_GROUP| and
+// must not be mixed between groups. Unless otherwise specified, it is fully
+// reduced modulo the |order|.
+typedef union {
+  // bytes is the representation of the scalar in little-endian order.
+  uint8_t bytes[EC_MAX_SCALAR_BYTES];
+  BN_ULONG words[EC_MAX_SCALAR_WORDS];
+} EC_SCALAR;
+
 struct ec_method_st {
   int (*group_init)(EC_GROUP *);
   void (*group_finish)(EC_GROUP *);
@@ -104,8 +114,8 @@
   // Computes |r = p_scalar*p| if g_scalar is null. At least one of |g_scalar|
   // and |p_scalar| must be non-null, and |p| must be non-null if |p_scalar| is
   // non-null.
-  int (*mul)(const EC_GROUP *group, EC_POINT *r, const BIGNUM *g_scalar,
-             const EC_POINT *p, const BIGNUM *p_scalar, BN_CTX *ctx);
+  int (*mul)(const EC_GROUP *group, EC_POINT *r, const EC_SCALAR *g_scalar,
+             const EC_POINT *p, const EC_SCALAR *p_scalar, BN_CTX *ctx);
 
   // 'field_mul' and 'field_sqr' can be used by 'add' and 'dbl' so that the
   // same implementations of point operations can be used with different
@@ -163,8 +173,28 @@
 
 EC_GROUP *ec_group_new(const EC_METHOD *meth);
 
-int ec_wNAF_mul(const EC_GROUP *group, EC_POINT *r, const BIGNUM *g_scalar,
-                const EC_POINT *p, const BIGNUM *p_scalar, BN_CTX *ctx);
+// ec_bignum_to_scalar converts |in| to an |EC_SCALAR| and writes it to |*out|.
+// |in| must be non-negative and have at most |BN_num_bits(&group->order)| bits.
+// It returns one on success and zero on error. It does not ensure |in| is fully
+// reduced.
+int ec_bignum_to_scalar(const EC_GROUP *group, EC_SCALAR *out,
+                        const BIGNUM *in);
+
+// ec_random_nonzero_scalar sets |out| to a uniformly selected random value from
+// 1 to |group->order| - 1. It returns one on success and zero on error.
+int ec_random_nonzero_scalar(const EC_GROUP *group, EC_SCALAR *out,
+                             const uint8_t additional_data[32]);
+
+// ec_point_mul_scalar sets |r| to generator * |g_scalar| + |p| *
+// |p_scalar|. Unlike other functions which take |EC_SCALAR|, |g_scalar| and
+// |p_scalar| need not be fully reduced. They need only contain as many bits as
+// the order.
+int ec_point_mul_scalar(const EC_GROUP *group, EC_POINT *r,
+                        const EC_SCALAR *g_scalar, const EC_POINT *p,
+                        const EC_SCALAR *p_scalar, BN_CTX *ctx);
+
+int ec_wNAF_mul(const EC_GROUP *group, EC_POINT *r, const EC_SCALAR *g_scalar,
+                const EC_POINT *p, const EC_SCALAR *p_scalar, BN_CTX *ctx);
 
 // method functions in simple.c
 int ec_GFp_simple_group_init(EC_GROUP *);
diff --git a/crypto/fipsmodule/ec/p224-64.c b/crypto/fipsmodule/ec/p224-64.c
index 26a94b9..ba25d22 100644
--- a/crypto/fipsmodule/ec/p224-64.c
+++ b/crypto/fipsmodule/ec/p224-64.c
@@ -1038,14 +1038,13 @@
 }
 
 static int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
-                                      const BIGNUM *g_scalar, const EC_POINT *p,
-                                      const BIGNUM *p_scalar, BN_CTX *ctx) {
+                                      const EC_SCALAR *g_scalar,
+                                      const EC_POINT *p,
+                                      const EC_SCALAR *p_scalar, BN_CTX *ctx) {
   int ret = 0;
   BN_CTX *new_ctx = NULL;
   BIGNUM *x, *y, *z, *tmp_scalar;
-  p224_felem_bytearray g_secret, p_secret;
   p224_felem p_pre_comp[17][3];
-  p224_felem_bytearray tmp;
   p224_felem x_in, y_in, z_in, x_out, y_out, z_out;
 
   if (ctx == NULL) {
@@ -1067,23 +1066,7 @@
   if (p != NULL && p_scalar != NULL) {
     // We treat NULL scalars as 0, and NULL points as points at infinity, i.e.,
     // they contribute nothing to the linear combination.
-    OPENSSL_memset(&p_secret, 0, sizeof(p_secret));
     OPENSSL_memset(&p_pre_comp, 0, sizeof(p_pre_comp));
-    size_t num_bytes;
-    // reduce g_scalar to 0 <= g_scalar < 2^224
-    if (BN_num_bits(p_scalar) > 224 || BN_is_negative(p_scalar)) {
-      // this is an unusual input, and we don't guarantee
-      // constant-timeness
-      if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx)) {
-        OPENSSL_PUT_ERROR(EC, ERR_R_BN_LIB);
-        goto err;
-      }
-      num_bytes = BN_bn2bin(tmp_scalar, tmp);
-    } else {
-      num_bytes = BN_bn2bin(p_scalar, tmp);
-    }
-
-    p224_flip_endian(p_secret, tmp, num_bytes);
     // precompute multiples
     if (!p224_BN_to_felem(x_out, &p->X) ||
         !p224_BN_to_felem(y_out, &p->Y) ||
@@ -1109,26 +1092,10 @@
     }
   }
 
-  if (g_scalar != NULL) {
-    OPENSSL_memset(g_secret, 0, sizeof(g_secret));
-    size_t num_bytes;
-    // reduce g_scalar to 0 <= g_scalar < 2^224
-    if (BN_num_bits(g_scalar) > 224 || BN_is_negative(g_scalar)) {
-      // this is an unusual input, and we don't guarantee constant-timeness
-      if (!BN_nnmod(tmp_scalar, g_scalar, &group->order, ctx)) {
-        OPENSSL_PUT_ERROR(EC, ERR_R_BN_LIB);
-        goto err;
-      }
-      num_bytes = BN_bn2bin(tmp_scalar, tmp);
-    } else {
-      num_bytes = BN_bn2bin(g_scalar, tmp);
-    }
-
-    p224_flip_endian(g_secret, tmp, num_bytes);
-  }
-  p224_batch_mul(
-      x_out, y_out, z_out, (p != NULL && p_scalar != NULL) ? p_secret : NULL,
-      g_scalar != NULL ? g_secret : NULL, (const p224_felem(*)[3])p_pre_comp);
+  p224_batch_mul(x_out, y_out, z_out,
+                 (p != NULL && p_scalar != NULL) ? p_scalar->bytes : NULL,
+                 g_scalar != NULL ? g_scalar->bytes : NULL,
+                 (const p224_felem(*)[3])p_pre_comp);
 
   // reduce the output to its unique minimal representation
   p224_felem_contract(x_in, x_out);
diff --git a/crypto/fipsmodule/ec/p256-64.c b/crypto/fipsmodule/ec/p256-64.c
index 04ae56b..d4a8ff6 100644
--- a/crypto/fipsmodule/ec/p256-64.c
+++ b/crypto/fipsmodule/ec/p256-64.c
@@ -1582,14 +1582,13 @@
 }
 
 static int ec_GFp_nistp256_points_mul(const EC_GROUP *group, EC_POINT *r,
-                                      const BIGNUM *g_scalar, const EC_POINT *p,
-                                      const BIGNUM *p_scalar, BN_CTX *ctx) {
+                                      const EC_SCALAR *g_scalar,
+                                      const EC_POINT *p,
+                                      const EC_SCALAR *p_scalar, BN_CTX *ctx) {
   int ret = 0;
   BN_CTX *new_ctx = NULL;
   BIGNUM *x, *y, *z, *tmp_scalar;
-  felem_bytearray g_secret, p_secret;
   smallfelem p_pre_comp[17][3];
-  felem_bytearray tmp;
   smallfelem x_in, y_in, z_in;
   felem x_out, y_out, z_out;
 
@@ -1611,21 +1610,7 @@
   if (p != NULL && p_scalar != NULL) {
     // We treat NULL scalars as 0, and NULL points as points at infinity, i.e.,
     // they contribute nothing to the linear combination.
-    OPENSSL_memset(&p_secret, 0, sizeof(p_secret));
     OPENSSL_memset(&p_pre_comp, 0, sizeof(p_pre_comp));
-    size_t num_bytes;
-    // Reduce g_scalar to 0 <= g_scalar < 2^256.
-    if (BN_num_bits(p_scalar) > 256 || BN_is_negative(p_scalar)) {
-      // This is an unusual input, and we don't guarantee constant-timeness.
-      if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx)) {
-        OPENSSL_PUT_ERROR(EC, ERR_R_BN_LIB);
-        goto err;
-      }
-      num_bytes = BN_bn2bin(tmp_scalar, tmp);
-    } else {
-      num_bytes = BN_bn2bin(p_scalar, tmp);
-    }
-    flip_endian(p_secret, tmp, num_bytes);
     // Precompute multiples.
     if (!BN_to_felem(x_out, &p->X) ||
         !BN_to_felem(y_out, &p->Y) ||
@@ -1650,28 +1635,10 @@
     }
   }
 
-  if (g_scalar != NULL) {
-    size_t num_bytes;
-
-    OPENSSL_memset(g_secret, 0, sizeof(g_secret));
-    // reduce g_scalar to 0 <= g_scalar < 2^256
-    if (BN_num_bits(g_scalar) > 256 || BN_is_negative(g_scalar)) {
-      // this is an unusual input, and we don't guarantee
-      // constant-timeness.
-      if (!BN_nnmod(tmp_scalar, g_scalar, &group->order, ctx)) {
-        OPENSSL_PUT_ERROR(EC, ERR_R_BN_LIB);
-        goto err;
-      }
-      num_bytes = BN_bn2bin(tmp_scalar, tmp);
-    } else {
-      num_bytes = BN_bn2bin(g_scalar, tmp);
-    }
-    flip_endian(g_secret, tmp, num_bytes);
-  }
   batch_mul(x_out, y_out, z_out,
-            (p != NULL && p_scalar != NULL) ? p_secret : NULL,
-            g_scalar != NULL ? g_secret : NULL,
-            (const smallfelem(*)[3]) &p_pre_comp);
+            (p != NULL && p_scalar != NULL) ? p_scalar->bytes : NULL,
+            g_scalar != NULL ? g_scalar->bytes : NULL,
+            (const smallfelem(*)[3]) & p_pre_comp);
 
   // reduce the output to its unique minimal representation
   felem_contract(x_in, x_out);
diff --git a/crypto/fipsmodule/ec/p256-x86_64.c b/crypto/fipsmodule/ec/p256-x86_64.c
index 0004add..a9b603a 100644
--- a/crypto/fipsmodule/ec/p256-x86_64.c
+++ b/crypto/fipsmodule/ec/p256-x86_64.c
@@ -216,8 +216,8 @@
 
 // r = p * p_scalar
 static int ecp_nistz256_windowed_mul(const EC_GROUP *group, P256_POINT *r,
-                                     const EC_POINT *p, const BIGNUM *p_scalar,
-                                     BN_CTX *ctx) {
+                                     const EC_POINT *p,
+                                     const EC_SCALAR *p_scalar) {
   assert(p != NULL);
   assert(p_scalar != NULL);
 
@@ -229,55 +229,8 @@
   // ~1599 ((96 * 16) + 63) bytes of stack space.
   alignas(64) P256_POINT table[16];
   uint8_t p_str[33];
-
-
-  int ret = 0;
-  BN_CTX *new_ctx = NULL;
-  int ctx_started = 0;
-
-  if (BN_num_bits(p_scalar) > 256 || BN_is_negative(p_scalar)) {
-    if (ctx == NULL) {
-      new_ctx = BN_CTX_new();
-      if (new_ctx == NULL) {
-        OPENSSL_PUT_ERROR(EC, ERR_R_MALLOC_FAILURE);
-        goto err;
-      }
-      ctx = new_ctx;
-    }
-    BN_CTX_start(ctx);
-    ctx_started = 1;
-    BIGNUM *mod = BN_CTX_get(ctx);
-    if (mod == NULL) {
-      OPENSSL_PUT_ERROR(EC, ERR_R_MALLOC_FAILURE);
-      goto err;
-    }
-    if (!BN_nnmod(mod, p_scalar, &group->order, ctx)) {
-      OPENSSL_PUT_ERROR(EC, ERR_R_BN_LIB);
-      goto err;
-    }
-    p_scalar = mod;
-  }
-
-  int j;
-  for (j = 0; j < p_scalar->top * BN_BYTES; j += BN_BYTES) {
-    BN_ULONG d = p_scalar->d[j / BN_BYTES];
-
-    p_str[j + 0] = d & 0xff;
-    p_str[j + 1] = (d >> 8) & 0xff;
-    p_str[j + 2] = (d >> 16) & 0xff;
-    p_str[j + 3] = (d >>= 24) & 0xff;
-    if (BN_BYTES == 8) {
-      d >>= 8;
-      p_str[j + 4] = d & 0xff;
-      p_str[j + 5] = (d >> 8) & 0xff;
-      p_str[j + 6] = (d >> 16) & 0xff;
-      p_str[j + 7] = (d >> 24) & 0xff;
-    }
-  }
-
-  for (; j < 33; j++) {
-    p_str[j] = 0;
-  }
+  OPENSSL_memcpy(p_str, p_scalar->bytes, 32);
+  p_str[32] = 0;
 
   // table[0] is implicitly (0,0,0) (the point at infinity), therefore it is
   // not stored. All other values are actually stored with an offset of -1 in
@@ -288,7 +241,7 @@
       !ecp_nistz256_bignum_to_field_elem(row[1 - 1].Y, &p->Y) ||
       !ecp_nistz256_bignum_to_field_elem(row[1 - 1].Z, &p->Z)) {
     OPENSSL_PUT_ERROR(EC, EC_R_COORDINATES_OUT_OF_RANGE);
-    goto err;
+    return 0;
   }
 
   ecp_nistz256_point_double(&row[2 - 1], &row[1 - 1]);
@@ -354,19 +307,13 @@
 
   ecp_nistz256_point_add(r, r, &h);
 
-  ret = 1;
-
-err:
-  if (ctx_started) {
-    BN_CTX_end(ctx);
-  }
-  BN_CTX_free(new_ctx);
-  return ret;
+  return 1;
 }
 
-static int ecp_nistz256_points_mul(
-    const EC_GROUP *group, EC_POINT *r, const BIGNUM *g_scalar,
-    const EC_POINT *p_, const BIGNUM *p_scalar, BN_CTX *ctx) {
+static int ecp_nistz256_points_mul(const EC_GROUP *group, EC_POINT *r,
+                                   const EC_SCALAR *g_scalar,
+                                   const EC_POINT *p_,
+                                   const EC_SCALAR *p_scalar, BN_CTX *ctx) {
   assert((p_ != NULL) == (p_scalar != NULL));
 
   static const unsigned kWindowSize = 7;
@@ -377,54 +324,10 @@
     P256_POINT_AFFINE a;
   } t, p;
 
-  int ret = 0;
-  BN_CTX *new_ctx = NULL;
-  int ctx_started = 0;
-
   if (g_scalar != NULL) {
-    if (BN_num_bits(g_scalar) > 256 || BN_is_negative(g_scalar)) {
-      if (ctx == NULL) {
-        new_ctx = BN_CTX_new();
-        if (new_ctx == NULL) {
-          goto err;
-        }
-        ctx = new_ctx;
-      }
-      BN_CTX_start(ctx);
-      ctx_started = 1;
-      BIGNUM *tmp_scalar = BN_CTX_get(ctx);
-      if (tmp_scalar == NULL) {
-        goto err;
-      }
-
-      if (!BN_nnmod(tmp_scalar, g_scalar, &group->order, ctx)) {
-        OPENSSL_PUT_ERROR(EC, ERR_R_BN_LIB);
-        goto err;
-      }
-      g_scalar = tmp_scalar;
-    }
-
-    uint8_t p_str[33] = {0};
-    int i;
-    for (i = 0; i < g_scalar->top * BN_BYTES; i += BN_BYTES) {
-      BN_ULONG d = g_scalar->d[i / BN_BYTES];
-
-      p_str[i + 0] = d & 0xff;
-      p_str[i + 1] = (d >> 8) & 0xff;
-      p_str[i + 2] = (d >> 16) & 0xff;
-      p_str[i + 3] = (d >>= 24) & 0xff;
-      if (BN_BYTES == 8) {
-        d >>= 8;
-        p_str[i + 4] = d & 0xff;
-        p_str[i + 5] = (d >> 8) & 0xff;
-        p_str[i + 6] = (d >> 16) & 0xff;
-        p_str[i + 7] = (d >> 24) & 0xff;
-      }
-    }
-
-    for (; i < (int) sizeof(p_str); i++) {
-      p_str[i] = 0;
-    }
+    uint8_t p_str[33];
+    OPENSSL_memcpy(p_str, g_scalar->bytes, 32);
+    p_str[32] = 0;
 
     // First window
     unsigned wvalue = (p_str[0] << 1) & kMask;
@@ -445,7 +348,7 @@
     OPENSSL_memset(p.p.Z, 0, sizeof(p.p.Z));
     copy_conditional(p.p.Z, ONE, is_not_zero(wvalue >> 1));
 
-    for (i = 1; i < 37; i++) {
+    for (int i = 1; i < 37; i++) {
       unsigned off = (index - 1) / 8;
       wvalue = p_str[off] | p_str[off + 1] << 8;
       wvalue = (wvalue >> ((index - 1) % 8)) & kMask;
@@ -469,8 +372,8 @@
       out = &p.p;
     }
 
-    if (!ecp_nistz256_windowed_mul(group, out, p_, p_scalar, ctx)) {
-      goto err;
+    if (!ecp_nistz256_windowed_mul(group, out, p_, p_scalar)) {
+      return 0;
     }
 
     if (!p_is_infinity) {
@@ -485,14 +388,7 @@
     return 0;
   }
 
-  ret = 1;
-
-err:
-  if (ctx_started) {
-    BN_CTX_end(ctx);
-  }
-  BN_CTX_free(new_ctx);
-  return ret;
+  return 1;
 }
 
 static int ecp_nistz256_get_affine(const EC_GROUP *group, const EC_POINT *point,
diff --git a/crypto/fipsmodule/ec/wnaf.c b/crypto/fipsmodule/ec/wnaf.c
index 842a8fb..e3b6437 100644
--- a/crypto/fipsmodule/ec/wnaf.c
+++ b/crypto/fipsmodule/ec/wnaf.c
@@ -122,11 +122,6 @@
     sign = -1;
   }
 
-  if (scalar->d == NULL || scalar->top == 0) {
-    OPENSSL_PUT_ERROR(EC, ERR_R_INTERNAL_ERROR);
-    goto err;
-  }
-
   len = BN_num_bits(scalar);
   // The modified wNAF may be one digit longer than binary representation
   // (*ret_len will be set to the actual length, i.e. at most
@@ -236,8 +231,9 @@
   return 1;
 }
 
-int ec_wNAF_mul(const EC_GROUP *group, EC_POINT *r, const BIGNUM *g_scalar,
-                const EC_POINT *p, const BIGNUM *p_scalar, BN_CTX *ctx) {
+int ec_wNAF_mul(const EC_GROUP *group, EC_POINT *r,
+                const EC_SCALAR *g_scalar_raw, const EC_POINT *p,
+                const EC_SCALAR *p_scalar_raw, BN_CTX *ctx) {
   BN_CTX *new_ctx = NULL;
   const EC_POINT *generator = NULL;
   EC_POINT *tmp = NULL;
@@ -262,13 +258,32 @@
       goto err;
     }
   }
+  BN_CTX_start(ctx);
+
+  // Convert from |EC_SCALAR| to |BIGNUM|. |BIGNUM| is not constant-time, but
+  // neither is the rest of this function.
+  BIGNUM *g_scalar = NULL, *p_scalar = NULL;
+  if (g_scalar_raw != NULL) {
+    g_scalar = BN_CTX_get(ctx);
+    if (g_scalar == NULL ||
+        !bn_set_words(g_scalar, g_scalar_raw->words, group->order.top)) {
+      goto err;
+    }
+  }
+  if (p_scalar_raw != NULL) {
+    p_scalar = BN_CTX_get(ctx);
+    if (p_scalar == NULL ||
+        !bn_set_words(p_scalar, p_scalar_raw->words, group->order.top)) {
+      goto err;
+    }
+  }
 
   // TODO: This function used to take |points| and |scalars| as arrays of
   // |num| elements. The code below should be simplified to work in terms of |p|
   // and |p_scalar|.
   size_t num = p != NULL ? 1 : 0;
   const EC_POINT **points = p != NULL ? &p : NULL;
-  const BIGNUM **scalars = p != NULL ? &p_scalar : NULL;
+  BIGNUM **scalars = p != NULL ? &p_scalar : NULL;
 
   total_num = num;
 
@@ -433,6 +448,9 @@
   ret = 1;
 
 err:
+  if (ctx != NULL) {
+    BN_CTX_end(ctx);
+  }
   BN_CTX_free(new_ctx);
   EC_POINT_free(tmp);
   OPENSSL_free(wsize);
diff --git a/crypto/fipsmodule/ecdsa/ecdsa.c b/crypto/fipsmodule/ecdsa/ecdsa.c
index 7af35f7..90393f3 100644
--- a/crypto/fipsmodule/ecdsa/ecdsa.c
+++ b/crypto/fipsmodule/ecdsa/ecdsa.c
@@ -58,38 +58,39 @@
 #include <openssl/bn.h>
 #include <openssl/err.h>
 #include <openssl/mem.h>
+#include <openssl/sha.h>
+#include <openssl/type_check.h>
 
 #include "../bn/internal.h"
 #include "../ec/internal.h"
 #include "../../internal.h"
 
 
-// digest_to_bn interprets |digest_len| bytes from |digest| as a big-endian
-// number and sets |out| to that value. It then truncates |out| so that it's,
-// at most, as long as |order|. It returns one on success and zero otherwise.
-static int digest_to_bn(BIGNUM *out, const uint8_t *digest, size_t digest_len,
-                        const BIGNUM *order) {
-  size_t num_bits;
-
-  num_bits = BN_num_bits(order);
-  // Need to truncate digest if it is too long: first truncate whole
-  // bytes.
+// digest_to_scalar interprets |digest_len| bytes from |digest| as a scalar for
+// ECDSA. Note this value is not fully reduced modulo the order, only the
+// correct number of bits.
+static void digest_to_scalar(const EC_GROUP *group, EC_SCALAR *out,
+                             const uint8_t *digest, size_t digest_len) {
+  const BIGNUM *order = &group->order;
+  size_t num_bits = BN_num_bits(order);
+  // Need to truncate digest if it is too long: first truncate whole bytes.
   if (8 * digest_len > num_bits) {
     digest_len = (num_bits + 7) / 8;
   }
-  if (!BN_bin2bn(digest, digest_len, out)) {
-    OPENSSL_PUT_ERROR(ECDSA, ERR_R_BN_LIB);
-    return 0;
+  OPENSSL_memset(out, 0, sizeof(EC_SCALAR));
+  for (size_t i = 0; i < digest_len; i++) {
+    out->bytes[i] = digest[digest_len - 1 - i];
   }
 
   // If still too long truncate remaining bits with a shift
-  if ((8 * digest_len > num_bits) &&
-      !BN_rshift(out, out, 8 - (num_bits & 0x7))) {
-    OPENSSL_PUT_ERROR(ECDSA, ERR_R_BN_LIB);
-    return 0;
+  if (8 * digest_len > num_bits) {
+    size_t shift = 8 - (num_bits & 0x7);
+    for (int i = 0; i < order->top - 1; i++) {
+      out->words[i] =
+          (out->words[i] >> shift) | (out->words[i + 1] << (BN_BITS2 - shift));
+    }
+    out->words[order->top - 1] >>= shift;
   }
-
-  return 1;
 }
 
 ECDSA_SIG *ECDSA_SIG_new(void) {
@@ -182,7 +183,9 @@
     OPENSSL_PUT_ERROR(ECDSA, ERR_R_BN_LIB);
     goto err;
   }
-  if (!digest_to_bn(m, digest, digest_len, order)) {
+  EC_SCALAR m_scalar;
+  digest_to_scalar(group, &m_scalar, digest, digest_len);
+  if (!bn_set_words(m, m_scalar.words, order->top)) {
     goto err;
   }
   // u1 = m * tmp mod order
@@ -228,35 +231,27 @@
   return ret;
 }
 
-static int ecdsa_sign_setup(const EC_KEY *eckey, BN_CTX *ctx, BIGNUM **kinvp,
-                            BIGNUM **rp, const uint8_t *digest,
-                            size_t digest_len) {
-  BIGNUM *k = NULL, *kinv = NULL, *r = NULL, *tmp = NULL;
+static int ecdsa_sign_setup(const EC_KEY *eckey, BN_CTX *ctx,
+                            EC_SCALAR *out_kinv_mont, BIGNUM **rp,
+                            const uint8_t *digest, size_t digest_len,
+                            const EC_SCALAR *priv_key) {
   EC_POINT *tmp_point = NULL;
-  const EC_GROUP *group;
   int ret = 0;
-
-  if (eckey == NULL || (group = EC_KEY_get0_group(eckey)) == NULL) {
-    OPENSSL_PUT_ERROR(ECDSA, ERR_R_PASSED_NULL_PARAMETER);
-    return 0;
-  }
-
-  k = BN_new();
-  kinv = BN_new();  // this value is later returned in *kinvp
-  r = BN_new();  // this value is later returned in *rp
-  tmp = BN_new();
-  if (k == NULL || kinv == NULL || r == NULL || tmp == NULL) {
+  EC_SCALAR k;
+  BIGNUM *r = BN_new();  // this value is later returned in *rp
+  BIGNUM *tmp = BN_new();
+  if (r == NULL || tmp == NULL) {
     OPENSSL_PUT_ERROR(ECDSA, ERR_R_MALLOC_FAILURE);
     goto err;
   }
+  const EC_GROUP *group = EC_KEY_get0_group(eckey);
+  const BIGNUM *order = EC_GROUP_get0_order(group);
   tmp_point = EC_POINT_new(group);
   if (tmp_point == NULL) {
     OPENSSL_PUT_ERROR(ECDSA, ERR_R_EC_LIB);
     goto err;
   }
 
-  const BIGNUM *order = EC_GROUP_get0_order(group);
-
   // Check that the size of the group order is FIPS compliant (FIPS 186-4
   // B.5.2).
   if (BN_num_bits(order) < 160) {
@@ -267,67 +262,55 @@
   do {
     // Include the private key and message digest in the k generation.
     if (eckey->fixed_k != NULL) {
-      if (!BN_copy(k, eckey->fixed_k)) {
+      if (!ec_bignum_to_scalar(group, &k, eckey->fixed_k)) {
         goto err;
       }
-    } else if (!BN_generate_dsa_nonce(k, order, EC_KEY_get0_private_key(eckey),
-                                      digest, digest_len, ctx)) {
-      OPENSSL_PUT_ERROR(ECDSA, ECDSA_R_RANDOM_NUMBER_GENERATION_FAILED);
-      goto err;
-    }
-
-    // Compute the inverse of k. The order is a prime, so use Fermat's Little
-    // Theorem.
-    if (!bn_mod_inverse_prime(kinv, k, order, ctx, group->order_mont)) {
-      OPENSSL_PUT_ERROR(ECDSA, ERR_R_BN_LIB);
-      goto err;
-    }
-
-    // We do not want timing information to leak the length of k,
-    // so we compute G*k using an equivalent scalar of fixed
-    // bit-length.
-
-    if (!BN_add(k, k, order)) {
-      goto err;
-    }
-    if (BN_num_bits(k) <= BN_num_bits(order)) {
-      if (!BN_add(k, k, order)) {
+    } else {
+      // Pass a SHA512 hash of the private key and digest as additional data
+      // into the RBG. This is a hardening measure against entropy failure.
+      OPENSSL_COMPILE_ASSERT(SHA512_DIGEST_LENGTH >= 32,
+                             additional_data_is_too_large_for_sha512);
+      SHA512_CTX sha;
+      uint8_t additional_data[SHA512_DIGEST_LENGTH];
+      SHA512_Init(&sha);
+      SHA512_Update(&sha, priv_key->words, order->top * sizeof(BN_ULONG));
+      SHA512_Update(&sha, digest, digest_len);
+      SHA512_Final(additional_data, &sha);
+      if (!ec_random_nonzero_scalar(group, &k, additional_data)) {
         goto err;
       }
     }
 
-    // compute r the x-coordinate of generator * k
-    if (!EC_POINT_mul(group, tmp_point, k, NULL, NULL, ctx)) {
-      OPENSSL_PUT_ERROR(ECDSA, ERR_R_EC_LIB);
+    // Compute k^-1. We leave it in the Montgomery domain as an optimization for
+    // later operations.
+    if (!bn_to_montgomery_small(out_kinv_mont->words, order->top, k.words,
+                                order->top, group->order_mont) ||
+        !bn_mod_inverse_prime_mont_small(out_kinv_mont->words, order->top,
+                                         out_kinv_mont->words, order->top,
+                                         group->order_mont)) {
       goto err;
     }
-    if (!EC_POINT_get_affine_coordinates_GFp(group, tmp_point, tmp, NULL,
+
+    // Compute r, the x-coordinate of generator * k.
+    if (!ec_point_mul_scalar(group, tmp_point, &k, NULL, NULL, ctx) ||
+        !EC_POINT_get_affine_coordinates_GFp(group, tmp_point, tmp, NULL,
                                              ctx)) {
-      OPENSSL_PUT_ERROR(ECDSA, ERR_R_EC_LIB);
       goto err;
     }
 
     if (!BN_nnmod(r, tmp, order, ctx)) {
-      OPENSSL_PUT_ERROR(ECDSA, ERR_R_BN_LIB);
       goto err;
     }
   } while (BN_is_zero(r));
 
-  // clear old values if necessary
   BN_clear_free(*rp);
-  BN_clear_free(*kinvp);
-
-  // save the pre-computed values
   *rp = r;
-  *kinvp = kinv;
+  r = NULL;
   ret = 1;
 
 err:
-  BN_clear_free(k);
-  if (!ret) {
-    BN_clear_free(kinv);
-    BN_clear_free(r);
-  }
+  OPENSSL_cleanse(&k, sizeof(k));
+  BN_clear_free(r);
   EC_POINT_free(tmp_point);
   BN_clear_free(tmp);
   return ret;
@@ -335,64 +318,73 @@
 
 ECDSA_SIG *ECDSA_do_sign(const uint8_t *digest, size_t digest_len,
                          const EC_KEY *eckey) {
-  int ok = 0;
-  BIGNUM *kinv = NULL, *s, *m = NULL, *tmp = NULL;
-  BN_CTX *ctx = NULL;
-  const EC_GROUP *group;
-  ECDSA_SIG *ret;
-  const BIGNUM *priv_key;
-
   if (eckey->ecdsa_meth && eckey->ecdsa_meth->sign) {
     OPENSSL_PUT_ERROR(ECDSA, ECDSA_R_NOT_IMPLEMENTED);
     return NULL;
   }
 
-  group = EC_KEY_get0_group(eckey);
-  priv_key = EC_KEY_get0_private_key(eckey);
-
-  if (group == NULL || priv_key == NULL) {
+  const EC_GROUP *group = EC_KEY_get0_group(eckey);
+  const BIGNUM *priv_key_bn = EC_KEY_get0_private_key(eckey);
+  if (group == NULL || priv_key_bn == NULL) {
     OPENSSL_PUT_ERROR(ECDSA, ERR_R_PASSED_NULL_PARAMETER);
     return NULL;
   }
+  const BIGNUM *order = EC_GROUP_get0_order(group);
 
-  ret = ECDSA_SIG_new();
-  if (!ret) {
+  int ok = 0;
+  ECDSA_SIG *ret = ECDSA_SIG_new();
+  BN_CTX *ctx = BN_CTX_new();
+  EC_SCALAR kinv_mont, priv_key, r_mont, s, tmp, m;
+  if (ret == NULL || ctx == NULL) {
     OPENSSL_PUT_ERROR(ECDSA, ERR_R_MALLOC_FAILURE);
     return NULL;
   }
-  s = ret->s;
 
-  if ((ctx = BN_CTX_new()) == NULL ||
-      (tmp = BN_new()) == NULL ||
-      (m = BN_new()) == NULL) {
-    OPENSSL_PUT_ERROR(ECDSA, ERR_R_MALLOC_FAILURE);
-    goto err;
-  }
-
-  const BIGNUM *order = EC_GROUP_get0_order(group);
-
-  if (!digest_to_bn(m, digest, digest_len, order)) {
+  digest_to_scalar(group, &m, digest, digest_len);
+  if (!ec_bignum_to_scalar(group, &priv_key, priv_key_bn)) {
     goto err;
   }
   for (;;) {
-    if (!ecdsa_sign_setup(eckey, ctx, &kinv, &ret->r, digest, digest_len)) {
-      OPENSSL_PUT_ERROR(ECDSA, ERR_R_ECDSA_LIB);
+    if (!ecdsa_sign_setup(eckey, ctx, &kinv_mont, &ret->r, digest, digest_len,
+                          &priv_key)) {
       goto err;
     }
 
-    if (!BN_mod_mul(tmp, priv_key, ret->r, order, ctx)) {
-      OPENSSL_PUT_ERROR(ECDSA, ERR_R_BN_LIB);
+    // Compute priv_key * r (mod order). Note if only one parameter is in the
+    // Montgomery domain, |bn_mod_mul_montgomery_small| will compute the answer
+    // in the normal domain.
+    if (!ec_bignum_to_scalar(group, &r_mont, ret->r) ||
+        !bn_to_montgomery_small(r_mont.words, order->top, r_mont.words,
+                                order->top, group->order_mont) ||
+        !bn_mod_mul_montgomery_small(s.words, order->top, priv_key.words,
+                                     order->top, r_mont.words, order->top,
+                                     group->order_mont)) {
       goto err;
     }
-    if (!BN_mod_add_quick(s, tmp, m, order)) {
-      OPENSSL_PUT_ERROR(ECDSA, ERR_R_BN_LIB);
+
+    // Compute s += m in constant time. Reduce one copy of |order| if necessary.
+    // Note this does not leave |s| fully reduced. We have
+    // |m| < 2^BN_num_bits(order), so subtracting |order| leaves
+    // 0 <= |s| < 2^BN_num_bits(order).
+    BN_ULONG carry = bn_add_words(s.words, s.words, m.words, order->top);
+    BN_ULONG v = bn_sub_words(tmp.words, s.words, order->d, order->top) - carry;
+    v = 0u - v;
+    for (int i = 0; i < order->top; i++) {
+      s.words[i] = constant_time_select_w(v, s.words[i], tmp.words[i]);
+    }
+
+    // Finally, multiply s by k^-1. That was retained in Montgomery form, so the
+    // same technique as the previous multiplication works. Although the
+    // previous step did not fully reduce |s|, |bn_mod_mul_montgomery_small|
+    // only requires the product not exceed R * |order|. |kinv_mont| is fully
+    // reduced and |s| < 2^BN_num_bits(order) <= R, so this holds.
+    if (!bn_mod_mul_montgomery_small(s.words, order->top, s.words, order->top,
+                                     kinv_mont.words, order->top,
+                                     group->order_mont) ||
+        !bn_set_words(ret->s, s.words, order->top)) {
       goto err;
     }
-    if (!BN_mod_mul(s, s, kinv, order, ctx)) {
-      OPENSSL_PUT_ERROR(ECDSA, ERR_R_BN_LIB);
-      goto err;
-    }
-    if (!BN_is_zero(s)) {
+    if (!BN_is_zero(ret->s)) {
       // s != 0 => we have a valid signature
       break;
     }
@@ -406,8 +398,11 @@
     ret = NULL;
   }
   BN_CTX_free(ctx);
-  BN_clear_free(m);
-  BN_clear_free(tmp);
-  BN_clear_free(kinv);
+  OPENSSL_cleanse(&kinv_mont, sizeof(kinv_mont));
+  OPENSSL_cleanse(&priv_key, sizeof(priv_key));
+  OPENSSL_cleanse(&r_mont, sizeof(r_mont));
+  OPENSSL_cleanse(&s, sizeof(s));
+  OPENSSL_cleanse(&tmp, sizeof(tmp));
+  OPENSSL_cleanse(&m, sizeof(m));
   return ret;
 }
diff --git a/include/openssl/bn.h b/include/openssl/bn.h
index 85fdae4..bb32c2f 100644
--- a/include/openssl/bn.h
+++ b/include/openssl/bn.h
@@ -618,17 +618,6 @@
 // BN_pseudo_rand_range is an alias for BN_rand_range.
 OPENSSL_EXPORT int BN_pseudo_rand_range(BIGNUM *rnd, const BIGNUM *range);
 
-// BN_generate_dsa_nonce generates a random number 0 <= out < range. Unlike
-// BN_rand_range, it also includes the contents of |priv| and |message| in the
-// generation so that an RNG failure isn't fatal as long as |priv| remains
-// secret. This is intended for use in DSA and ECDSA where an RNG weakness
-// leads directly to private key exposure unless this function is used.
-// It returns one on success and zero on error.
-OPENSSL_EXPORT int BN_generate_dsa_nonce(BIGNUM *out, const BIGNUM *range,
-                                         const BIGNUM *priv,
-                                         const uint8_t *message,
-                                         size_t message_len, BN_CTX *ctx);
-
 // BN_GENCB holds a callback function that is used by generation functions that
 // can take a very long time to complete. Use |BN_GENCB_set| to initialise a
 // |BN_GENCB| structure.
diff --git a/include/openssl/ec.h b/include/openssl/ec.h
index dee41b7..b34605f 100644
--- a/include/openssl/ec.h
+++ b/include/openssl/ec.h
@@ -402,5 +402,6 @@
 #define EC_R_GROUP_MISMATCH 130
 #define EC_R_INVALID_COFACTOR 131
 #define EC_R_PUBLIC_KEY_VALIDATION_FAILED 132
+#define EC_R_INVALID_SCALAR 133
 
 #endif  // OPENSSL_HEADER_EC_H