Cap Montgomery moduli to 8 KiB.

We have two places where the current cap on BIGNUM sizes (64 MiB) is too
large, both involving Montgomery reduction: bn_mul_mont allocates a
spare value on the stack, and BN_mod_exp_mont_constime needs to allocate
a buffer of up to 64 contiguous values, which may overflow an int.

Make BN_MONT_CTX reject any BIGNUM larger than 8 KiB. This is 65,536
bits which is well above our maximum RSA key size, 16,384 bits. Ideally
we'd just apply this in bn_wexpand, to all BIGNUMs across the board, but
we found one caller that depends on creating an 8 MiB BIGNUM.

Update-Note: This will not affect any cryptography implemented by
BoringSSL, such as RSA, but other callers may run into this limit. If
necessary, we can raise this a bit, but the stack allocation means we
don't want to go *significantly* beyond what's in this CL.

Fixed: 541
Change-Id: Ia00f3ea6714a5042434f446943db55a533752dc5
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/55266
Reviewed-by: Bob Beck <bbe@google.com>
Commit-Queue: David Benjamin <davidben@google.com>
diff --git a/crypto/fipsmodule/bn/bn_test.cc b/crypto/fipsmodule/bn/bn_test.cc
index 4e35645..9d9e1d3 100644
--- a/crypto/fipsmodule/bn/bn_test.cc
+++ b/crypto/fipsmodule/bn/bn_test.cc
@@ -2775,6 +2775,27 @@
   BN_free(BN_mod_sqrt(nullptr, bn2140142.get(), bn4588033.get(), ctx()));
 }
 
+// Test that constructing Montgomery contexts for large bignums is not possible.
+// Our Montgomery reduction implementation stack-allocates temporaries, so we
+// cap how large of moduli we accept.
+TEST_F(BNTest, MontgomeryLarge) {
+  std::vector<uint8_t> large_bignum_bytes(16 * 1024, 0xff);
+  bssl::UniquePtr<BIGNUM> large_bignum(
+      BN_bin2bn(large_bignum_bytes.data(), large_bignum_bytes.size(), nullptr));
+  ASSERT_TRUE(large_bignum);
+  bssl::UniquePtr<BN_MONT_CTX> mont(
+      BN_MONT_CTX_new_for_modulus(large_bignum.get(), ctx()));
+  EXPECT_FALSE(mont);
+
+  // The same limit should apply when |BN_mod_exp_mont_consttime| internally
+  // constructs a |BN_MONT_CTX|.
+  bssl::UniquePtr<BIGNUM> r(BN_new());
+  ASSERT_TRUE(r);
+  EXPECT_FALSE(BN_mod_exp_mont_consttime(r.get(), BN_value_one(),
+                                         large_bignum.get(), large_bignum.get(),
+                                         ctx(), nullptr));
+}
+
 #if defined(OPENSSL_BN_ASM_MONT) && defined(SUPPORTS_ABI_TEST)
 TEST_F(BNTest, BNMulMontABI) {
   for (size_t words : {4, 5, 6, 7, 8, 16, 32}) {
diff --git a/crypto/fipsmodule/bn/exponentiation.c b/crypto/fipsmodule/bn/exponentiation.c
index afa8e71..859cf65 100644
--- a/crypto/fipsmodule/bn/exponentiation.c
+++ b/crypto/fipsmodule/bn/exponentiation.c
@@ -109,6 +109,7 @@
 #include <openssl/bn.h>
 
 #include <assert.h>
+#include <limits.h>
 #include <stdlib.h>
 #include <string.h>
 
@@ -868,7 +869,7 @@
 // be revised now that our implementation is no longer cache-time-dependent.
 #define BN_window_bits_for_ctime_exponent_size(b) \
   ((b) > 937 ? 6 : (b) > 306 ? 5 : (b) > 89 ? 4 : (b) > 22 ? 3 : 1)
-#define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE (6)
+#define BN_MAX_MOD_EXP_CTIME_WINDOW (6)
 
 // This variant of |BN_mod_exp_mont| uses fixed windows and fixed memory access
 // patterns to protect secret exponents (cf. the hyper-threading timing attacks
@@ -950,6 +951,16 @@
 
   // Get the window size to use with size of p.
   int window = BN_window_bits_for_ctime_exponent_size(bits);
+  assert(window <= BN_MAX_MOD_EXP_CTIME_WINDOW);
+
+  // Calculating |powerbuf_len| below cannot overflow because of the bound on
+  // Montgomery reduction.
+  assert((size_t)top <= BN_MONTGOMERY_MAX_WORDS);
+  static_assert(
+      BN_MONTGOMERY_MAX_WORDS <=
+          INT_MAX / sizeof(BN_ULONG) / ((1 << BN_MAX_MOD_EXP_CTIME_WINDOW) + 3),
+      "powerbuf_len may overflow");
+
 #if defined(OPENSSL_BN_ASM_MONT5)
   if (window >= 5) {
     window = 5;  // ~5% improvement for RSA2048 sign, and even for RSA4096
diff --git a/crypto/fipsmodule/bn/internal.h b/crypto/fipsmodule/bn/internal.h
index f7adfe9..85009a9 100644
--- a/crypto/fipsmodule/bn/internal.h
+++ b/crypto/fipsmodule/bn/internal.h
@@ -350,6 +350,12 @@
 int bn_rand_secret_range(BIGNUM *r, int *out_is_uniform, BN_ULONG min_inclusive,
                          const BIGNUM *max_exclusive);
 
+// BN_MONTGOMERY_MAX_WORDS is the maximum numer of words allowed in a |BIGNUM|
+// used with Montgomery reduction. Ideally this limit would be applied to all
+// |BIGNUM|s, in |bn_wexpand|, but the exactfloat library needs to create 8 MiB
+// values for other operations.
+#define BN_MONTGOMERY_MAX_WORDS (8 * 1024 / sizeof(BN_ULONG))
+
 #if !defined(OPENSSL_NO_ASM) &&                         \
     (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || \
      defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64))
@@ -362,11 +368,13 @@
 // If at least one of |ap| or |bp| is fully reduced, |rp| will be fully reduced.
 // If neither is fully-reduced, the output may not be either.
 //
+// This function allocates |num| words on the stack, so |num| should be at most
+// |BN_MONTGOMERY_MAX_WORDS|.
+//
 // TODO(davidben): The x86_64 implementation expects a 32-bit input and masks
 // off upper bits. The aarch64 implementation expects a 64-bit input and does
 // not. |size_t| is the safer option but not strictly correct for x86_64. But
-// this function implicitly already has a bound on the size of |num| because it
-// internally creates |num|-sized stack allocation.
+// the |BN_MONTGOMERY_MAX_WORDS| bound makes this moot.
 //
 // See also discussion in |ToWord| in abi_test.h for notes on smaller-than-word
 // inputs.
diff --git a/crypto/fipsmodule/bn/montgomery.c b/crypto/fipsmodule/bn/montgomery.c
index d04e91a..92bfa5e 100644
--- a/crypto/fipsmodule/bn/montgomery.c
+++ b/crypto/fipsmodule/bn/montgomery.c
@@ -172,6 +172,10 @@
     OPENSSL_PUT_ERROR(BN, BN_R_NEGATIVE_NUMBER);
     return 0;
   }
+  if (!bn_fits_in_words(mod, BN_MONTGOMERY_MAX_WORDS)) {
+    OPENSSL_PUT_ERROR(BN, BN_R_BIGNUM_TOO_LONG);
+    return 0;
+  }
 
   // Save the modulus.
   if (!BN_copy(&mont->N, mod)) {
@@ -428,6 +432,9 @@
     if (!bn_wexpand(r, num)) {
       return 0;
     }
+    // This bound is implied by |bn_mont_ctx_set_N_and_n0|. |bn_mul_mont|
+    // allocates |num| words on the stack, so |num| cannot be too large.
+    assert((size_t)num <= BN_MONTGOMERY_MAX_WORDS);
     if (!bn_mul_mont(r->d, a->d, b->d, mont->N.d, mont->n0, num)) {
       // The check above ensures this won't happen.
       assert(0);