Factor out BN_to_montgomery(1) optimization.

This cuts down on a duplicated place where we mess with bn->top. It also
also better abstracts away what determines the value of R.

(I ordered this wrong and rebasing will be annoying. Specifically, the
question is what happens if the modulus is non-minimal. In
https://boringssl-review.googlesource.com/c/boringssl/+/25250/, R will
be determined by the stored width of mont->N, so we want to use mont's
copy of the modulus. Though, one way or another, the important part is
that it's inside the Montgomery abstraction.)

Bug: 232
Change-Id: I74212e094c8a47f396b87982039e49048a130916
Reviewed-on: https://boringssl-review.googlesource.com/25247
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/fipsmodule/bn/exponentiation.c b/crypto/fipsmodule/bn/exponentiation.c
index 63c1c05..bdcc9d3 100644
--- a/crypto/fipsmodule/bn/exponentiation.c
+++ b/crypto/fipsmodule/bn/exponentiation.c
@@ -666,22 +666,7 @@
     }
   }
 
-  // Set |r| to one in Montgomery form. If the high bit of |m| is set, |m| is
-  // close to R and we subtract rather than perform Montgomery reduction.
-  if (m->d[m->top - 1] & (((BN_ULONG)1) << (BN_BITS2 - 1))) {
-    if (!bn_wexpand(r, m->top)) {
-      goto err;
-    }
-    // r = 2^(top*BN_BITS2) - m
-    r->d[0] = 0 - m->d[0];
-    for (int i = 1; i < m->top; i++) {
-      r->d[i] = ~m->d[i];
-    }
-    r->top = m->top;
-    // The upper words will be zero if the corresponding words of |m| were
-    // 0xfff[...], so call |bn_correct_top|.
-    bn_correct_top(r);
-  } else if (!BN_to_montgomery(r, BN_value_one(), mont, ctx)) {
+  if (!bn_one_to_montgomery(r, mont, ctx)) {
     goto err;
   }
 
@@ -746,7 +731,6 @@
 int bn_mod_exp_mont_small(BN_ULONG *r, size_t num_r, const BN_ULONG *a,
                           size_t num_a, const BN_ULONG *p, size_t num_p,
                           const BN_MONT_CTX *mont) {
-  const BN_ULONG *n = mont->N.d;
   size_t num_n = mont->N.top;
   if (num_n != num_a || num_n != num_r || num_n > BN_SMALL_MAX_WORDS) {
     OPENSSL_PUT_ERROR(BN, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
@@ -793,16 +777,7 @@
     }
   }
 
-  // Set |r| to one in Montgomery form. If the high bit of |m| is set, |m| is
-  // close to R and we subtract rather than perform Montgomery reduction.
-  if (n[num_n - 1] & (((BN_ULONG)1) << (BN_BITS2 - 1))) {
-    // r = 2^(top*BN_BITS2) - m
-    r[0] = 0 - n[0];
-    for (size_t i = 1; i < num_n; i++) {
-      r[i] = ~n[i];
-    }
-  } else if (!bn_from_montgomery_small(r, num_r, mont->RR.d, mont->RR.top,
-                                       mont)) {
+  if (!bn_one_to_montgomery_small(r, num_r, mont)) {
     goto err;
   }
 
@@ -1118,16 +1093,7 @@
   tmp.neg = am.neg = 0;
   tmp.flags = am.flags = BN_FLG_STATIC_DATA;
 
-// prepare a^0 in Montgomery domain
-// by Shay Gueron's suggestion
-  if (m->d[top - 1] & (((BN_ULONG)1) << (BN_BITS2 - 1))) {
-    // 2^(top*BN_BITS2) - m
-    tmp.d[0] = 0 - m->d[0];
-    for (i = 1; i < top; i++) {
-      tmp.d[i] = ~m->d[i];
-    }
-    tmp.top = top;
-  } else if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx)) {
+  if (!bn_one_to_montgomery(&tmp, mont, ctx)) {
     goto err;
   }
 
diff --git a/crypto/fipsmodule/bn/internal.h b/crypto/fipsmodule/bn/internal.h
index 140353f..83102d8 100644
--- a/crypto/fipsmodule/bn/internal.h
+++ b/crypto/fipsmodule/bn/internal.h
@@ -342,6 +342,11 @@
 // otherwise.
 int bn_is_bit_set_words(const BN_ULONG *a, size_t num, unsigned bit);
 
+// bn_one_to_montgomery sets |r| to one in Montgomery form. It returns one on
+// success and zero on error. This function treats the bit width of the modulus
+// as public.
+int bn_one_to_montgomery(BIGNUM *r, const BN_MONT_CTX *mont, BN_CTX *ctx);
+
 
 // Low-level operations for small numbers.
 //
@@ -388,6 +393,13 @@
 int bn_from_montgomery_small(BN_ULONG *r, size_t num_r, const BN_ULONG *a,
                              size_t num_a, const BN_MONT_CTX *mont);
 
+// bn_one_to_montgomery_small sets |r| to one in Montgomery form. It returns one
+// on success and zero on error. |num_r| must be the length of the modulus,
+// which is |mont->N.top|. This function treats the bit width of the modulus as
+// public.
+int bn_one_to_montgomery_small(BN_ULONG *r, size_t num_r,
+                               const BN_MONT_CTX *mont);
+
 // bn_mod_mul_montgomery_small sets |r| to |a| * |b| mod |mont->N|. Both inputs
 // and outputs are in the Montgomery domain. |num_r| must be the length of the
 // modulus, which is |mont->N.top|. This function returns one on success and
diff --git a/crypto/fipsmodule/bn/montgomery.c b/crypto/fipsmodule/bn/montgomery.c
index e8505da..6b562a0 100644
--- a/crypto/fipsmodule/bn/montgomery.c
+++ b/crypto/fipsmodule/bn/montgomery.c
@@ -352,6 +352,29 @@
   return ret;
 }
 
+int bn_one_to_montgomery(BIGNUM *r, const BN_MONT_CTX *mont, BN_CTX *ctx) {
+  // If the high bit of |n| is set, R = 2^(top*BN_BITS2) < 2 * |n|, so we
+  // compute R - |n| rather than perform Montgomery reduction.
+  const BIGNUM *n = &mont->N;
+  if (n->top > 0 && (n->d[n->top - 1] >> (BN_BITS2 - 1)) != 0) {
+    if (!bn_wexpand(r, n->top)) {
+      return 0;
+    }
+    r->d[0] = 0 - n->d[0];
+    for (int i = 1; i < n->top; i++) {
+      r->d[i] = ~n->d[i];
+    }
+    r->top = n->top;
+    r->neg = 0;
+    // The upper words will be zero if the corresponding words of |n| were
+    // 0xfff[...], so call |bn_correct_top|.
+    bn_correct_top(r);
+    return 1;
+  }
+
+  return BN_from_montgomery(r, &mont->RR, mont, ctx);
+}
+
 int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
                           const BN_MONT_CTX *mont, BN_CTX *ctx) {
 #if !defined(OPENSSL_BN_ASM_MONT)
@@ -439,6 +462,28 @@
   return ret;
 }
 
+int bn_one_to_montgomery_small(BN_ULONG *r, size_t num_r,
+                               const BN_MONT_CTX *mont) {
+  const BN_ULONG *n = mont->N.d;
+  size_t num_n = mont->N.top;
+  if (num_n == 0 || num_r != num_n) {
+    OPENSSL_PUT_ERROR(BN, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
+    return 0;
+  }
+
+  // If the high bit of |n| is set, R = 2^(num_n*BN_BITS2) < 2 * |n|, so we
+  // compute R - |n| rather than perform Montgomery reduction.
+  if (num_n > 0 && (n[num_n - 1] >> (BN_BITS2 - 1)) != 0) {
+    r[0] = 0 - n[0];
+    for (size_t i = 1; i < num_n; i++) {
+      r[i] = ~n[i];
+    }
+    return 1;
+  }
+
+  return bn_from_montgomery_small(r, num_r, mont->RR.d, mont->RR.top, mont);
+}
+
 int bn_mod_mul_montgomery_small(BN_ULONG *r, size_t num_r, const BN_ULONG *a,
                                 size_t num_a, const BN_ULONG *b, size_t num_b,
                                 const BN_MONT_CTX *mont) {