Extract the single-subtraction reduction into a helper function.

We do this in four different places, with the same long comment, and I'm
about to add yet another one.

Change-Id: If28e3f87ea71020d9b07b92e8947f3848473d99d
Reviewed-on: https://boringssl-review.googlesource.com/26964
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/fipsmodule/bn/div.c b/crypto/fipsmodule/bn/div.c
index 2a3bcdd..d29ea34 100644
--- a/crypto/fipsmodule/bn/div.c
+++ b/crypto/fipsmodule/bn/div.c
@@ -414,6 +414,35 @@
   return (d->neg ? BN_sub : BN_add)(r, r, d);
 }
 
+BN_ULONG bn_reduce_once(BN_ULONG *r, const BN_ULONG *a, BN_ULONG carry,
+                        const BN_ULONG *m, size_t num) {
+  assert(r != a);
+  // |r| = |a| - |m|. |bn_sub_words| performs the bulk of the subtraction, and
+  // then we apply the borrow to |carry|.
+  carry -= bn_sub_words(r, a, m, num);
+  // We know 0 <= |a| < 2*|m|, so -|m| <= |r| < |m|.
+  //
+  // If 0 <= |r| < |m|, |r| fits in |num| words and |carry| is zero. We then
+  // wish to select |r| as the answer. Otherwise -m <= r < 0 and we wish to
+  // return |r| + |m|, or |a|. |carry| must then be -1 or all ones. In both
+  // cases, |carry| is a suitable input to |bn_select_words|.
+  //
+  // Although |carry| may be one if it was one on input and |bn_sub_words|
+  // returns zero, this would give |r| > |m|, violating our input assumptions.
+  assert(carry == 0 || carry == (BN_ULONG)-1);
+  bn_select_words(r, carry, a /* r < 0 */, r /* r >= 0 */, num);
+  return carry;
+}
+
+BN_ULONG bn_reduce_once_in_place(BN_ULONG *r, BN_ULONG carry, const BN_ULONG *m,
+                                 BN_ULONG *tmp, size_t num) {
+  // See |bn_reduce_once| for why this logic works.
+  carry -= bn_sub_words(tmp, r, m, num);
+  assert(carry == 0 || carry == (BN_ULONG)-1);
+  bn_select_words(r, carry, r /* tmp < 0 */, tmp /* tmp >= 0 */, num);
+  return carry;
+}
+
 // bn_mod_sub_words sets |r| to |a| - |b| (mod |m|), using |tmp| as scratch
 // space. Each array is |num| words long. |a| and |b| must be < |m|. Any pair of
 // |r|, |a|, and |b| may alias.
@@ -431,27 +460,8 @@
 // |r|, |a|, and |b| may alias.
 static void bn_mod_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
                              const BN_ULONG *m, BN_ULONG *tmp, size_t num) {
-  // tmp = a + b. Note the result fits in |num|+1 words. We store the extra word
-  // in |carry|.
-  BN_ULONG carry = bn_add_words(tmp, a, b, num);
-  // r = a + b - m. We use |bn_sub_words| to perform the bulk of the
-  // subtraction, and then apply the borrow to |carry|.
-  carry -= bn_sub_words(r, tmp, m, num);
-  // |a| and |b| were both fully-reduced, so we know:
-  //
-  //   0 + 0 - m <= r < m + m - m
-  //          -m <= r < m
-  //
-  // If 0 <= |r| < |m|, |r| fits in |num| words and |carry| is zero. We then
-  // wish to select |r| as the answer. Otherwise -m <= r < 0 and we wish to
-  // return |r| + |m|, or |tmp|. |carry| must then be -1 or all ones. In both
-  // cases, |carry| is a suitable input to |bn_select_words|.
-  //
-  // Although |carry| may be one if |bn_add_words| returns one and
-  // |bn_sub_words| returns zero, this would give |r| > |m|, which violates are
-  // input assumptions.
-  assert(carry == 0 || carry == (BN_ULONG)-1);
-  bn_select_words(r, carry, tmp /* r < 0 */, r /* r >= 0 */, num);
+  BN_ULONG carry = bn_add_words(r, a, b, num);
+  bn_reduce_once_in_place(r, carry, m, tmp, num);
 }
 
 int bn_div_consttime(BIGNUM *quotient, BIGNUM *remainder,
@@ -504,27 +514,14 @@
       // extra word in |carry|.
       BN_ULONG carry = bn_add_words(r->d, r->d, r->d, divisor->width);
       r->d[0] |= (numerator->d[i] >> bit) & 1;
-      // tmp = r - divisor. We use |bn_sub_words| to perform the bulk of the
-      // subtraction, and then apply the borrow to |carry|.
-      carry -= bn_sub_words(tmp->d, r->d, divisor->d, divisor->width);
       // |r| was previously fully-reduced, so we know:
-      //
-      //    2*0 - divisor <= tmp <= 2*(divisor-1) + 1 - divisor
-      //         -divisor <= tmp < divisor
-      //
-      // If 0 <= |tmp| < |divisor|, |tmp| fits in |divisor->width| and |carry|
-      // is zero. We then wish to select |tmp|. Otherwise,
-      // -|divisor| <= |tmp| < 0 and we wish to select |tmp| + |divisor|, which
-      // is |r|. |carry| must then be -1 (all ones). In both cases, |carry| is a
-      // suitable input to |bn_select_words|.
-      //
-      // Although |carry| may be one if |bn_add_words| returns one and
-      // |bn_sub_words| returns zero, this would give |r| > |d|, which violates
-      // the loop invariant.
-      bn_select_words(r->d, carry, r->d /* tmp < 0 */, tmp->d /* tmp >= 0 */,
-                      divisor->width);
+      //      2*0 <= r <= 2*(divisor-1) + 1
+      //        0 <= r <= 2*divisor - 1 < 2*divisor.
+      // Thus |r| satisfies the preconditions for |bn_reduce_once_in_place|.
+      BN_ULONG subtracted = bn_reduce_once_in_place(r->d, carry, divisor->d,
+                                                    tmp->d, divisor->width);
       // The corresponding bit of the quotient is set iff we needed to subtract.
-      q->d[i] |= (~carry & 1) << bit;
+      q->d[i] |= (~subtracted & 1) << bit;
     }
   }
 
diff --git a/crypto/fipsmodule/bn/internal.h b/crypto/fipsmodule/bn/internal.h
index a8ad129..7d96151 100644
--- a/crypto/fipsmodule/bn/internal.h
+++ b/crypto/fipsmodule/bn/internal.h
@@ -375,6 +375,21 @@
 OPENSSL_EXPORT int bn_rshift_secret_shift(BIGNUM *r, const BIGNUM *a,
                                           unsigned n, BN_CTX *ctx);
 
+// bn_reduce_once sets |r| to |a| mod |m| where 0 <= |a| < 2*|m|. It returns
+// zero if |a| < |m| and a mask of all ones if |a| >= |m|. Each array is |num|
+// words long, but |a| has an additional word specified by |carry|. |carry| must
+// be zero or one, as implied by the bounds on |a|.
+//
+// |r|, |a|, and |m| may not alias. Use |bn_reduce_once_in_place| if |r| and |a|
+// must alias.
+BN_ULONG bn_reduce_once(BN_ULONG *r, const BN_ULONG *a, BN_ULONG carry,
+                        const BN_ULONG *m, size_t num);
+
+// bn_reduce_once_in_place behaves like |bn_reduce_once| but acts in-place on
+// |r|, using |tmp| as scratch space. |r|, |tmp|, and |m| may not alias.
+BN_ULONG bn_reduce_once_in_place(BN_ULONG *r, BN_ULONG carry, const BN_ULONG *m,
+                                 BN_ULONG *tmp, size_t num);
+
 
 // Constant-time non-modular arithmetic.
 //
diff --git a/crypto/fipsmodule/bn/montgomery.c b/crypto/fipsmodule/bn/montgomery.c
index d6e2ad5..7ce8c4c 100644
--- a/crypto/fipsmodule/bn/montgomery.c
+++ b/crypto/fipsmodule/bn/montgomery.c
@@ -289,18 +289,7 @@
   a += num_n;
 
   // |a| thus requires at most one additional subtraction |n| to be reduced.
-  // Subtract |n| and select the answer in constant time.
-  OPENSSL_COMPILE_ASSERT(sizeof(BN_ULONG) <= sizeof(crypto_word_t),
-                         crypto_word_t_too_small);
-  BN_ULONG v = bn_sub_words(r, a, n, num_n) - carry;
-  // |v| is one if |a| - |n| underflowed or zero if it did not. Note |v| cannot
-  // be -1. That would imply the subtraction did not fit in |num_n| words, and
-  // we know at most one subtraction is needed.
-  v = 0u - v;
-  for (size_t i = 0; i < num_n; i++) {
-    r[i] = constant_time_select_w(v, a[i], r[i]);
-    a[i] = 0;
-  }
+  bn_reduce_once(r, a, carry, n, num_n);
   return 1;
 }