Make copy_from_prebuf constant time.

(Imported from upstream's 708dc2f1291e104fe4eef810bb8ffc1fae5b19c1.)

The performance penalty varies from platform to platform, and even with
key length. For RSA-2048 signing it was observed to reach almost 10%.

This is part of the fix for CVE-2016-0702.

Change-Id: Ie0860bf3e531196f03102db1bc48eeaf30ab1d58
Reviewed-on: https://boringssl-review.googlesource.com/7241
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/bn/exponentiation.c b/crypto/bn/exponentiation.c
index b168347..3ab574f 100644
--- a/crypto/bn/exponentiation.c
+++ b/crypto/bn/exponentiation.c
@@ -788,29 +788,65 @@
  * pattern as far as cache lines are concerned. The following functions are
  * used to transfer a BIGNUM from/to that table. */
 static int copy_to_prebuf(const BIGNUM *b, int top, unsigned char *buf, int idx,
-                          int width) {
-  size_t i, j;
+                          int window) {
+  int i, j;
+  const int width = 1 << window;
+  BN_ULONG *table = (BN_ULONG *) buf;
 
   if (top > b->top) {
     top = b->top; /* this works because 'buf' is explicitly zeroed */
   }
-  for (i = 0, j = idx; i < top * sizeof b->d[0]; i++, j += width) {
-    buf[j] = ((unsigned char *)b->d)[i];
+
+  for (i = 0, j = idx; i < top; i++, j += width)  {
+    table[j] = b->d[i];
   }
 
   return 1;
 }
 
 static int copy_from_prebuf(BIGNUM *b, int top, unsigned char *buf, int idx,
-                            int width) {
-  size_t i, j;
+                            int window) {
+  int i, j;
+  const int width = 1 << window;
+  volatile BN_ULONG *table = (volatile BN_ULONG *)buf;
 
   if (bn_wexpand(b, top) == NULL) {
     return 0;
   }
 
-  for (i = 0, j = idx; i < top * sizeof b->d[0]; i++, j += width) {
-    ((unsigned char *)b->d)[i] = buf[j];
+  if (window <= 3) {
+    for (i = 0; i < top; i++, table += width) {
+      BN_ULONG acc = 0;
+
+      for (j = 0; j < width; j++) {
+        acc |= table[j] & ((BN_ULONG)0 - (constant_time_eq_int(j, idx) & 1));
+      }
+
+      b->d[i] = acc;
+    }
+  } else {
+    int xstride = 1 << (window - 2);
+    BN_ULONG y0, y1, y2, y3;
+
+    i = idx >> (window - 2); /* equivalent of idx / xstride */
+    idx &= xstride - 1;      /* equivalent of idx % xstride */
+
+    y0 = (BN_ULONG)0 - (constant_time_eq_int(i, 0) & 1);
+    y1 = (BN_ULONG)0 - (constant_time_eq_int(i, 1) & 1);
+    y2 = (BN_ULONG)0 - (constant_time_eq_int(i, 2) & 1);
+    y3 = (BN_ULONG)0 - (constant_time_eq_int(i, 3) & 1);
+
+    for (i = 0; i < top; i++, table += width) {
+      BN_ULONG acc = 0;
+
+      for (j = 0; j < xstride; j++) {
+        acc |= ((table[j + 0 * xstride] & y0) | (table[j + 1 * xstride] & y1) |
+                (table[j + 2 * xstride] & y2) | (table[j + 3 * xstride] & y3)) &
+               ((BN_ULONG)0 - (constant_time_eq_int(j, idx) & 1));
+      }
+
+      b->d[i] = acc;
+    }
   }
 
   b->top = top;
@@ -1129,8 +1165,8 @@
   } else
 #endif
   {
-    if (!copy_to_prebuf(&tmp, top, powerbuf, 0, numPowers) ||
-        !copy_to_prebuf(&am, top, powerbuf, 1, numPowers)) {
+    if (!copy_to_prebuf(&tmp, top, powerbuf, 0, window) ||
+        !copy_to_prebuf(&am, top, powerbuf, 1, window)) {
       goto err;
     }
 
@@ -1141,13 +1177,13 @@
      */
     if (window > 1) {
       if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx) ||
-          !copy_to_prebuf(&tmp, top, powerbuf, 2, numPowers)) {
+          !copy_to_prebuf(&tmp, top, powerbuf, 2, window)) {
         goto err;
       }
       for (i = 3; i < numPowers; i++) {
         /* Calculate a^i = a^(i-1) * a */
         if (!BN_mod_mul_montgomery(&tmp, &am, &tmp, mont, ctx) ||
-            !copy_to_prebuf(&tmp, top, powerbuf, i, numPowers)) {
+            !copy_to_prebuf(&tmp, top, powerbuf, i, window)) {
           goto err;
         }
       }
@@ -1157,7 +1193,7 @@
     for (wvalue = 0, i = bits % window; i >= 0; i--, bits--) {
       wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
     }
-    if (!copy_from_prebuf(&tmp, top, powerbuf, wvalue, numPowers)) {
+    if (!copy_from_prebuf(&tmp, top, powerbuf, wvalue, window)) {
       goto err;
     }
 
@@ -1176,7 +1212,7 @@
       }
 
       /* Fetch the appropriate pre-computed value from the pre-buf */
-      if (!copy_from_prebuf(&am, top, powerbuf, wvalue, numPowers)) {
+      if (!copy_from_prebuf(&am, top, powerbuf, wvalue, window)) {
         goto err;
       }