Use |crypto_word_t| and |size_t| more consistently in ECC scalar recoding.

Use |crypto_word_t| as the type for secret values in scalar recoding.
Use |size_t| as the type of array indexes in scalar recoding. Use
explicit casts where a larger type is (losslessly) truncated to a
smaller type. With this change, |uint64_t| is no longer used in
p256.c when building in 32-bit mode, |unsigned| is not used in any of
the affected modules, and |uint8_t| and |char| are no longer used for
secret values in the ECC recoding.

When given the choice of doing non-array-indexing arithmetic (e.g. shifts)
on |size_t| values or |crypto_word_t| values, prefer doing it on
|crypto_word_t| values. More generally, try to use |size_t| only for
sizes and array indexes.

This is part of a bigger project to minimize the use of types other than
|crypto_word_t| for secret values. This is also part of a larger project
to make the ECC code more consistent.

Avoid changing the loop indexing in the P-256 scalar multiplication from
|int| to |size_t|. The P-224 code does use |size_t|, but it is less clear
than the P-256 code, where |i - 1| results in a negative/underflowed
value when |i| is zero.

Change-Id: I78cb404455c2340a4f8c9688d36c0d425bfcc50b
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/41685
Reviewed-by: David Benjamin <davidben@google.com>
Commit-Queue: David Benjamin <davidben@google.com>
diff --git a/crypto/fipsmodule/ec/internal.h b/crypto/fipsmodule/ec/internal.h
index 836d3ca..18aabb0 100644
--- a/crypto/fipsmodule/ec/internal.h
+++ b/crypto/fipsmodule/ec/internal.h
@@ -703,7 +703,8 @@
 int ec_GFp_mont_felem_from_bytes(const EC_GROUP *group, EC_FELEM *out,
                                  const uint8_t *in, size_t len);
 
-void ec_GFp_nistp_recode_scalar_bits(uint8_t *sign, uint8_t *digit, uint8_t in);
+void ec_GFp_nistp_recode_scalar_bits(crypto_word_t *sign, crypto_word_t *digit,
+                                     crypto_word_t in);
 
 const EC_METHOD *EC_GFp_nistp224_method(void);
 const EC_METHOD *EC_GFp_nistp256_method(void);
diff --git a/crypto/fipsmodule/ec/p224-64.c b/crypto/fipsmodule/ec/p224-64.c
index c106de0..da4308c 100644
--- a/crypto/fipsmodule/ec/p224-64.c
+++ b/crypto/fipsmodule/ec/p224-64.c
@@ -866,7 +866,7 @@
 }
 
 // p224_get_bit returns the |i|th bit in |in|
-static char p224_get_bit(const p224_felem_bytearray in, size_t i) {
+static crypto_word_t p224_get_bit(const p224_felem_bytearray in, size_t i) {
   if (i >= 224) {
     return 0;
   }
@@ -977,13 +977,13 @@
 
     // Add every 5 doublings.
     if (i % 5 == 0) {
-      uint64_t bits = p224_get_bit(scalar->bytes, i + 4) << 5;
+      crypto_word_t bits = p224_get_bit(scalar->bytes, i + 4) << 5;
       bits |= p224_get_bit(scalar->bytes, i + 3) << 4;
       bits |= p224_get_bit(scalar->bytes, i + 2) << 3;
       bits |= p224_get_bit(scalar->bytes, i + 1) << 2;
       bits |= p224_get_bit(scalar->bytes, i) << 1;
       bits |= p224_get_bit(scalar->bytes, i - 1);
-      uint8_t sign, digit;
+      crypto_word_t sign, digit;
       ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
 
       // Select the point to add or subtract.
@@ -1022,7 +1022,7 @@
     }
 
     // First, look 28 bits upwards.
-    uint64_t bits = p224_get_bit(scalar->bytes, i + 196) << 3;
+    crypto_word_t bits = p224_get_bit(scalar->bytes, i + 196) << 3;
     bits |= p224_get_bit(scalar->bytes, i + 140) << 2;
     bits |= p224_get_bit(scalar->bytes, i + 84) << 1;
     bits |= p224_get_bit(scalar->bytes, i + 28);
@@ -1080,14 +1080,15 @@
     // Add multiples of the generator.
     if (i <= 27) {
       // First, look 28 bits upwards.
-      uint64_t bits = p224_get_bit(g_scalar->bytes, i + 196) << 3;
+      crypto_word_t bits = p224_get_bit(g_scalar->bytes, i + 196) << 3;
       bits |= p224_get_bit(g_scalar->bytes, i + 140) << 2;
       bits |= p224_get_bit(g_scalar->bytes, i + 84) << 1;
       bits |= p224_get_bit(g_scalar->bytes, i + 28);
 
+      size_t index = (size_t)bits;
       p224_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 1 /* mixed */,
-                     g_p224_pre_comp[1][bits][0], g_p224_pre_comp[1][bits][1],
-                     g_p224_pre_comp[1][bits][2]);
+                     g_p224_pre_comp[1][index][0], g_p224_pre_comp[1][index][1],
+                     g_p224_pre_comp[1][index][2]);
       assert(!skip);
 
       // Second, look at the current position.
@@ -1095,20 +1096,21 @@
       bits |= p224_get_bit(g_scalar->bytes, i + 112) << 2;
       bits |= p224_get_bit(g_scalar->bytes, i + 56) << 1;
       bits |= p224_get_bit(g_scalar->bytes, i);
+      index = (size_t)bits;
       p224_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 1 /* mixed */,
-                     g_p224_pre_comp[0][bits][0], g_p224_pre_comp[0][bits][1],
-                     g_p224_pre_comp[0][bits][2]);
+                     g_p224_pre_comp[0][index][0], g_p224_pre_comp[0][index][1],
+                     g_p224_pre_comp[0][index][2]);
     }
 
     // Incorporate |p_scalar| every 5 doublings.
     if (i % 5 == 0) {
-      uint64_t bits = p224_get_bit(p_scalar->bytes, i + 4) << 5;
+      crypto_word_t bits = p224_get_bit(p_scalar->bytes, i + 4) << 5;
       bits |= p224_get_bit(p_scalar->bytes, i + 3) << 4;
       bits |= p224_get_bit(p_scalar->bytes, i + 2) << 3;
       bits |= p224_get_bit(p_scalar->bytes, i + 1) << 2;
       bits |= p224_get_bit(p_scalar->bytes, i) << 1;
       bits |= p224_get_bit(p_scalar->bytes, i - 1);
-      uint8_t sign, digit;
+      crypto_word_t sign, digit;
       ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
 
       // Select the point to add or subtract.
diff --git a/crypto/fipsmodule/ec/p256-x86_64.c b/crypto/fipsmodule/ec/p256-x86_64.c
index 973130f..29ae193 100644
--- a/crypto/fipsmodule/ec/p256-x86_64.c
+++ b/crypto/fipsmodule/ec/p256-x86_64.c
@@ -50,8 +50,8 @@
 
 // Recode window to a signed digit, see |ec_GFp_nistp_recode_scalar_bits| in
 // util.c for details
-static unsigned booth_recode_w5(unsigned in) {
-  unsigned s, d;
+static crypto_word_t booth_recode_w5(crypto_word_t in) {
+  crypto_word_t s, d;
 
   s = ~((in >> 5) - 1);
   d = (1 << 6) - in - 1;
@@ -61,8 +61,8 @@
   return (d << 1) + (s & 1);
 }
 
-static unsigned booth_recode_w7(unsigned in) {
-  unsigned s, d;
+static crypto_word_t booth_recode_w7(crypto_word_t in) {
+  crypto_word_t s, d;
 
   s = ~((in >> 7) - 1);
   d = (1 << 8) - in - 1;
@@ -194,8 +194,8 @@
   assert(p_scalar != NULL);
   assert(group->field.width == P256_LIMBS);
 
-  static const unsigned kWindowSize = 5;
-  static const unsigned kMask = (1 << (5 /* kWindowSize */ + 1)) - 1;
+  static const size_t kWindowSize = 5;
+  static const crypto_word_t kMask = (1 << (5 /* kWindowSize */ + 1)) - 1;
 
   // A |P256_POINT| is (3 * 32) = 96 bytes, and the 64-byte alignment should
   // add no more than 63 bytes of overhead. Thus, |table| should require
@@ -232,17 +232,17 @@
 
   BN_ULONG tmp[P256_LIMBS];
   alignas(32) P256_POINT h;
-  unsigned index = 255;
-  unsigned wvalue = p_str[(index - 1) / 8];
+  size_t index = 255;
+  crypto_word_t wvalue = p_str[(index - 1) / 8];
   wvalue = (wvalue >> ((index - 1) % 8)) & kMask;
 
   ecp_nistz256_select_w5(r, table, booth_recode_w5(wvalue) >> 1);
 
   while (index >= 5) {
     if (index != 255) {
-      unsigned off = (index - 1) / 8;
+      size_t off = (index - 1) / 8;
 
-      wvalue = p_str[off] | p_str[off + 1] << 8;
+      wvalue = (crypto_word_t)p_str[off] | (crypto_word_t)p_str[off + 1] << 8;
       wvalue = (wvalue >> ((index - 1) % 8)) & kMask;
 
       wvalue = booth_recode_w5(wvalue);
@@ -283,21 +283,22 @@
   P256_POINT_AFFINE a;
 } p256_point_union_t;
 
-static unsigned calc_first_wvalue(unsigned *index, const uint8_t p_str[33]) {
-  static const unsigned kWindowSize = 7;
-  static const unsigned kMask = (1 << (7 /* kWindowSize */ + 1)) - 1;
+static crypto_word_t calc_first_wvalue(size_t *index, const uint8_t p_str[33]) {
+  static const size_t kWindowSize = 7;
+  static const crypto_word_t kMask = (1 << (7 /* kWindowSize */ + 1)) - 1;
   *index = kWindowSize;
 
-  unsigned wvalue = (p_str[0] << 1) & kMask;
+  crypto_word_t wvalue = (p_str[0] << 1) & kMask;
   return booth_recode_w7(wvalue);
 }
 
-static unsigned calc_wvalue(unsigned *index, const uint8_t p_str[33]) {
-  static const unsigned kWindowSize = 7;
-  static const unsigned kMask = (1 << (7 /* kWindowSize */ + 1)) - 1;
+static crypto_word_t calc_wvalue(size_t *index, const uint8_t p_str[33]) {
+  static const size_t kWindowSize = 7;
+  static const crypto_word_t kMask = (1 << (7 /* kWindowSize */ + 1)) - 1;
 
-  const unsigned off = (*index - 1) / 8;
-  unsigned wvalue = p_str[off] | p_str[off + 1] << 8;
+  const size_t off = (*index - 1) / 8;
+  crypto_word_t wvalue =
+      (crypto_word_t)p_str[off] | (crypto_word_t)p_str[off + 1] << 8;
   wvalue = (wvalue >> ((*index - 1) % 8)) & kMask;
   *index += kWindowSize;
 
@@ -325,8 +326,8 @@
   p_str[32] = 0;
 
   // First window
-  unsigned index = 0;
-  unsigned wvalue = calc_first_wvalue(&index, p_str);
+  size_t index = 0;
+  crypto_word_t wvalue = calc_first_wvalue(&index, p_str);
 
   ecp_nistz256_select_w7(&p.a, ecp_nistz256_precomputed[0], wvalue >> 1);
   ecp_nistz256_neg(p.p.Z, p.p.Y);
@@ -370,8 +371,8 @@
   p_str[32] = 0;
 
   // First window
-  unsigned index = 0;
-  unsigned wvalue = calc_first_wvalue(&index, p_str);
+  size_t index = 0;
+  crypto_word_t wvalue = calc_first_wvalue(&index, p_str);
 
   // Convert |p| from affine to Jacobian coordinates. We set Z to zero if |p|
   // is infinity and |ONE| otherwise. |p| was computed from the table, so it
diff --git a/crypto/fipsmodule/ec/p256.c b/crypto/fipsmodule/ec/p256.c
index 9355ac1..9f5694c 100644
--- a/crypto/fipsmodule/ec/p256.c
+++ b/crypto/fipsmodule/ec/p256.c
@@ -67,7 +67,7 @@
 
 static void fiat_p256_copy(fiat_p256_limb_t out[FIAT_P256_NLIMBS],
                            const fiat_p256_limb_t in1[FIAT_P256_NLIMBS]) {
-  for (int i = 0; i < FIAT_P256_NLIMBS; i++) {
+  for (size_t i = 0; i < FIAT_P256_NLIMBS; i++) {
     out[i] = in1[i];
   }
 }
@@ -393,7 +393,7 @@
 }
 
 // fiat_p256_get_bit returns the |i|th bit in |in|
-static char fiat_p256_get_bit(const uint8_t *in, int i) {
+static crypto_word_t fiat_p256_get_bit(const uint8_t *in, int i) {
   if (i < 0 || i >= 256) {
     return 0;
   }
@@ -498,20 +498,20 @@
 
     // do other additions every 5 doublings
     if (i % 5 == 0) {
-      uint64_t bits = fiat_p256_get_bit(scalar->bytes, i + 4) << 5;
+      crypto_word_t bits = fiat_p256_get_bit(scalar->bytes, i + 4) << 5;
       bits |= fiat_p256_get_bit(scalar->bytes, i + 3) << 4;
       bits |= fiat_p256_get_bit(scalar->bytes, i + 2) << 3;
       bits |= fiat_p256_get_bit(scalar->bytes, i + 1) << 2;
       bits |= fiat_p256_get_bit(scalar->bytes, i) << 1;
       bits |= fiat_p256_get_bit(scalar->bytes, i - 1);
-      uint8_t sign, digit;
+      crypto_word_t sign, digit;
       ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
 
       // select the point to add or subtract, in constant time.
-      fiat_p256_select_point(digit, 17, (const fiat_p256_felem(*)[3])p_pre_comp,
-                             tmp);
+      fiat_p256_select_point((fiat_p256_limb_t)digit, 17,
+                             (const fiat_p256_felem(*)[3])p_pre_comp, tmp);
       fiat_p256_opp(ftmp, tmp[1]);  // (X, -Y, Z) is the negative point.
-      fiat_p256_cmovznz(tmp[1], sign, tmp[1], ftmp);
+      fiat_p256_cmovznz(tmp[1], (fiat_p256_limb_t)sign, tmp[1], ftmp);
 
       if (!skip) {
         fiat_p256_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2],
@@ -543,12 +543,13 @@
     }
 
     // First, look 32 bits upwards.
-    uint64_t bits = fiat_p256_get_bit(scalar->bytes, i + 224) << 3;
+    crypto_word_t bits = fiat_p256_get_bit(scalar->bytes, i + 224) << 3;
     bits |= fiat_p256_get_bit(scalar->bytes, i + 160) << 2;
     bits |= fiat_p256_get_bit(scalar->bytes, i + 96) << 1;
     bits |= fiat_p256_get_bit(scalar->bytes, i + 32);
     // Select the point to add, in constant time.
-    fiat_p256_select_point_affine(bits, 15, fiat_p256_g_pre_comp[1], tmp);
+    fiat_p256_select_point_affine((fiat_p256_limb_t)bits, 15,
+                                  fiat_p256_g_pre_comp[1], tmp);
 
     if (!skip) {
       fiat_p256_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2],
@@ -566,7 +567,8 @@
     bits |= fiat_p256_get_bit(scalar->bytes, i + 64) << 1;
     bits |= fiat_p256_get_bit(scalar->bytes, i);
     // Select the point to add, in constant time.
-    fiat_p256_select_point_affine(bits, 15, fiat_p256_g_pre_comp[0], tmp);
+    fiat_p256_select_point_affine((fiat_p256_limb_t)bits, 15,
+                                  fiat_p256_g_pre_comp[0], tmp);
     fiat_p256_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 1 /* mixed */,
                         tmp[0], tmp[1], tmp[2]);
   }
@@ -613,14 +615,15 @@
     // constant-time lookup.
     if (i <= 31) {
       // First, look 32 bits upwards.
-      uint64_t bits = fiat_p256_get_bit(g_scalar->bytes, i + 224) << 3;
+      crypto_word_t bits = fiat_p256_get_bit(g_scalar->bytes, i + 224) << 3;
       bits |= fiat_p256_get_bit(g_scalar->bytes, i + 160) << 2;
       bits |= fiat_p256_get_bit(g_scalar->bytes, i + 96) << 1;
       bits |= fiat_p256_get_bit(g_scalar->bytes, i + 32);
       if (bits != 0) {
+        size_t index = (size_t)(bits - 1);
         fiat_p256_point_add(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2],
-                            1 /* mixed */, fiat_p256_g_pre_comp[1][bits - 1][0],
-                            fiat_p256_g_pre_comp[1][bits - 1][1],
+                            1 /* mixed */, fiat_p256_g_pre_comp[1][index][0],
+                            fiat_p256_g_pre_comp[1][index][1],
                             fiat_p256_one);
         skip = 0;
       }
@@ -631,9 +634,10 @@
       bits |= fiat_p256_get_bit(g_scalar->bytes, i + 64) << 1;
       bits |= fiat_p256_get_bit(g_scalar->bytes, i);
       if (bits != 0) {
+        size_t index = (size_t)(bits - 1);
         fiat_p256_point_add(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2],
-                            1 /* mixed */, fiat_p256_g_pre_comp[0][bits - 1][0],
-                            fiat_p256_g_pre_comp[0][bits - 1][1],
+                            1 /* mixed */, fiat_p256_g_pre_comp[0][index][0],
+                            fiat_p256_g_pre_comp[0][index][1],
                             fiat_p256_one);
         skip = 0;
       }
@@ -642,7 +646,7 @@
     int digit = p_wNAF[i];
     if (digit != 0) {
       assert(digit & 1);
-      int idx = digit < 0 ? (-digit) >> 1 : digit >> 1;
+      size_t idx = (size_t)(digit < 0 ? (-digit) >> 1 : digit >> 1);
       fiat_p256_felem *y = &p_pre_comp[idx][1], tmp;
       if (digit < 0) {
         fiat_p256_opp(tmp, p_pre_comp[idx][1]);
diff --git a/crypto/fipsmodule/ec/simple_mul.c b/crypto/fipsmodule/ec/simple_mul.c
index 127e2b3..0e6384e 100644
--- a/crypto/fipsmodule/ec/simple_mul.c
+++ b/crypto/fipsmodule/ec/simple_mul.c
@@ -108,7 +108,7 @@
   if (i > 0) {
     window |= bn_is_bit_set_words(scalar->words, width, i - 1);
   }
-  uint8_t sign, digit;
+  crypto_word_t sign, digit;
   ec_GFp_nistp_recode_scalar_bits(&sign, &digit, window);
 
   // Select the entry in constant-time.
@@ -121,7 +121,7 @@
   // Negate if necessary.
   EC_FELEM neg_Y;
   ec_felem_neg(group, &neg_Y, &out->Y);
-  BN_ULONG sign_mask = sign;
+  crypto_word_t sign_mask = sign;
   sign_mask = 0u - sign_mask;
   ec_felem_select(group, &out->Y, sign_mask, &neg_Y, &out->Y);
 }
diff --git a/crypto/fipsmodule/ec/util.c b/crypto/fipsmodule/ec/util.c
index 4f39f18..c4323f2 100644
--- a/crypto/fipsmodule/ec/util.c
+++ b/crypto/fipsmodule/ec/util.c
@@ -240,9 +240,9 @@
 //   P-384: ...01110011; w = 2, 5, 6, 7 are okay
 //   P-256: ...01010001; w = 5, 7 are okay
 //   P-224: ...00111101; w = 3, 4, 5, 6 are okay
-void ec_GFp_nistp_recode_scalar_bits(uint8_t *sign, uint8_t *digit,
-                                     uint8_t in) {
-  uint8_t s, d;
+void ec_GFp_nistp_recode_scalar_bits(crypto_word_t *sign, crypto_word_t *digit,
+                                     crypto_word_t in) {
+  crypto_word_t s, d;
 
   s = ~((in >> 5) - 1); /* sets all bits to MSB(in), 'in' seen as
                           * 6-bit value */