Remove unions in BLAKE2b implementation

Bug: 574
Change-Id: Ief8030a4c9257a05defdc2dc02d496c63f06626a
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/59545
Commit-Queue: David Benjamin <davidben@google.com>
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/blake2/blake2.c b/crypto/blake2/blake2.c
index 848ed4f..14bbe5b 100644
--- a/crypto/blake2/blake2.c
+++ b/crypto/blake2/blake2.c
@@ -55,10 +55,13 @@
   v[b] = CRYPTO_rotr_u64(v[b] ^ v[c], 63);
 }
 
-static void blake2b_transform(
-    BLAKE2B_CTX *b2b,
-    const uint64_t block_words[BLAKE2B_CBLOCK / sizeof(uint64_t)],
-    size_t num_bytes, int is_final_block) {
+static uint64_t blake2b_load(const uint8_t block[BLAKE2B_CBLOCK], size_t i) {
+  return CRYPTO_load_u64_le(block + 8 * i);
+}
+
+static void blake2b_transform(BLAKE2B_CTX *b2b,
+                              const uint8_t block[BLAKE2B_CBLOCK],
+                              size_t num_bytes, int is_final_block) {
   // https://tools.ietf.org/html/rfc7693#section-3.2
   uint64_t v[16];
   static_assert(sizeof(v) == sizeof(b2b->h) + sizeof(kIV), "");
@@ -78,14 +81,22 @@
 
   for (int round = 0; round < 12; round++) {
     const uint8_t *const s = &kSigma[16 * (round % 10)];
-    blake2b_mix(v, 0, 4, 8, 12, block_words[s[0]], block_words[s[1]]);
-    blake2b_mix(v, 1, 5, 9, 13, block_words[s[2]], block_words[s[3]]);
-    blake2b_mix(v, 2, 6, 10, 14, block_words[s[4]], block_words[s[5]]);
-    blake2b_mix(v, 3, 7, 11, 15, block_words[s[6]], block_words[s[7]]);
-    blake2b_mix(v, 0, 5, 10, 15, block_words[s[8]], block_words[s[9]]);
-    blake2b_mix(v, 1, 6, 11, 12, block_words[s[10]], block_words[s[11]]);
-    blake2b_mix(v, 2, 7, 8, 13, block_words[s[12]], block_words[s[13]]);
-    blake2b_mix(v, 3, 4, 9, 14, block_words[s[14]], block_words[s[15]]);
+    blake2b_mix(v, 0, 4, 8, 12, blake2b_load(block, s[0]),
+                blake2b_load(block, s[1]));
+    blake2b_mix(v, 1, 5, 9, 13, blake2b_load(block, s[2]),
+                blake2b_load(block, s[3]));
+    blake2b_mix(v, 2, 6, 10, 14, blake2b_load(block, s[4]),
+                blake2b_load(block, s[5]));
+    blake2b_mix(v, 3, 7, 11, 15, blake2b_load(block, s[6]),
+                blake2b_load(block, s[7]));
+    blake2b_mix(v, 0, 5, 10, 15, blake2b_load(block, s[8]),
+                blake2b_load(block, s[9]));
+    blake2b_mix(v, 1, 6, 11, 12, blake2b_load(block, s[10]),
+                blake2b_load(block, s[11]));
+    blake2b_mix(v, 2, 7, 8, 13, blake2b_load(block, s[12]),
+                blake2b_load(block, s[13]));
+    blake2b_mix(v, 3, 4, 9, 14, blake2b_load(block, s[14]),
+                blake2b_load(block, s[15]));
   }
 
   for (size_t i = 0; i < OPENSSL_ARRAY_SIZE(b2b->h); i++) {
@@ -111,11 +122,11 @@
   }
 
   const uint8_t *data = in_data;
-  size_t todo = sizeof(b2b->block.bytes) - b2b->block_used;
+  size_t todo = sizeof(b2b->block) - b2b->block_used;
   if (todo > len) {
     todo = len;
   }
-  OPENSSL_memcpy(&b2b->block.bytes[b2b->block_used], data, todo);
+  OPENSSL_memcpy(&b2b->block[b2b->block_used], data, todo);
   b2b->block_used += todo;
   data += todo;
   len -= todo;
@@ -126,26 +137,24 @@
 
   // More input remains therefore we must have filled |b2b->block|.
   assert(b2b->block_used == BLAKE2B_CBLOCK);
-  blake2b_transform(b2b, b2b->block.words, BLAKE2B_CBLOCK,
+  blake2b_transform(b2b, b2b->block, BLAKE2B_CBLOCK,
                     /*is_final_block=*/0);
   b2b->block_used = 0;
 
   while (len > BLAKE2B_CBLOCK) {
-    uint64_t block_words[BLAKE2B_CBLOCK / sizeof(uint64_t)];
-    OPENSSL_memcpy(block_words, data, sizeof(block_words));
-    blake2b_transform(b2b, block_words, BLAKE2B_CBLOCK, /*is_final_block=*/0);
+    blake2b_transform(b2b, data, BLAKE2B_CBLOCK, /*is_final_block=*/0);
     data += BLAKE2B_CBLOCK;
     len -= BLAKE2B_CBLOCK;
   }
 
-  OPENSSL_memcpy(b2b->block.bytes, data, len);
+  OPENSSL_memcpy(b2b->block, data, len);
   b2b->block_used = len;
 }
 
 void BLAKE2B256_Final(uint8_t out[BLAKE2B256_DIGEST_LENGTH], BLAKE2B_CTX *b2b) {
-  OPENSSL_memset(&b2b->block.bytes[b2b->block_used], 0,
-                 sizeof(b2b->block.bytes) - b2b->block_used);
-  blake2b_transform(b2b, b2b->block.words, b2b->block_used,
+  OPENSSL_memset(&b2b->block[b2b->block_used], 0,
+                 sizeof(b2b->block) - b2b->block_used);
+  blake2b_transform(b2b, b2b->block, b2b->block_used,
                     /*is_final_block=*/1);
   static_assert(BLAKE2B256_DIGEST_LENGTH <= sizeof(b2b->h), "");
   memcpy(out, b2b->h, BLAKE2B256_DIGEST_LENGTH);
diff --git a/include/openssl/blake2.h b/include/openssl/blake2.h
index 9ec1e6c..03e3a46 100644
--- a/include/openssl/blake2.h
+++ b/include/openssl/blake2.h
@@ -28,10 +28,7 @@
 struct blake2b_state_st {
   uint64_t h[8];
   uint64_t t_low, t_high;
-  union {
-    uint8_t bytes[BLAKE2B_CBLOCK];
-    uint64_t words[16];
-  } block;
+  uint8_t block[BLAKE2B_CBLOCK];
   size_t block_used;
 };