Make md32_common.h single-included and use an unsized helper for SHA-256.

Similar to
https://boringssl-review.googlesource.com/c/boringssl/+/46405,
SHA256_Final and SHA224_Final hit array size warnings in the new GCC.
The array sizes are, strictly speaking, purely decoration, but this is a
good warning so we should be clean with it on.

That same change is difficult to apply to md32_common.h because
md32_common.h generates the functions for us. md32_common.h is already
strange in that it is multiply-included and changes behavior based on
macros defined by the caller.

Instead, replace it with inline functions, which are a bit more
conventional and typesafe. This allows each hash function to define the
function prototype. Use this to add an unsized helper for SHA-256.

Bug: 402
Change-Id: I61bc30fb58c54dd40a55c9b1ebf3fb9adde5e038
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/47807
Reviewed-by: Adam Langley <agl@google.com>
Reviewed-by: Peter Foley <pefoley@google.com>
Commit-Queue: David Benjamin <davidben@google.com>
diff --git a/crypto/fipsmodule/digest/md32_common.h b/crypto/fipsmodule/digest/md32_common.h
index 8a14108..129ec48 100644
--- a/crypto/fipsmodule/digest/md32_common.h
+++ b/crypto/fipsmodule/digest/md32_common.h
@@ -46,6 +46,9 @@
  * OF THE POSSIBILITY OF SUCH DAMAGE.
  * ==================================================================== */
 
+#ifndef OPENSSL_HEADER_DIGEST_MD32_COMMON_H
+#define OPENSSL_HEADER_DIGEST_MD32_COMMON_H
+
 #include <openssl/base.h>
 
 #include <assert.h>
@@ -59,22 +62,15 @@
 
 // This is a generic 32-bit "collector" for message digest algorithms. It
 // collects input character stream into chunks of 32-bit values and invokes the
-// block function that performs the actual hash calculations. To make use of
-// this mechanism, the following macros must be defined before including
-// md32_common.h.
+// block function that performs the actual hash calculations.
 //
-// One of |DATA_ORDER_IS_BIG_ENDIAN| or |DATA_ORDER_IS_LITTLE_ENDIAN| must be
-// defined to specify the byte order of the input stream.
-//
-// |HASH_CBLOCK| must be defined as the integer block size, in bytes.
-//
-// |HASH_CTX| must be defined as the name of the context structure, which must
-// have at least the following members:
+// To make use of this mechanism, the hash context should be defined with the
+// following parameters.
 //
 //     typedef struct <name>_state_st {
 //       uint32_t h[<chaining length> / sizeof(uint32_t)];
 //       uint32_t Nl, Nh;
-//       uint8_t data[HASH_CBLOCK];
+//       uint8_t data[<block size>];
 //       unsigned num;
 //       ...
 //     } <NAME>_CTX;
@@ -83,136 +79,117 @@
 // any truncation (e.g. 64 for SHA-224 and SHA-256, 128 for SHA-384 and
 // SHA-512).
 //
-// |HASH_UPDATE| must be defined as the name of the "Update" function to
-// generate.
-//
-// |HASH_FINAL| must be defined as the name of "Final" function to generate.
-//
-// |HASH_BLOCK_DATA_ORDER| must be defined as the name of the "Block" function.
-// That function must be implemented manually. It must be capable of operating
-// on *unaligned* input data in its original (data) byte order. It must have
-// this signature:
-//
-//     void HASH_BLOCK_DATA_ORDER(uint32_t *state, const uint8_t *data,
-//                                size_t num);
-//
-// It must update the hash state |state| with |num| blocks of data from |data|,
-// where each block is |HASH_CBLOCK| bytes; i.e. |data| points to a array of
-// |HASH_CBLOCK * num| bytes. |state| points to the |h| member of a |HASH_CTX|,
-// and so will have |<chaining length> / sizeof(uint32_t)| elements.
-//
-// |HASH_MAKE_STRING(c, s)| must be defined as a block statement that converts
-// the hash state |c->h| into the output byte order, storing the result in |s|.
+// |h| is the hash state and is updated by a function of type
+// |crypto_md32_block_func|. |data| is the partial unprocessed block and has
+// |num| bytes. |Nl| and |Nh| maintain the number of bits processed so far.
 
-#if !defined(DATA_ORDER_IS_BIG_ENDIAN) && !defined(DATA_ORDER_IS_LITTLE_ENDIAN)
-#error "DATA_ORDER must be defined!"
-#endif
+// A crypto_md32_block_func should incorporate |num_blocks| of input from |data|
+// into |state|. It is assumed the caller has sized |state| and |data| for the
+// hash function.
+typedef void (*crypto_md32_block_func)(uint32_t *state, const uint8_t *data,
+                                       size_t num_blocks);
 
-#ifndef HASH_CBLOCK
-#error "HASH_CBLOCK must be defined!"
-#endif
-#ifndef HASH_CTX
-#error "HASH_CTX must be defined!"
-#endif
-
-#ifndef HASH_UPDATE
-#error "HASH_UPDATE must be defined!"
-#endif
-#ifndef HASH_FINAL
-#error "HASH_FINAL must be defined!"
-#endif
-
-#ifndef HASH_BLOCK_DATA_ORDER
-#error "HASH_BLOCK_DATA_ORDER must be defined!"
-#endif
-
-#ifndef HASH_MAKE_STRING
-#error "HASH_MAKE_STRING must be defined!"
-#endif
-
-int HASH_UPDATE(HASH_CTX *c, const void *data_, size_t len) {
-  const uint8_t *data = data_;
-
+// crypto_md32_update adds |len| bytes from |in| to the digest. |data| must be a
+// buffer of length |block_size| with the first |*num| bytes containing a
+// partial block. This function combines the partial block with |in| and
+// incorporates any complete blocks into the digest state |h|. It then updates
+// |data| and |*num| with the new partial block and updates |*Nh| and |*Nl| with
+// the data consumed.
+static inline void crypto_md32_update(crypto_md32_block_func block_func,
+                                      uint32_t *h, uint8_t *data,
+                                      size_t block_size, unsigned *num,
+                                      uint32_t *Nh, uint32_t *Nl,
+                                      const uint8_t *in, size_t len) {
   if (len == 0) {
-    return 1;
+    return;
   }
 
-  uint32_t l = c->Nl + (((uint32_t)len) << 3);
-  if (l < c->Nl) {
+  uint32_t l = *Nl + (((uint32_t)len) << 3);
+  if (l < *Nl) {
     // Handle carries.
-    c->Nh++;
+    (*Nh)++;
   }
-  c->Nh += (uint32_t)(len >> 29);
-  c->Nl = l;
+  *Nh += (uint32_t)(len >> 29);
+  *Nl = l;
 
-  size_t n = c->num;
+  size_t n = *num;
   if (n != 0) {
-    if (len >= HASH_CBLOCK || len + n >= HASH_CBLOCK) {
-      OPENSSL_memcpy(c->data + n, data, HASH_CBLOCK - n);
-      HASH_BLOCK_DATA_ORDER(c->h, c->data, 1);
-      n = HASH_CBLOCK - n;
-      data += n;
+    if (len >= block_size || len + n >= block_size) {
+      OPENSSL_memcpy(data + n, in, block_size - n);
+      block_func(h, data, 1);
+      n = block_size - n;
+      in += n;
       len -= n;
-      c->num = 0;
-      // Keep |c->data| zeroed when unused.
-      OPENSSL_memset(c->data, 0, HASH_CBLOCK);
+      *num = 0;
+      // Keep |data| zeroed when unused.
+      OPENSSL_memset(data, 0, block_size);
     } else {
-      OPENSSL_memcpy(c->data + n, data, len);
-      c->num += (unsigned)len;
-      return 1;
+      OPENSSL_memcpy(data + n, in, len);
+      *num += (unsigned)len;
+      return;
     }
   }
 
-  n = len / HASH_CBLOCK;
+  n = len / block_size;
   if (n > 0) {
-    HASH_BLOCK_DATA_ORDER(c->h, data, n);
-    n *= HASH_CBLOCK;
-    data += n;
+    block_func(h, in, n);
+    n *= block_size;
+    in += n;
     len -= n;
   }
 
   if (len != 0) {
-    c->num = (unsigned)len;
-    OPENSSL_memcpy(c->data, data, len);
+    *num = (unsigned)len;
+    OPENSSL_memcpy(data, in, len);
   }
-  return 1;
 }
 
-
-int HASH_FINAL(uint8_t out[HASH_DIGEST_LENGTH], HASH_CTX *c) {
-  // |c->data| always has room for at least one byte. A full block would have
+// crypto_md32_final incorporates the partial block and trailing length into the
+// digest state |h|. The trailing length is encoded in little-endian if
+// |is_big_endian| is zero and big-endian otherwise. |data| must be a buffer of
+// length |block_size| with the first |*num| bytes containing a partial block.
+// |Nh| and |Nl| contain the total number of bits processed. On return, this
+// function clears the partial block in |data| and
+// |*num|.
+//
+// This function does not serialize |h| into a final digest. This is the
+// responsibility of the caller.
+static inline void crypto_md32_final(crypto_md32_block_func block_func,
+                                     uint32_t *h, uint8_t *data,
+                                     size_t block_size, unsigned *num,
+                                     uint32_t Nh, uint32_t Nl,
+                                     int is_big_endian) {
+  // |data| always has room for at least one byte. A full block would have
   // been consumed.
-  size_t n = c->num;
-  assert(n < HASH_CBLOCK);
-  c->data[n] = 0x80;
+  size_t n = *num;
+  assert(n < block_size);
+  data[n] = 0x80;
   n++;
 
   // Fill the block with zeros if there isn't room for a 64-bit length.
-  if (n > (HASH_CBLOCK - 8)) {
-    OPENSSL_memset(c->data + n, 0, HASH_CBLOCK - n);
+  if (n > block_size - 8) {
+    OPENSSL_memset(data + n, 0, block_size - n);
     n = 0;
-    HASH_BLOCK_DATA_ORDER(c->h, c->data, 1);
+    block_func(h, data, 1);
   }
-  OPENSSL_memset(c->data + n, 0, HASH_CBLOCK - 8 - n);
+  OPENSSL_memset(data + n, 0, block_size - 8 - n);
 
   // Append a 64-bit length to the block and process it.
-  uint8_t *p = c->data + HASH_CBLOCK - 8;
-#if defined(DATA_ORDER_IS_BIG_ENDIAN)
-  CRYPTO_store_u32_be(p, c->Nh);
-  CRYPTO_store_u32_be(p + 4, c->Nl);
-#elif defined(DATA_ORDER_IS_LITTLE_ENDIAN)
-  CRYPTO_store_u32_le(p, c->Nl);
-  CRYPTO_store_u32_le(p + 4, c->Nh);
-#endif
-  HASH_BLOCK_DATA_ORDER(c->h, c->data, 1);
-  c->num = 0;
-  OPENSSL_memset(c->data, 0, HASH_CBLOCK);
-
-  HASH_MAKE_STRING(c, out);
-  return 1;
+  if (is_big_endian) {
+    CRYPTO_store_u32_be(data + block_size - 8, Nh);
+    CRYPTO_store_u32_be(data + block_size - 4, Nl);
+  } else {
+    CRYPTO_store_u32_le(data + block_size - 8, Nl);
+    CRYPTO_store_u32_le(data + block_size - 4, Nh);
+  }
+  block_func(h, data, 1);
+  *num = 0;
+  OPENSSL_memset(data, 0, block_size);
 }
 
 
 #if defined(__cplusplus)
 }  // extern C
 #endif
+
+#endif  // OPENSSL_HEADER_DIGEST_MD32_COMMON_H
diff --git a/crypto/fipsmodule/md4/md4.c b/crypto/fipsmodule/md4/md4.c
index 9d229bf..8779402 100644
--- a/crypto/fipsmodule/md4/md4.c
+++ b/crypto/fipsmodule/md4/md4.c
@@ -60,6 +60,7 @@
 #include <string.h>
 
 #include "../../internal.h"
+#include "../digest/md32_common.h"
 
 
 uint8_t *MD4(const uint8_t *data, size_t len, uint8_t out[MD4_DIGEST_LENGTH]) {
@@ -88,27 +89,22 @@
   md4_block_data_order(c->h, data, 1);
 }
 
-#define DATA_ORDER_IS_LITTLE_ENDIAN
+int MD4_Update(MD4_CTX *c, const void *data, size_t len) {
+  crypto_md32_update(&md4_block_data_order, c->h, c->data, MD4_CBLOCK, &c->num,
+                     &c->Nh, &c->Nl, data, len);
+  return 1;
+}
 
-#define HASH_CTX MD4_CTX
-#define HASH_CBLOCK 64
-#define HASH_DIGEST_LENGTH 16
-#define HASH_UPDATE MD4_Update
-#define HASH_FINAL MD4_Final
-#define HASH_MAKE_STRING(c, s)           \
-  do {                                   \
-    CRYPTO_store_u32_le((s), (c)->h[0]); \
-    (s) += 4;                            \
-    CRYPTO_store_u32_le((s), (c)->h[1]); \
-    (s) += 4;                            \
-    CRYPTO_store_u32_le((s), (c)->h[2]); \
-    (s) += 4;                            \
-    CRYPTO_store_u32_le((s), (c)->h[3]); \
-    (s) += 4;                            \
-  } while (0)
-#define HASH_BLOCK_DATA_ORDER md4_block_data_order
+int MD4_Final(uint8_t out[MD4_DIGEST_LENGTH], MD4_CTX *c) {
+  crypto_md32_final(&md4_block_data_order, c->h, c->data, MD4_CBLOCK, &c->num,
+                    c->Nh, c->Nl, /*is_big_endian=*/0);
 
-#include "../digest/md32_common.h"
+  CRYPTO_store_u32_le(out, c->h[0]);
+  CRYPTO_store_u32_le(out + 4, c->h[1]);
+  CRYPTO_store_u32_le(out + 8, c->h[2]);
+  CRYPTO_store_u32_le(out + 12, c->h[3]);
+  return 1;
+}
 
 // As pointed out by Wei Dai <weidai@eskimo.com>, the above can be
 // simplified to the code below.  Wei attributes these optimizations
@@ -238,14 +234,6 @@
   }
 }
 
-#undef DATA_ORDER_IS_LITTLE_ENDIAN
-#undef HASH_CTX
-#undef HASH_CBLOCK
-#undef HASH_DIGEST_LENGTH
-#undef HASH_UPDATE
-#undef HASH_FINAL
-#undef HASH_MAKE_STRING
-#undef HASH_BLOCK_DATA_ORDER
 #undef F
 #undef G
 #undef H
diff --git a/crypto/fipsmodule/md5/md5.c b/crypto/fipsmodule/md5/md5.c
index e454a84..eba34bc 100644
--- a/crypto/fipsmodule/md5/md5.c
+++ b/crypto/fipsmodule/md5/md5.c
@@ -60,8 +60,9 @@
 
 #include <openssl/mem.h>
 
-#include "internal.h"
 #include "../../internal.h"
+#include "../digest/md32_common.h"
+#include "internal.h"
 
 
 uint8_t *MD5(const uint8_t *data, size_t len, uint8_t out[MD5_DIGEST_LENGTH]) {
@@ -93,27 +94,22 @@
   md5_block_data_order(c->h, data, 1);
 }
 
-#define DATA_ORDER_IS_LITTLE_ENDIAN
+int MD5_Update(MD5_CTX *c, const void *data, size_t len) {
+  crypto_md32_update(&md5_block_data_order, c->h, c->data, MD5_CBLOCK, &c->num,
+                     &c->Nh, &c->Nl, data, len);
+  return 1;
+}
 
-#define HASH_CTX MD5_CTX
-#define HASH_CBLOCK 64
-#define HASH_DIGEST_LENGTH 16
-#define HASH_UPDATE MD5_Update
-#define HASH_FINAL MD5_Final
-#define HASH_MAKE_STRING(c, s)           \
-  do {                                   \
-    CRYPTO_store_u32_le((s), (c)->h[0]); \
-    (s) += 4;                            \
-    CRYPTO_store_u32_le((s), (c)->h[1]); \
-    (s) += 4;                            \
-    CRYPTO_store_u32_le((s), (c)->h[2]); \
-    (s) += 4;                            \
-    CRYPTO_store_u32_le((s), (c)->h[3]); \
-    (s) += 4;                            \
-  } while (0)
-#define HASH_BLOCK_DATA_ORDER md5_block_data_order
+int MD5_Final(uint8_t out[MD5_DIGEST_LENGTH], MD5_CTX *c) {
+  crypto_md32_final(&md5_block_data_order, c->h, c->data, MD5_CBLOCK, &c->num,
+                    c->Nh, c->Nl, /*is_big_endian=*/0);
 
-#include "../digest/md32_common.h"
+  CRYPTO_store_u32_le(out, c->h[0]);
+  CRYPTO_store_u32_le(out + 4, c->h[1]);
+  CRYPTO_store_u32_le(out + 8, c->h[2]);
+  CRYPTO_store_u32_le(out + 12, c->h[3]);
+  return 1;
+}
 
 // As pointed out by Wei Dai <weidai@eskimo.com>, the above can be
 // simplified to the code below.  Wei attributes these optimizations
@@ -280,14 +276,6 @@
 #undef X
 #endif
 
-#undef DATA_ORDER_IS_LITTLE_ENDIAN
-#undef HASH_CTX
-#undef HASH_CBLOCK
-#undef HASH_DIGEST_LENGTH
-#undef HASH_UPDATE
-#undef HASH_FINAL
-#undef HASH_MAKE_STRING
-#undef HASH_BLOCK_DATA_ORDER
 #undef F
 #undef G
 #undef H
diff --git a/crypto/fipsmodule/sha/sha1.c b/crypto/fipsmodule/sha/sha1.c
index 7149d11..c629308 100644
--- a/crypto/fipsmodule/sha/sha1.c
+++ b/crypto/fipsmodule/sha/sha1.c
@@ -60,8 +60,9 @@
 
 #include <openssl/mem.h>
 
-#include "internal.h"
 #include "../../internal.h"
+#include "../digest/md32_common.h"
+#include "internal.h"
 
 
 int SHA1_Init(SHA_CTX *sha) {
@@ -92,28 +93,24 @@
   sha1_block_data_order(c->h, data, 1);
 }
 
-#define DATA_ORDER_IS_BIG_ENDIAN
+int SHA1_Update(SHA_CTX *c, const void *data, size_t len) {
+  crypto_md32_update(&sha1_block_data_order, c->h, c->data, SHA_CBLOCK, &c->num,
+                     &c->Nh, &c->Nl, data, len);
+  return 1;
+}
 
-#define HASH_CTX                SHA_CTX
-#define HASH_CBLOCK             64
-#define HASH_DIGEST_LENGTH      20
-#define HASH_MAKE_STRING(c, s)           \
-  do {                                   \
-    CRYPTO_store_u32_be((s), (c)->h[0]); \
-    (s) += 4;                            \
-    CRYPTO_store_u32_be((s), (c)->h[1]); \
-    (s) += 4;                            \
-    CRYPTO_store_u32_be((s), (c)->h[2]); \
-    (s) += 4;                            \
-    CRYPTO_store_u32_be((s), (c)->h[3]); \
-    (s) += 4;                            \
-    CRYPTO_store_u32_be((s), (c)->h[4]); \
-    (s) += 4;                            \
-  } while (0)
+int SHA1_Final(uint8_t out[SHA_DIGEST_LENGTH], SHA_CTX *c) {
+  crypto_md32_final(&sha1_block_data_order, c->h, c->data, SHA_CBLOCK, &c->num,
+                    c->Nh, c->Nl, /*is_big_endian=*/1);
 
-#define HASH_UPDATE SHA1_Update
-#define HASH_FINAL SHA1_Final
-#define HASH_BLOCK_DATA_ORDER sha1_block_data_order
+  CRYPTO_store_u32_be(out, c->h[0]);
+  CRYPTO_store_u32_be(out + 4, c->h[1]);
+  CRYPTO_store_u32_be(out + 8, c->h[2]);
+  CRYPTO_store_u32_be(out + 12, c->h[3]);
+  CRYPTO_store_u32_be(out + 16, c->h[4]);
+  return 1;
+}
+
 #define ROTATE(a, n) (((a) << (n)) | ((a) >> (32 - (n))))
 #define Xupdate(a, ix, ia, ib, ic, id) \
   do {                                 \
@@ -121,8 +118,6 @@
     (ix) = (a) = ROTATE((a), 1);       \
   } while (0)
 
-#include "../digest/md32_common.h"
-
 #define K_00_19 0x5a827999UL
 #define K_20_39 0x6ed9eba1UL
 #define K_40_59 0x8f1bbcdcUL
@@ -343,14 +338,6 @@
 }
 #endif
 
-#undef DATA_ORDER_IS_BIG_ENDIAN
-#undef HASH_CTX
-#undef HASH_CBLOCK
-#undef HASH_DIGEST_LENGTH
-#undef HASH_MAKE_STRING
-#undef HASH_UPDATE
-#undef HASH_FINAL
-#undef HASH_BLOCK_DATA_ORDER
 #undef ROTATE
 #undef Xupdate
 #undef K_00_19
diff --git a/crypto/fipsmodule/sha/sha256.c b/crypto/fipsmodule/sha/sha256.c
index ff961c8..4394f4a 100644
--- a/crypto/fipsmodule/sha/sha256.c
+++ b/crypto/fipsmodule/sha/sha256.c
@@ -60,8 +60,9 @@
 
 #include <openssl/mem.h>
 
-#include "internal.h"
 #include "../../internal.h"
+#include "../digest/md32_common.h"
+#include "internal.h"
 
 
 int SHA224_Init(SHA256_CTX *sha) {
@@ -112,16 +113,6 @@
   return out;
 }
 
-int SHA224_Update(SHA256_CTX *ctx, const void *data, size_t len) {
-  return SHA256_Update(ctx, data, len);
-}
-
-int SHA224_Final(uint8_t out[SHA224_DIGEST_LENGTH], SHA256_CTX *ctx) {
-  // SHA224_Init sets |ctx->md_len| to |SHA224_DIGEST_LENGTH|, so this has a
-  // smaller output.
-  return SHA256_Final(out, ctx);
-}
-
 #ifndef SHA256_ASM
 static void sha256_block_data_order(uint32_t *state, const uint8_t *in,
                                     size_t num);
@@ -131,55 +122,51 @@
   sha256_block_data_order(c->h, data, 1);
 }
 
-#define DATA_ORDER_IS_BIG_ENDIAN
+int SHA256_Update(SHA256_CTX *c, const void *data, size_t len) {
+  crypto_md32_update(&sha256_block_data_order, c->h, c->data, SHA256_CBLOCK,
+                     &c->num, &c->Nh, &c->Nl, data, len);
+  return 1;
+}
 
-#define HASH_CTX SHA256_CTX
-#define HASH_CBLOCK 64
-#define HASH_DIGEST_LENGTH 32
+int SHA224_Update(SHA256_CTX *ctx, const void *data, size_t len) {
+  return SHA256_Update(ctx, data, len);
+}
 
-// Note that FIPS180-2 discusses "Truncation of the Hash Function Output."
-// default: case below covers for it. It's not clear however if it's permitted
-// to truncate to amount of bytes not divisible by 4. I bet not, but if it is,
-// then default: case shall be extended. For reference. Idea behind separate
-// cases for pre-defined lenghts is to let the compiler decide if it's
-// appropriate to unroll small loops.
-//
-// TODO(davidben): The small |md_len| case is one of the few places a low-level
-// hash 'final' function can fail. This should never happen.
-#define HASH_MAKE_STRING(c, s)                              \
-  do {                                                      \
-    unsigned int nn;                                        \
-    switch ((c)->md_len) {                                  \
-      case SHA224_DIGEST_LENGTH:                            \
-        for (nn = 0; nn < SHA224_DIGEST_LENGTH / 4; nn++) { \
-          CRYPTO_store_u32_be((s), (c)->h[nn]);             \
-          (s) += 4;                                         \
-        }                                                   \
-        break;                                              \
-      case SHA256_DIGEST_LENGTH:                            \
-        for (nn = 0; nn < SHA256_DIGEST_LENGTH / 4; nn++) { \
-          CRYPTO_store_u32_be((s), (c)->h[nn]);             \
-          (s) += 4;                                         \
-        }                                                   \
-        break;                                              \
-      default:                                              \
-        if ((c)->md_len > SHA256_DIGEST_LENGTH) {           \
-          return 0;                                         \
-        }                                                   \
-        for (nn = 0; nn < (c)->md_len / 4; nn++) {          \
-          CRYPTO_store_u32_be((s), (c)->h[nn]);             \
-          (s) += 4;                                         \
-        }                                                   \
-        break;                                              \
-    }                                                       \
-  } while (0)
+static int sha256_final_impl(uint8_t *out, SHA256_CTX *c) {
+  crypto_md32_final(&sha256_block_data_order, c->h, c->data, SHA256_CBLOCK,
+                    &c->num, c->Nh, c->Nl, /*is_big_endian=*/1);
 
+  // TODO(davidben): This overflow check one of the few places a low-level hash
+  // 'final' function can fail. SHA-512 does not have a corresponding check.
+  // These functions already misbehave if the caller arbitrarily mutates |c|, so
+  // can we assume one of |SHA256_Init| or |SHA224_Init| was used?
+  if (c->md_len > SHA256_DIGEST_LENGTH) {
+    return 0;
+  }
 
-#define HASH_UPDATE SHA256_Update
-#define HASH_FINAL SHA256_Final
-#define HASH_BLOCK_DATA_ORDER sha256_block_data_order
+  assert(c->md_len % 4 == 0);
+  const size_t out_words = c->md_len / 4;
+  for (size_t i = 0; i < out_words; i++) {
+    CRYPTO_store_u32_be(out, c->h[i]);
+    out += 4;
+  }
+  return 1;
+}
 
-#include "../digest/md32_common.h"
+int SHA256_Final(uint8_t out[SHA256_DIGEST_LENGTH], SHA256_CTX *c) {
+  // Ideally we would assert |sha->md_len| is |SHA256_DIGEST_LENGTH| to match
+  // the size hint, but calling code often pairs |SHA224_Init| with
+  // |SHA256_Final| and expects |sha->md_len| to carry the size over.
+  //
+  // TODO(davidben): Add an assert and fix code to match them up.
+  return sha256_final_impl(out, c);
+}
+int SHA224_Final(uint8_t out[SHA224_DIGEST_LENGTH], SHA256_CTX *ctx) {
+  // SHA224_Init sets |ctx->md_len| to |SHA224_DIGEST_LENGTH|, so this has a
+  // smaller output.
+  assert(ctx->md_len == SHA224_DIGEST_LENGTH);
+  return sha256_final_impl(out, ctx);
+}
 
 #ifndef SHA256_ASM
 static const uint32_t K256[64] = {
@@ -322,14 +309,6 @@
   sha256_block_data_order(state, data, num_blocks);
 }
 
-#undef DATA_ORDER_IS_BIG_ENDIAN
-#undef HASH_CTX
-#undef HASH_CBLOCK
-#undef HASH_DIGEST_LENGTH
-#undef HASH_MAKE_STRING
-#undef HASH_UPDATE
-#undef HASH_FINAL
-#undef HASH_BLOCK_DATA_ORDER
 #undef ROTATE
 #undef Sigma0
 #undef Sigma1
diff --git a/crypto/fipsmodule/sha/sha512.c b/crypto/fipsmodule/sha/sha512.c
index 1f40e7a..befdd52 100644
--- a/crypto/fipsmodule/sha/sha512.c
+++ b/crypto/fipsmodule/sha/sha512.c
@@ -162,7 +162,7 @@
 
 int SHA384_Final(uint8_t out[SHA384_DIGEST_LENGTH], SHA512_CTX *sha) {
   // |SHA384_Init| sets |sha->md_len| to |SHA384_DIGEST_LENGTH|, so this has a
-  // |smaller output.
+  // smaller output.
   assert(sha->md_len == SHA384_DIGEST_LENGTH);
   return sha512_final_impl(out, sha);
 }
@@ -237,7 +237,7 @@
 int SHA512_Final(uint8_t out[SHA512_DIGEST_LENGTH], SHA512_CTX *sha) {
   // Ideally we would assert |sha->md_len| is |SHA512_DIGEST_LENGTH| to match
   // the size hint, but calling code often pairs |SHA384_Init| with
-  // |SHA512_Final| and expects |sha->md_len| to carry the over.
+  // |SHA512_Final| and expects |sha->md_len| to carry the size over.
   //
   // TODO(davidben): Add an assert and fix code to match them up.
   return sha512_final_impl(out, sha);
@@ -270,9 +270,8 @@
   assert(sha->md_len % 8 == 0);
   const size_t out_words = sha->md_len / 8;
   for (size_t i = 0; i < out_words; i++) {
-    const uint64_t t = CRYPTO_bswap8(sha->h[i]);
-    memcpy(out, &t, sizeof(t));
-    out += sizeof(t);
+    CRYPTO_store_u64_be(out, sha->h[i]);
+    out += 8;
   }
 
   return 1;
diff --git a/decrepit/ripemd/ripemd.c b/decrepit/ripemd/ripemd.c
index ac6bd83..9120cdd 100644
--- a/decrepit/ripemd/ripemd.c
+++ b/decrepit/ripemd/ripemd.c
@@ -59,6 +59,7 @@
 #include <string.h>
 
 #include "../../crypto/internal.h"
+#include "../../crypto/fipsmodule/digest/md32_common.h"
 
 
 #define RIPEMD160_A 0x67452301L
@@ -85,31 +86,24 @@
   ripemd160_block_data_order(c->h, data, 1);
 }
 
-#define DATA_ORDER_IS_LITTLE_ENDIAN
+int RIPEMD160_Update(RIPEMD160_CTX *c, const void *data, size_t len) {
+  crypto_md32_update(&ripemd160_block_data_order, c->h, c->data,
+                     RIPEMD160_CBLOCK, &c->num, &c->Nh, &c->Nl, data, len);
+  return 1;
+}
 
-#define HASH_LONG uint32_t
-#define HASH_CTX RIPEMD160_CTX
-#define HASH_CBLOCK RIPEMD160_CBLOCK
-#define HASH_DIGEST_LENGTH RIPEMD160_DIGEST_LENGTH
-#define HASH_UPDATE RIPEMD160_Update
-#define HASH_FINAL RIPEMD160_Final
-#define HASH_MAKE_STRING(c, s)           \
-  do {                                   \
-    CRYPTO_store_u32_le((s), (c)->h[0]); \
-    (s) += 4;                            \
-    CRYPTO_store_u32_le((s), (c)->h[1]); \
-    (s) += 4;                            \
-    CRYPTO_store_u32_le((s), (c)->h[2]); \
-    (s) += 4;                            \
-    CRYPTO_store_u32_le((s), (c)->h[3]); \
-    (s) += 4;                            \
-    CRYPTO_store_u32_le((s), (c)->h[4]); \
-    (s) += 4;                            \
-  } while (0)
+int RIPEMD160_Final(uint8_t out[RIPEMD160_DIGEST_LENGTH], RIPEMD160_CTX *c) {
+  crypto_md32_final(&ripemd160_block_data_order, c->h, c->data,
+                    RIPEMD160_CBLOCK, &c->num, c->Nh, c->Nl,
+                    /*is_big_endian=*/0);
 
-#define HASH_BLOCK_DATA_ORDER ripemd160_block_data_order
-
-#include "../../crypto/fipsmodule/digest/md32_common.h"
+  CRYPTO_store_u32_le(out, c->h[0]);
+  CRYPTO_store_u32_le(out + 4, c->h[1]);
+  CRYPTO_store_u32_le(out + 8, c->h[2]);
+  CRYPTO_store_u32_le(out + 12, c->h[3]);
+  CRYPTO_store_u32_le(out + 16, c->h[4]);
+  return 1;
+}
 
 // Transformed F2 and F4 are courtesy of Wei Dai <weidai@eskimo.com>
 #define F1(x, y, z) ((x) ^ (y) ^ (z))