Revert "Improve crypto/digest/md32_common.h mechanism."

This reverts commit 00461cf201b66205653fd6159ae260f453738641.

Sadly it broke wpa_supplicant.
diff --git a/crypto/cipher/tls_cbc.c b/crypto/cipher/tls_cbc.c
index c541db3..8bca2f3 100644
--- a/crypto/cipher/tls_cbc.c
+++ b/crypto/cipher/tls_cbc.c
@@ -229,11 +229,11 @@
  * typically does. */
 static void tls1_sha1_final_raw(void *ctx, uint8_t *md_out) {
   SHA_CTX *sha1 = ctx;
-  u32toBE(sha1->h[0], md_out);
-  u32toBE(sha1->h[1], md_out);
-  u32toBE(sha1->h[2], md_out);
-  u32toBE(sha1->h[3], md_out);
-  u32toBE(sha1->h[4], md_out);
+  u32toBE(sha1->h0, md_out);
+  u32toBE(sha1->h1, md_out);
+  u32toBE(sha1->h2, md_out);
+  u32toBE(sha1->h3, md_out);
+  u32toBE(sha1->h4, md_out);
 }
 #define LARGEST_DIGEST_CTX SHA_CTX
 
diff --git a/crypto/digest/md32_common.h b/crypto/digest/md32_common.h
index d213476..14607fb 100644
--- a/crypto/digest/md32_common.h
+++ b/crypto/digest/md32_common.h
@@ -58,54 +58,49 @@
 
 #define asm __asm__
 
-/* This is a generic 32-bit "collector" for message digest algorithms. It
- * collects input character stream into chunks of 32-bit values and invokes the
- * block function that performs the actual hash calculations. To make use of
- * this mechanism, the following macros must be defined before including
- * md32_common.h.
+/* This is a generic 32 bit "collector" for message digest algorithms.
+ * Whenever needed it collects input character stream into chunks of
+ * 32 bit values and invokes a block function that performs actual hash
+ * calculations.
  *
- * One of |DATA_ORDER_IS_BIG_ENDIAN| or |DATA_ORDER_IS_LITTLE_ENDIAN| must be
- * defined to specify the byte order of the input stream.
+ * Porting guide.
  *
- * |HASH_CBLOCK| must be defined as the integer block size, in bytes.
+ * Obligatory macros:
  *
- * |HASH_CTX| must be defined as the name of the context structure, which must
- * have at least the following members:
+ * DATA_ORDER_IS_BIG_ENDIAN or DATA_ORDER_IS_LITTLE_ENDIAN
+ *	this macro defines byte order of input stream.
+ * HASH_CBLOCK
+ *	size of a unit chunk HASH_BLOCK operates on.
+ * HASH_LONG
+ *	has to be at least 32 bit wide.
+ * HASH_CTX
+ *	context structure that contains at least the following
+ *	members:
+ *		typedef struct {
+ *			...
+ *			HASH_LONG	Nl,Nh;
+ *			either {
+ *			HASH_LONG	data[HASH_LBLOCK];
+ *			unsigned char	data[HASH_CBLOCK];
+ *			};
+ *			unsigned int	num;
+ *			...
+ *			} HASH_CTX;
+ *	data[] vector is expected to be zeroed upon first call to
+ *	HASH_UPDATE.
+ * HASH_UPDATE
+ *	name of "Update" function, implemented here.
+ * HASH_TRANSFORM
+ *	name of "Transform" function, implemented here.
+ * HASH_FINAL
+ *	name of "Final" function, implemented here.
+ * HASH_BLOCK_DATA_ORDER
+ *	name of "block" function capable of treating *unaligned* input
+ *	message in original (data) byte order, implemented externally.
+ * HASH_MAKE_STRING
+ *	macro converting context variables to an ASCII hash string.
  *
- *     typedef struct <name>_state_st {
- *       uint32_t h[<chaining length> / sizeof(uint32_t)];
- *       uint32_t Nl,Nh;
- *       uint32_t data[HASH_CBLOCK / sizeof(uint32_t)];
- *       unsigned int num
- *       ...
- *     } <NAME>_CTX;
- *
- * <chaining length> is the output length of the hash in bytes, before
- * any truncation (e.g. 64 for SHA-224 and SHA-256, 128 for SHA-384 and SHA-512).
- *
- * |HASH_UPDATE| must be defined as the name of the "Update" function to
- * generate.
- *
- * |HASH_TRANSFORM| must be defined as the  the name of the "Transform"
- * function to generate.
- *
- * |HASH_FINAL| must be defined as the name of "Final" function to generate.
- *
- * |HASH_BLOCK_DATA_ORDER| must be defined as the name of the "Block" function.
- * That function must be implemented manually. It must be capable of operating
- * on *unaligned* input data in its original (data) byte order. It must have
- * this signature:
- *
- *     void HASH_BLOCK_DATA_ORDER(uint32_t *state, const uint8_t *data,
- *                                size_t num);
- *
- * It must update the hash state |state| with |num| blocks of data from |data|,
- * where each block is |HASH_CBLOCK| bytes; i.e. |data| points to a array of
- * |HASH_CBLOCK * num| bytes. |state| points to the |h| member of a |HASH_CTX|,
- * and so will have |<chaining length> / sizeof(uint32_t)| elements.
- *
- * |HASH_MAKE_STRING(c, s)| must be defined as a block statement that converts
- * the hash state |c->h| into the output byte order, storing the result in |s|.
+ *					<appro@fy.chalmers.se>
  */
 
 #if !defined(DATA_ORDER_IS_BIG_ENDIAN) && !defined(DATA_ORDER_IS_LITTLE_ENDIAN)
@@ -115,6 +110,9 @@
 #ifndef HASH_CBLOCK
 #error "HASH_CBLOCK must be defined!"
 #endif
+#ifndef HASH_LONG
+#error "HASH_LONG must be defined!"
+#endif
 #ifndef HASH_CTX
 #error "HASH_CTX must be defined!"
 #endif
@@ -245,17 +243,17 @@
 	{
 	const uint8_t *data=data_;
 	uint8_t *p;
-	uint32_t l;
+	HASH_LONG l;
 	size_t n;
 
 	if (len==0) return 1;
 
-	l=(c->Nl+(((uint32_t)len)<<3))&0xffffffffUL;
+	l=(c->Nl+(((HASH_LONG)len)<<3))&0xffffffffUL;
 	/* 95-05-24 eay Fixed a bug with the overflow handling, thanks to
 	 * Wei Dai <weidai@eskimo.com> for pointing it out. */
 	if (l < c->Nl) /* overflow */
 		c->Nh++;
-	c->Nh+=(uint32_t)(len>>29);	/* might cause compiler warning on 16-bit */
+	c->Nh+=(HASH_LONG)(len>>29);	/* might cause compiler warning on 16-bit */
 	c->Nl=l;
 
 	n = c->num;
@@ -266,7 +264,7 @@
 		if (len >= HASH_CBLOCK || len+n >= HASH_CBLOCK)
 			{
 			memcpy (p+n,data,HASH_CBLOCK-n);
-			HASH_BLOCK_DATA_ORDER (c->h,p,1);
+			HASH_BLOCK_DATA_ORDER (c,p,1);
 			n      = HASH_CBLOCK-n;
 			data  += n;
 			len   -= n;
@@ -284,7 +282,7 @@
 	n = len/HASH_CBLOCK;
 	if (n > 0)
 		{
-		HASH_BLOCK_DATA_ORDER (c->h,data,n);
+		HASH_BLOCK_DATA_ORDER (c,data,n);
 		n    *= HASH_CBLOCK;
 		data += n;
 		len  -= n;
@@ -302,7 +300,7 @@
 
 void HASH_TRANSFORM (HASH_CTX *c, const uint8_t *data)
 	{
-	HASH_BLOCK_DATA_ORDER (c->h,data,1);
+	HASH_BLOCK_DATA_ORDER (c,data,1);
 	}
 
 
@@ -318,7 +316,7 @@
 		{
 		memset (p+n,0,HASH_CBLOCK-n);
 		n=0;
-		HASH_BLOCK_DATA_ORDER (c->h,p,1);
+		HASH_BLOCK_DATA_ORDER (c,p,1);
 		}
 	memset (p+n,0,HASH_CBLOCK-8-n);
 
@@ -331,7 +329,7 @@
 	(void)HOST_l2c(c->Nh,p);
 #endif
 	p -= HASH_CBLOCK;
-	HASH_BLOCK_DATA_ORDER (c->h,p,1);
+	HASH_BLOCK_DATA_ORDER (c,p,1);
 	c->num=0;
 	memset (p,0,HASH_CBLOCK);
 
diff --git a/crypto/md4/md4.c b/crypto/md4/md4.c
index 0a8ea1d..5ef9ae5 100644
--- a/crypto/md4/md4.c
+++ b/crypto/md4/md4.c
@@ -64,17 +64,18 @@
 
 int MD4_Init(MD4_CTX *md4) {
   memset(md4, 0, sizeof(MD4_CTX));
-  md4->h[0] = 0x67452301UL;
-  md4->h[1] = 0xefcdab89UL;
-  md4->h[2] = 0x98badcfeUL;
-  md4->h[3] = 0x10325476UL;
+  md4->A = 0x67452301UL;
+  md4->B = 0xefcdab89UL;
+  md4->C = 0x98badcfeUL;
+  md4->D = 0x10325476UL;
   return 1;
 }
 
-void md4_block_data_order(uint32_t *state, const uint8_t *data, size_t num);
+void md4_block_data_order (MD4_CTX *md4, const void *p, size_t num);
 
 #define DATA_ORDER_IS_LITTLE_ENDIAN
 
+#define HASH_LONG uint32_t
 #define HASH_CTX MD4_CTX
 #define HASH_CBLOCK 64
 #define HASH_UPDATE MD4_Update
@@ -83,13 +84,13 @@
 #define HASH_MAKE_STRING(c, s) \
   do {                         \
     uint32_t ll;               \
-    ll = (c)->h[0];            \
+    ll = (c)->A;               \
     (void) HOST_l2c(ll, (s));  \
-    ll = (c)->h[1];            \
+    ll = (c)->B;               \
     (void) HOST_l2c(ll, (s));  \
-    ll = (c)->h[2];            \
+    ll = (c)->C;               \
     (void) HOST_l2c(ll, (s));  \
-    ll = (c)->h[3];            \
+    ll = (c)->D;               \
     (void) HOST_l2c(ll, (s));  \
   } while (0)
 #define HASH_BLOCK_DATA_ORDER md4_block_data_order
@@ -121,14 +122,15 @@
     a = ROTATE(a, s);                  \
   };
 
-void md4_block_data_order(uint32_t *state, const uint8_t *data, size_t num) {
+void md4_block_data_order(MD4_CTX *c, const void *data_, size_t num) {
+  const uint8_t *data = data_;
   uint32_t A, B, C, D, l;
   uint32_t X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X12, X13, X14, X15;
 
-  A = state[0];
-  B = state[1];
-  C = state[2];
-  D = state[3];
+  A = c->A;
+  B = c->B;
+  C = c->C;
+  D = c->D;
 
   for (; num--;) {
     HOST_c2l(data, l);
@@ -215,9 +217,9 @@
     R2(C, D, A, B, X7, 11, 0x6ED9EBA1L);
     R2(B, C, D, A, X15, 15, 0x6ED9EBA1L);
 
-    A = state[0] += A;
-    B = state[1] += B;
-    C = state[2] += C;
-    D = state[3] += D;
+    A = c->A += A;
+    B = c->B += B;
+    C = c->C += C;
+    D = c->D += D;
   }
 }
diff --git a/crypto/md5/md5.c b/crypto/md5/md5.c
index f27e62d..6ad8d12 100644
--- a/crypto/md5/md5.c
+++ b/crypto/md5/md5.c
@@ -79,10 +79,10 @@
 
 int MD5_Init(MD5_CTX *md5) {
   memset(md5, 0, sizeof(MD5_CTX));
-  md5->h[0] = 0x67452301UL;
-  md5->h[1] = 0xefcdab89UL;
-  md5->h[2] = 0x98badcfeUL;
-  md5->h[3] = 0x10325476UL;
+  md5->A = 0x67452301UL;
+  md5->B = 0xefcdab89UL;
+  md5->C = 0x98badcfeUL;
+  md5->D = 0x10325476UL;
   return 1;
 }
 
@@ -93,10 +93,11 @@
 #endif
 
 
-void md5_block_data_order(uint32_t *state, const uint8_t *data, size_t num);
+void md5_block_data_order(MD5_CTX *md5, const void *p, size_t num);
 
 #define DATA_ORDER_IS_LITTLE_ENDIAN
 
+#define HASH_LONG uint32_t
 #define HASH_CTX MD5_CTX
 #define HASH_CBLOCK 64
 #define HASH_UPDATE MD5_Update
@@ -105,13 +106,13 @@
 #define HASH_MAKE_STRING(c, s) \
   do {                         \
     uint32_t ll;               \
-    ll = (c)->h[0];            \
+    ll = (c)->A;               \
     (void) HOST_l2c(ll, (s));  \
-    ll = (c)->h[1];            \
+    ll = (c)->B;               \
     (void) HOST_l2c(ll, (s));  \
-    ll = (c)->h[2];            \
+    ll = (c)->C;               \
     (void) HOST_l2c(ll, (s));  \
-    ll = (c)->h[3];            \
+    ll = (c)->D;               \
     (void) HOST_l2c(ll, (s));  \
   } while (0)
 #define HASH_BLOCK_DATA_ORDER md5_block_data_order
@@ -151,16 +152,17 @@
 #ifdef X
 #undef X
 #endif
-void md5_block_data_order(uint32_t *state, const uint8_t *data, size_t num) {
+void md5_block_data_order(MD5_CTX *md5, const void *in_data, size_t num) {
+  const uint8_t *data = in_data;
   uint32_t A, B, C, D, l;
   uint32_t XX0, XX1, XX2, XX3, XX4, XX5, XX6, XX7, XX8, XX9, XX10, XX11, XX12,
       XX13, XX14, XX15;
 #define X(i) XX##i
 
-  A = state[0];
-  B = state[1];
-  C = state[2];
-  D = state[3];
+  A = md5->A;
+  B = md5->B;
+  C = md5->C;
+  D = md5->D;
 
   for (; num--;) {
     HOST_c2l(data, l);
@@ -264,10 +266,10 @@
     R3(C, D, A, B, X(2), 15, 0x2ad7d2bbL);
     R3(B, C, D, A, X(9), 21, 0xeb86d391L);
 
-    A = state[0] += A;
-    B = state[1] += B;
-    C = state[2] += C;
-    D = state[3] += D;
+    A = md5->A += A;
+    B = md5->B += B;
+    C = md5->C += C;
+    D = md5->D += D;
   }
 }
 #endif
diff --git a/crypto/sha/sha1.c b/crypto/sha/sha1.c
index b3318c5..c03e608 100644
--- a/crypto/sha/sha1.c
+++ b/crypto/sha/sha1.c
@@ -69,11 +69,11 @@
 
 int SHA1_Init(SHA_CTX *sha) {
   memset(sha, 0, sizeof(SHA_CTX));
-  sha->h[0] = 0x67452301UL;
-  sha->h[1] = 0xefcdab89UL;
-  sha->h[2] = 0x98badcfeUL;
-  sha->h[3] = 0x10325476UL;
-  sha->h[4] = 0xc3d2e1f0UL;
+  sha->h0 = 0x67452301UL;
+  sha->h1 = 0xefcdab89UL;
+  sha->h2 = 0x98badcfeUL;
+  sha->h3 = 0x10325476UL;
+  sha->h4 = 0xc3d2e1f0UL;
   return 1;
 }
 
@@ -96,20 +96,21 @@
 
 #define DATA_ORDER_IS_BIG_ENDIAN
 
+#define HASH_LONG               uint32_t
 #define HASH_CTX                SHA_CTX
 #define HASH_CBLOCK             64
 #define HASH_MAKE_STRING(c, s) \
   do {                         \
     uint32_t ll;               \
-    ll = (c)->h[0];            \
+    ll = (c)->h0;              \
     (void) HOST_l2c(ll, (s));  \
-    ll = (c)->h[1];            \
+    ll = (c)->h1;              \
     (void) HOST_l2c(ll, (s));  \
-    ll = (c)->h[2];            \
+    ll = (c)->h2;              \
     (void) HOST_l2c(ll, (s));  \
-    ll = (c)->h[3];            \
+    ll = (c)->h3;              \
     (void) HOST_l2c(ll, (s));  \
-    ll = (c)->h[4];            \
+    ll = (c)->h4;              \
     (void) HOST_l2c(ll, (s));  \
   } while (0)
 
@@ -123,7 +124,7 @@
 #ifndef SHA1_ASM
 static
 #endif
-void sha1_block_data_order(uint32_t *state, const uint8_t *data, size_t num);
+void sha1_block_data_order(SHA_CTX *c, const void *p, size_t num);
 
 #include "../digest/md32_common.h"
 
@@ -185,17 +186,17 @@
 #define X(i)	XX##i
 
 #if !defined(SHA1_ASM)
-static void sha1_block_data_order(uint32_t *state, const uint8_t *data,
-                                  size_t num) {
+static void HASH_BLOCK_DATA_ORDER(SHA_CTX *c, const void *p, size_t num) {
+  const uint8_t *data = p;
   register uint32_t A, B, C, D, E, T, l;
   uint32_t XX0, XX1, XX2, XX3, XX4, XX5, XX6, XX7, XX8, XX9, XX10,
       XX11, XX12, XX13, XX14, XX15;
 
-  A = state[0];
-  B = state[1];
-  C = state[2];
-  D = state[3];
-  E = state[4];
+  A = c->h0;
+  B = c->h1;
+  C = c->h2;
+  D = c->h3;
+  E = c->h4;
 
   for (;;) {
     const union {
@@ -203,7 +204,7 @@
       char little;
     } is_endian = {1};
 
-    if (!is_endian.little && ((uintptr_t)data % 4) == 0) {
+    if (!is_endian.little && ((size_t)p % 4) == 0) {
       const uint32_t *W = (const uint32_t *)data;
 
       X(0) = W[0];
@@ -360,21 +361,21 @@
     BODY_60_79(78, A, B, C, D, E, T, X(14), X(0), X(6), X(11));
     BODY_60_79(79, T, A, B, C, D, E, X(15), X(1), X(7), X(12));
 
-    state[0] = (state[0] + E) & 0xffffffffL;
-    state[1] = (state[1] + T) & 0xffffffffL;
-    state[2] = (state[2] + A) & 0xffffffffL;
-    state[3] = (state[3] + B) & 0xffffffffL;
-    state[4] = (state[4] + C) & 0xffffffffL;
+    c->h0 = (c->h0 + E) & 0xffffffffL;
+    c->h1 = (c->h1 + T) & 0xffffffffL;
+    c->h2 = (c->h2 + A) & 0xffffffffL;
+    c->h3 = (c->h3 + B) & 0xffffffffL;
+    c->h4 = (c->h4 + C) & 0xffffffffL;
 
     if (--num == 0) {
       break;
     }
 
-    A = state[0];
-    B = state[1];
-    C = state[2];
-    D = state[3];
-    E = state[4];
+    A = c->h0;
+    B = c->h1;
+    C = c->h2;
+    D = c->h3;
+    E = c->h4;
   }
 }
 #endif
diff --git a/crypto/sha/sha256.c b/crypto/sha/sha256.c
index 53480dd..8276bbb 100644
--- a/crypto/sha/sha256.c
+++ b/crypto/sha/sha256.c
@@ -135,6 +135,7 @@
 
 #define DATA_ORDER_IS_BIG_ENDIAN
 
+#define HASH_LONG uint32_t
 #define HASH_CTX SHA256_CTX
 #define HASH_CBLOCK 64
 
@@ -184,12 +185,12 @@
 #ifndef SHA256_ASM
 static
 #endif
-void sha256_block_data_order(uint32_t *state, const uint8_t *in, size_t num);
+void sha256_block_data_order(SHA256_CTX *ctx, const void *in, size_t num);
 
 #include "../digest/md32_common.h"
 
 #ifndef SHA256_ASM
-static const uint32_t K256[64] = {
+static const HASH_LONG K256[64] = {
     0x428a2f98UL, 0x71374491UL, 0xb5c0fbcfUL, 0xe9b5dba5UL, 0x3956c25bUL,
     0x59f111f1UL, 0x923f82a4UL, 0xab1c5ed5UL, 0xd807aa98UL, 0x12835b01UL,
     0x243185beUL, 0x550c7dc3UL, 0x72be5d74UL, 0x80deb1feUL, 0x9bdc06a7UL,
@@ -233,28 +234,29 @@
     ROUND_00_15(i, a, b, c, d, e, f, g, h);            \
   } while (0)
 
-static void sha256_block_data_order(uint32_t *state, const uint8_t *data,
+static void sha256_block_data_order(SHA256_CTX *ctx, const void *in,
                                     size_t num) {
   uint32_t a, b, c, d, e, f, g, h, s0, s1, T1;
-  uint32_t X[16];
+  HASH_LONG X[16];
   int i;
+  const uint8_t *data = in;
   const union {
     long one;
     char little;
   } is_endian = {1};
 
   while (num--) {
-    a = state[0];
-    b = state[1];
-    c = state[2];
-    d = state[3];
-    e = state[4];
-    f = state[5];
-    g = state[6];
-    h = state[7];
+    a = ctx->h[0];
+    b = ctx->h[1];
+    c = ctx->h[2];
+    d = ctx->h[3];
+    e = ctx->h[4];
+    f = ctx->h[5];
+    g = ctx->h[6];
+    h = ctx->h[7];
 
-    if (!is_endian.little && ((uintptr_t)data % 4) == 0) {
-      const uint32_t *W = (const uint32_t *)data;
+    if (!is_endian.little && sizeof(HASH_LONG) == 4 && ((size_t)in % 4) == 0) {
+      const HASH_LONG *W = (const HASH_LONG *)data;
 
       T1 = X[0] = W[0];
       ROUND_00_15(0, a, b, c, d, e, f, g, h);
@@ -291,7 +293,7 @@
 
       data += HASH_CBLOCK;
     } else {
-      uint32_t l;
+      HASH_LONG l;
 
       HOST_c2l(data, l);
       T1 = X[0] = l;
@@ -354,14 +356,14 @@
       ROUND_16_63(i + 7, b, c, d, e, f, g, h, a, X);
     }
 
-    state[0] += a;
-    state[1] += b;
-    state[2] += c;
-    state[3] += d;
-    state[4] += e;
-    state[5] += f;
-    state[6] += g;
-    state[7] += h;
+    ctx->h[0] += a;
+    ctx->h[1] += b;
+    ctx->h[2] += c;
+    ctx->h[3] += d;
+    ctx->h[4] += e;
+    ctx->h[5] += f;
+    ctx->h[6] += g;
+    ctx->h[7] += h;
   }
 }
 
diff --git a/crypto/sha/sha512.c b/crypto/sha/sha512.c
index 5e77a1e..57c96ab 100644
--- a/crypto/sha/sha512.c
+++ b/crypto/sha/sha512.c
@@ -65,15 +65,27 @@
 
 /* IMPLEMENTATION NOTES.
  *
- * The 32-bit hash algorithms share a common byte-order neutral collector and
- * padding function implementations that operate on unaligned data,
- * ../md32_common.h. This SHA-512 implementation does not. Reasons
+ * As you might have noticed 32-bit hash algorithms:
+ *
+ * - permit SHA_LONG to be wider than 32-bit (case on CRAY);
+ * - optimized versions implement two transform functions: one operating
+ *   on [aligned] data in host byte order and one - on data in input
+ *   stream byte order;
+ * - share common byte-order neutral collector and padding function
+ *   implementations, ../md32_common.h;
+ *
+ * Neither of the above applies to this SHA-512 implementation. Reasons
  * [in reverse order] are:
  *
- * - It's the only 64-bit hash algorithm for the moment of this writing,
+ * - it's the only 64-bit hash algorithm for the moment of this writing,
  *   there is no need for common collector/padding implementation [yet];
- * - By supporting only a transform function that operates on *aligned* data
- *   the collector/padding function is simpler and easier to optimize. */
+ * - by supporting only one transform function [which operates on
+ *   *aligned* data in input stream byte order, big-endian in this case]
+ *   we minimize burden of maintenance in two ways: a) collector/padding
+ *   function is simpler; b) only one transform function to stare at;
+ * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
+ *   apply a number of optimizations to mitigate potential performance
+ *   penalties caused by previous design decision; */
 
 #if !defined(OPENSSL_NO_ASM) &&                         \
     (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || \
@@ -151,7 +163,7 @@
 #if !defined(SHA512_ASM)
 static
 #endif
-void sha512_block_data_order(uint64_t *state, const uint64_t *W, size_t num);
+void sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num);
 
 
 int SHA384_Final(uint8_t *md, SHA512_CTX *sha) {
@@ -169,7 +181,7 @@
     data = c->u.p;
   }
 #endif
-  sha512_block_data_order(c->h, (uint64_t *)data, 1);
+  sha512_block_data_order(c, data, 1);
 }
 
 int SHA512_Update(SHA512_CTX *c, const void *in_data, size_t len) {
@@ -201,7 +213,7 @@
       memcpy(p + c->num, data, n), c->num = 0;
       len -= n;
       data += n;
-      sha512_block_data_order(c->h, (uint64_t *)p, 1);
+      sha512_block_data_order(c, p, 1);
     }
   }
 
@@ -210,14 +222,14 @@
     if ((size_t)data % sizeof(c->u.d[0]) != 0) {
       while (len >= sizeof(c->u)) {
         memcpy(p, data, sizeof(c->u));
-        sha512_block_data_order(c->h, (uint64_t *)p, 1);
+        sha512_block_data_order(c, p, 1);
         len -= sizeof(c->u);
         data += sizeof(c->u);
       }
     } else
 #endif
     {
-      sha512_block_data_order(c->h, (uint64_t *)data, len / sizeof(c->u));
+      sha512_block_data_order(c, data, len / sizeof(c->u));
       data += len;
       len %= sizeof(c->u);
       data -= len;
@@ -241,7 +253,7 @@
   if (n > (sizeof(sha->u) - 16)) {
     memset(p + n, 0, sizeof(sha->u) - n);
     n = 0;
-    sha512_block_data_order(sha->h, (uint64_t *)p, 1);
+    sha512_block_data_order(sha, p, 1);
   }
 
   memset(p + n, 0, sizeof(sha->u) - 16 - n);
@@ -262,7 +274,7 @@
   p[sizeof(sha->u) - 15] = (uint8_t)(sha->Nh >> 48);
   p[sizeof(sha->u) - 16] = (uint8_t)(sha->Nh >> 56);
 
-  sha512_block_data_order(sha->h, (uint64_t *)p, 1);
+  sha512_block_data_order(sha, p, 1);
 
   if (md == NULL) {
     /* TODO(davidben): This NULL check is absent in other low-level hash 'final'
@@ -431,22 +443,23 @@
  * This code should give better results on 32-bit CPU with less than
  * ~24 registers, both size and performance wise...
  */
-static void sha512_block_data_order(uint64_t *state, const uint64_t *W,
+static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
                                     size_t num) {
+  const uint64_t *W = in;
   uint64_t A, E, T;
   uint64_t X[9 + 80], *F;
   int i;
 
   while (num--) {
     F = X + 80;
-    A = state[0];
-    F[1] = state[1];
-    F[2] = state[2];
-    F[3] = state[3];
-    E = state[4];
-    F[5] = state[5];
-    F[6] = state[6];
-    F[7] = state[7];
+    A = ctx->h[0];
+    F[1] = ctx->h[1];
+    F[2] = ctx->h[2];
+    F[3] = ctx->h[3];
+    E = ctx->h[4];
+    F[5] = ctx->h[5];
+    F[6] = ctx->h[6];
+    F[7] = ctx->h[7];
 
     for (i = 0; i < 16; i++, F--) {
       T = PULL64(W[i]);
@@ -471,14 +484,14 @@
       A = T + Sigma0(A) + Maj(A, F[1], F[2]);
     }
 
-    state[0] += A;
-    state[1] += F[1];
-    state[2] += F[2];
-    state[3] += F[3];
-    state[4] += E;
-    state[5] += F[5];
-    state[6] += F[6];
-    state[7] += F[7];
+    ctx->h[0] += A;
+    ctx->h[1] += F[1];
+    ctx->h[2] += F[2];
+    ctx->h[3] += F[3];
+    ctx->h[4] += E;
+    ctx->h[5] += F[5];
+    ctx->h[6] += F[6];
+    ctx->h[7] += F[7];
 
     W += 16;
   }
@@ -504,22 +517,23 @@
     ROUND_00_15(i + j, a, b, c, d, e, f, g, h);        \
   } while (0)
 
-static void sha512_block_data_order(uint64_t *state, const uint64_t *W,
+static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
                                     size_t num) {
+  const uint64_t *W = in;
   uint64_t a, b, c, d, e, f, g, h, s0, s1, T1;
   uint64_t X[16];
   int i;
 
   while (num--) {
 
-    a = state[0];
-    b = state[1];
-    c = state[2];
-    d = state[3];
-    e = state[4];
-    f = state[5];
-    g = state[6];
-    h = state[7];
+    a = ctx->h[0];
+    b = ctx->h[1];
+    c = ctx->h[2];
+    d = ctx->h[3];
+    e = ctx->h[4];
+    f = ctx->h[5];
+    g = ctx->h[6];
+    h = ctx->h[7];
 
     T1 = X[0] = PULL64(W[0]);
     ROUND_00_15(0, a, b, c, d, e, f, g, h);
@@ -573,14 +587,14 @@
       ROUND_16_80(i, 15, b, c, d, e, f, g, h, a, X);
     }
 
-    state[0] += a;
-    state[1] += b;
-    state[2] += c;
-    state[3] += d;
-    state[4] += e;
-    state[5] += f;
-    state[6] += g;
-    state[7] += h;
+    ctx->h[0] += a;
+    ctx->h[1] += b;
+    ctx->h[2] += c;
+    ctx->h[3] += d;
+    ctx->h[4] += e;
+    ctx->h[5] += f;
+    ctx->h[6] += g;
+    ctx->h[7] += h;
 
     W += 16;
   }
diff --git a/include/openssl/md4.h b/include/openssl/md4.h
index e363b73..1db7499 100644
--- a/include/openssl/md4.h
+++ b/include/openssl/md4.h
@@ -88,7 +88,7 @@
 OPENSSL_EXPORT void MD4_Transform(MD4_CTX *md4, const uint8_t *block);
 
 struct md4_state_st {
-  uint32_t h[4];
+  uint32_t A, B, C, D;
   uint32_t Nl, Nh;
   uint32_t data[16];
   unsigned int num;
diff --git a/include/openssl/md5.h b/include/openssl/md5.h
index 87c3ba4..9b13922 100644
--- a/include/openssl/md5.h
+++ b/include/openssl/md5.h
@@ -93,7 +93,7 @@
 OPENSSL_EXPORT void MD5_Transform(MD5_CTX *md5, const uint8_t *block);
 
 struct md5_state_st {
-  uint32_t h[4];
+  uint32_t A, B, C, D;
   uint32_t Nl, Nh;
   uint32_t data[16];
   unsigned int num;
diff --git a/include/openssl/sha.h b/include/openssl/sha.h
index 58f5424..ac2ab75 100644
--- a/include/openssl/sha.h
+++ b/include/openssl/sha.h
@@ -98,7 +98,7 @@
 OPENSSL_EXPORT void SHA1_Transform(SHA_CTX *sha, const uint8_t *block);
 
 struct sha_state_st {
-  uint32_t h[5];
+  uint32_t h0, h1, h2, h3, h4;
   uint32_t Nl, Nh;
   uint32_t data[16];
   unsigned int num;