Improve crypto/digest/md32_common.h mechanism.

The documentation in md32_common.h is now (more) correct with respect
to the most important details of the layout of |HASH_CTX|. The
documentation explaining why sha512.c doesn't use md32_common.h is now
more accurate as well.

Before, the C implementations of HASH_BLOCK_DATA_ORDER took a pointer
to the |HASH_CTX| and the assembly language implementations took a
pointer to the hash state |h| member of |HASH_CTX|. (This worked
because |h| is always the first member of |HASH_CTX|.) Now, the C
implementations take a pointer directly to |h| too.

The definitions of |MD4_CTX|, |MD5_CTX|, and |SHA1_CTX| were changed to
be consistent with |SHA256_CTX| and |SHA512_CTX| in storing the hash
state in an array. This will break source compatibility with any
external code that accesses the hash state directly, but will not
affect binary compatibility.

The second parameter of |HASH_BLOCK_DATA_ORDER| is now of type
|const uint8_t *|; previously it was |void *| and all implementations
had a |uint8_t *data| variable to access it as an array of bytes.

This change paves the way for future refactorings such as automatically
generating the |*_Init| functions and/or sharing one I-U-F
implementation across all digest algorithms.

Change-Id: I6e9dd09ff057c67941021d324a4fa1d39f58b0db
Reviewed-on: https://boringssl-review.googlesource.com/6405
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/digest/md32_common.h b/crypto/digest/md32_common.h
index 14607fb..d213476 100644
--- a/crypto/digest/md32_common.h
+++ b/crypto/digest/md32_common.h
@@ -58,49 +58,54 @@
 
 #define asm __asm__
 
-/* This is a generic 32 bit "collector" for message digest algorithms.
- * Whenever needed it collects input character stream into chunks of
- * 32 bit values and invokes a block function that performs actual hash
- * calculations.
+/* This is a generic 32-bit "collector" for message digest algorithms. It
+ * collects input character stream into chunks of 32-bit values and invokes the
+ * block function that performs the actual hash calculations. To make use of
+ * this mechanism, the following macros must be defined before including
+ * md32_common.h.
  *
- * Porting guide.
+ * One of |DATA_ORDER_IS_BIG_ENDIAN| or |DATA_ORDER_IS_LITTLE_ENDIAN| must be
+ * defined to specify the byte order of the input stream.
  *
- * Obligatory macros:
+ * |HASH_CBLOCK| must be defined as the integer block size, in bytes.
  *
- * DATA_ORDER_IS_BIG_ENDIAN or DATA_ORDER_IS_LITTLE_ENDIAN
- *	this macro defines byte order of input stream.
- * HASH_CBLOCK
- *	size of a unit chunk HASH_BLOCK operates on.
- * HASH_LONG
- *	has to be at least 32 bit wide.
- * HASH_CTX
- *	context structure that at least contains following
- *	members:
- *		typedef struct {
- *			...
- *			HASH_LONG	Nl,Nh;
- *			either {
- *			HASH_LONG	data[HASH_LBLOCK];
- *			unsigned char	data[HASH_CBLOCK];
- *			};
- *			unsigned int	num;
- *			...
- *			} HASH_CTX;
- *	data[] vector is expected to be zeroed upon first call to
- *	HASH_UPDATE.
- * HASH_UPDATE
- *	name of "Update" function, implemented here.
- * HASH_TRANSFORM
- *	name of "Transform" function, implemented here.
- * HASH_FINAL
- *	name of "Final" function, implemented here.
- * HASH_BLOCK_DATA_ORDER
- *	name of "block" function capable of treating *unaligned* input
- *	message in original (data) byte order, implemented externally.
- * HASH_MAKE_STRING
- *	macro convering context variables to an ASCII hash string.
+ * |HASH_CTX| must be defined as the name of the context structure, which must
+ * have at least the following members:
  *
- *					<appro@fy.chalmers.se>
+ *     typedef struct <name>_state_st {
+ *       uint32_t h[<chaining length> / sizeof(uint32_t)];
+ *       uint32_t Nl,Nh;
+ *       uint32_t data[HASH_CBLOCK / sizeof(uint32_t)];
+ *       unsigned int num
+ *       ...
+ *     } <NAME>_CTX;
+ *
+ * <chaining length> is the output length of the hash in bytes, before
+ * any truncation (e.g. 64 for SHA-224 and SHA-256, 128 for SHA-384 and SHA-512).
+ *
+ * |HASH_UPDATE| must be defined as the name of the "Update" function to
+ * generate.
+ *
+ * |HASH_TRANSFORM| must be defined as the  the name of the "Transform"
+ * function to generate.
+ *
+ * |HASH_FINAL| must be defined as the name of "Final" function to generate.
+ *
+ * |HASH_BLOCK_DATA_ORDER| must be defined as the name of the "Block" function.
+ * That function must be implemented manually. It must be capable of operating
+ * on *unaligned* input data in its original (data) byte order. It must have
+ * this signature:
+ *
+ *     void HASH_BLOCK_DATA_ORDER(uint32_t *state, const uint8_t *data,
+ *                                size_t num);
+ *
+ * It must update the hash state |state| with |num| blocks of data from |data|,
+ * where each block is |HASH_CBLOCK| bytes; i.e. |data| points to a array of
+ * |HASH_CBLOCK * num| bytes. |state| points to the |h| member of a |HASH_CTX|,
+ * and so will have |<chaining length> / sizeof(uint32_t)| elements.
+ *
+ * |HASH_MAKE_STRING(c, s)| must be defined as a block statement that converts
+ * the hash state |c->h| into the output byte order, storing the result in |s|.
  */
 
 #if !defined(DATA_ORDER_IS_BIG_ENDIAN) && !defined(DATA_ORDER_IS_LITTLE_ENDIAN)
@@ -110,9 +115,6 @@
 #ifndef HASH_CBLOCK
 #error "HASH_CBLOCK must be defined!"
 #endif
-#ifndef HASH_LONG
-#error "HASH_LONG must be defined!"
-#endif
 #ifndef HASH_CTX
 #error "HASH_CTX must be defined!"
 #endif
@@ -243,17 +245,17 @@
 	{
 	const uint8_t *data=data_;
 	uint8_t *p;
-	HASH_LONG l;
+	uint32_t l;
 	size_t n;
 
 	if (len==0) return 1;
 
-	l=(c->Nl+(((HASH_LONG)len)<<3))&0xffffffffUL;
+	l=(c->Nl+(((uint32_t)len)<<3))&0xffffffffUL;
 	/* 95-05-24 eay Fixed a bug with the overflow handling, thanks to
 	 * Wei Dai <weidai@eskimo.com> for pointing it out. */
 	if (l < c->Nl) /* overflow */
 		c->Nh++;
-	c->Nh+=(HASH_LONG)(len>>29);	/* might cause compiler warning on 16-bit */
+	c->Nh+=(uint32_t)(len>>29);	/* might cause compiler warning on 16-bit */
 	c->Nl=l;
 
 	n = c->num;
@@ -264,7 +266,7 @@
 		if (len >= HASH_CBLOCK || len+n >= HASH_CBLOCK)
 			{
 			memcpy (p+n,data,HASH_CBLOCK-n);
-			HASH_BLOCK_DATA_ORDER (c,p,1);
+			HASH_BLOCK_DATA_ORDER (c->h,p,1);
 			n      = HASH_CBLOCK-n;
 			data  += n;
 			len   -= n;
@@ -282,7 +284,7 @@
 	n = len/HASH_CBLOCK;
 	if (n > 0)
 		{
-		HASH_BLOCK_DATA_ORDER (c,data,n);
+		HASH_BLOCK_DATA_ORDER (c->h,data,n);
 		n    *= HASH_CBLOCK;
 		data += n;
 		len  -= n;
@@ -300,7 +302,7 @@
 
 void HASH_TRANSFORM (HASH_CTX *c, const uint8_t *data)
 	{
-	HASH_BLOCK_DATA_ORDER (c,data,1);
+	HASH_BLOCK_DATA_ORDER (c->h,data,1);
 	}
 
 
@@ -316,7 +318,7 @@
 		{
 		memset (p+n,0,HASH_CBLOCK-n);
 		n=0;
-		HASH_BLOCK_DATA_ORDER (c,p,1);
+		HASH_BLOCK_DATA_ORDER (c->h,p,1);
 		}
 	memset (p+n,0,HASH_CBLOCK-8-n);
 
@@ -329,7 +331,7 @@
 	(void)HOST_l2c(c->Nh,p);
 #endif
 	p -= HASH_CBLOCK;
-	HASH_BLOCK_DATA_ORDER (c,p,1);
+	HASH_BLOCK_DATA_ORDER (c->h,p,1);
 	c->num=0;
 	memset (p,0,HASH_CBLOCK);