Document and fix up name hashing functions

These return 32-bit hashes, so they should return a platform-independent
uint32_t. I've categorized X509_issuer_name_hash and friends under
"convenience" functions. X509_NAME_hash and X509_NAME_hash_old are as
yet unclassified. Since the hash function is only relevant to
X509_LOOKUP_hash_dir, I'm thinking I'll put them with that, once that's
organized.

While I'm here, simplify the implementations of these functions. The
hash operation itself can be made infallible and allocation-free easily.
However the function itself is still fallible (and non-const, and not
thread-safe) due to the cached encoding mess. X509Test.NameHash captures
existing hash values, so we'd notice if this changed the output.

Update-Note: This is source-compatible for C/C++, including with
-Wconversion, but some bindings need a patch in cl/588632028 to be
compatible.

Bug: 426
Change-Id: I9bfd3f1093ab15c44d8cb2d81d53aeb3d6e49fc9
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/64647
Commit-Queue: David Benjamin <davidben@google.com>
Reviewed-by: Bob Beck <bbe@google.com>
diff --git a/crypto/x509/by_dir.c b/crypto/x509/by_dir.c
index 64cec53..f5e4787 100644
--- a/crypto/x509/by_dir.c
+++ b/crypto/x509/by_dir.c
@@ -54,6 +54,7 @@
  * copied and put under another distribution licence
  * [including the GNU Public Licence.] */
 
+#include <inttypes.h>
 #include <string.h>
 #include <sys/stat.h>
 #include <sys/types.h>
@@ -68,7 +69,7 @@
 #include "internal.h"
 
 typedef struct lookup_dir_hashes_st {
-  unsigned long hash;
+  uint32_t hash;
   int suffix;
 } BY_DIR_HASH;
 
@@ -246,8 +247,8 @@
   int ok = 0;
   size_t i;
   int j, k;
-  unsigned long h;
-  unsigned long hash_array[2];
+  uint32_t h;
+  uint32_t hash_array[2];
   int hash_index;
   BUF_MEM *b = NULL;
   X509_OBJECT stmp, *tmp;
@@ -309,7 +310,8 @@
         hent = NULL;
       }
       for (;;) {
-        snprintf(b->data, b->max, "%s/%08lx.%s%d", ent->dir, h, postfix, k);
+        snprintf(b->data, b->max, "%s/%08" PRIx32 ".%s%d", ent->dir, h, postfix,
+                 k);
 #ifndef OPENSSL_NO_POSIX_IO
 #if defined(_WIN32) && !defined(stat)
 #define stat _stat
diff --git a/crypto/x509/internal.h b/crypto/x509/internal.h
index afc6cd7..b277eb0 100644
--- a/crypto/x509/internal.h
+++ b/crypto/x509/internal.h
@@ -105,7 +105,6 @@
   STACK_OF(X509_NAME_ENTRY) *entries;
   int modified;  // true if 'bytes' needs to be built
   BUF_MEM *bytes;
-  // unsigned long hash; Keep the hash around for lookups
   unsigned char *canon_enc;
   int canon_enclen;
 } /* X509_NAME */;
diff --git a/crypto/x509/x509_cmp.c b/crypto/x509/x509_cmp.c
index a934e39..714212a 100644
--- a/crypto/x509/x509_cmp.c
+++ b/crypto/x509/x509_cmp.c
@@ -60,7 +60,9 @@
 #include <openssl/digest.h>
 #include <openssl/err.h>
 #include <openssl/mem.h>
+#include <openssl/md5.h>
 #include <openssl/obj.h>
+#include <openssl/sha.h>
 #include <openssl/stack.h>
 #include <openssl/x509.h>
 
@@ -88,11 +90,11 @@
   return a->cert_info->issuer;
 }
 
-unsigned long X509_issuer_name_hash(X509 *x) {
-  return (X509_NAME_hash(x->cert_info->issuer));
+uint32_t X509_issuer_name_hash(X509 *x) {
+  return X509_NAME_hash(x->cert_info->issuer);
 }
 
-unsigned long X509_issuer_name_hash_old(X509 *x) {
+uint32_t X509_issuer_name_hash_old(X509 *x) {
   return (X509_NAME_hash_old(x->cert_info->issuer));
 }
 
@@ -108,12 +110,12 @@
   return x509->cert_info->serialNumber;
 }
 
-unsigned long X509_subject_name_hash(X509 *x) {
-  return (X509_NAME_hash(x->cert_info->subject));
+uint32_t X509_subject_name_hash(X509 *x) {
+  return X509_NAME_hash(x->cert_info->subject);
 }
 
-unsigned long X509_subject_name_hash_old(X509 *x) {
-  return (X509_NAME_hash_old(x->cert_info->subject));
+uint32_t X509_subject_name_hash_old(X509 *x) {
+  return X509_NAME_hash_old(x->cert_info->subject);
 }
 
 // Compare two certificates: they must be identical for this to work. NB:
@@ -165,44 +167,29 @@
   return OPENSSL_memcmp(a->canon_enc, b->canon_enc, a->canon_enclen);
 }
 
-unsigned long X509_NAME_hash(X509_NAME *x) {
-  unsigned long ret = 0;
-  unsigned char md[SHA_DIGEST_LENGTH];
-
-  // Make sure X509_NAME structure contains valid cached encoding
-  i2d_X509_NAME(x, NULL);
-  if (!EVP_Digest(x->canon_enc, x->canon_enclen, md, NULL, EVP_sha1(), NULL)) {
+uint32_t X509_NAME_hash(X509_NAME *x) {
+  // Make sure the X509_NAME structure contains a valid cached encoding.
+  if (i2d_X509_NAME(x, NULL) < 0) {
     return 0;
   }
 
-  ret = (((unsigned long)md[0]) | ((unsigned long)md[1] << 8L) |
-         ((unsigned long)md[2] << 16L) | ((unsigned long)md[3] << 24L)) &
-        0xffffffffL;
-  return ret;
+  uint8_t md[SHA_DIGEST_LENGTH];
+  SHA1(x->canon_enc, x->canon_enclen, md);
+  return CRYPTO_load_u32_le(md);
 }
 
 // I now DER encode the name and hash it.  Since I cache the DER encoding,
 // this is reasonably efficient.
 
-unsigned long X509_NAME_hash_old(X509_NAME *x) {
-  EVP_MD_CTX md_ctx;
-  unsigned long ret = 0;
-  unsigned char md[16];
-
-  // Make sure X509_NAME structure contains valid cached encoding
-  i2d_X509_NAME(x, NULL);
-  EVP_MD_CTX_init(&md_ctx);
-  // EVP_MD_CTX_set_flags(&md_ctx, EVP_MD_CTX_FLAG_NON_FIPS_ALLOW);
-  if (EVP_DigestInit_ex(&md_ctx, EVP_md5(), NULL) &&
-      EVP_DigestUpdate(&md_ctx, x->bytes->data, x->bytes->length) &&
-      EVP_DigestFinal_ex(&md_ctx, md, NULL)) {
-    ret = (((unsigned long)md[0]) | ((unsigned long)md[1] << 8L) |
-           ((unsigned long)md[2] << 16L) | ((unsigned long)md[3] << 24L)) &
-          0xffffffffL;
+uint32_t X509_NAME_hash_old(X509_NAME *x) {
+  // Make sure the X509_NAME structure contains a valid cached encoding.
+  if (i2d_X509_NAME(x, NULL) < 0) {
+    return 0;
   }
-  EVP_MD_CTX_cleanup(&md_ctx);
 
-  return ret;
+  uint8_t md[SHA_DIGEST_LENGTH];
+  MD5((const uint8_t *)x->bytes->data, x->bytes->length, md);
+  return CRYPTO_load_u32_le(md);
 }
 
 X509 *X509_find_by_issuer_and_serial(const STACK_OF(X509) *sk, X509_NAME *name,
diff --git a/crypto/x509/x509_test.cc b/crypto/x509/x509_test.cc
index d30f9bb..f8c930f 100644
--- a/crypto/x509/x509_test.cc
+++ b/crypto/x509/x509_test.cc
@@ -2582,8 +2582,8 @@
 TEST(X509Test, NameHash) {
   struct {
     std::vector<uint8_t> name_der;
-    unsigned long hash;
-    unsigned long hash_old;
+    uint32_t hash;
+    uint32_t hash_old;
   } kTests[] = {
       // SEQUENCE {
       //   SET {
diff --git a/include/openssl/x509.h b/include/openssl/x509.h
index 989b709..66668a6 100644
--- a/include/openssl/x509.h
+++ b/include/openssl/x509.h
@@ -2831,6 +2831,22 @@
 // CRL, only the issuer fields using |X509_NAME_cmp|.
 OPENSSL_EXPORT int X509_CRL_cmp(const X509_CRL *a, const X509_CRL *b);
 
+// X509_issuer_name_hash returns the hash of |x509|'s issuer name with
+// |X509_NAME_hash|.
+OPENSSL_EXPORT uint32_t X509_issuer_name_hash(X509 *x509);
+
+// X509_subject_name_hash returns the hash of |x509|'s subject name with
+// |X509_NAME_hash|.
+OPENSSL_EXPORT uint32_t X509_subject_name_hash(X509 *x509);
+
+// X509_issuer_name_hash returns the hash of |x509|'s issuer name with
+// |X509_NAME_hash_old|.
+OPENSSL_EXPORT uint32_t X509_issuer_name_hash_old(X509 *x509);
+
+// X509_usjbect_name_hash returns the hash of |x509|'s usjbect name with
+// |X509_NAME_hash_old|.
+OPENSSL_EXPORT uint32_t X509_subject_name_hash_old(X509 *x509);
+
 
 // ex_data functions.
 //
@@ -3443,16 +3459,24 @@
 
 OPENSSL_EXPORT int X509_TRUST_set(int *t, int trust);
 
-OPENSSL_EXPORT unsigned long X509_issuer_name_hash(X509 *a);
-
-OPENSSL_EXPORT unsigned long X509_subject_name_hash(X509 *x);
-
-OPENSSL_EXPORT unsigned long X509_issuer_name_hash_old(X509 *a);
-OPENSSL_EXPORT unsigned long X509_subject_name_hash_old(X509 *x);
-
 OPENSSL_EXPORT int X509_cmp(const X509 *a, const X509 *b);
-OPENSSL_EXPORT unsigned long X509_NAME_hash(X509_NAME *x);
-OPENSSL_EXPORT unsigned long X509_NAME_hash_old(X509_NAME *x);
+
+// X509_NAME_hash returns a hash of |name|, or zero on error. This is the new
+// hash used by |X509_LOOKUP_hash_dir|.
+//
+// TODO(https://crbug.com/boringssl/407): This should be const and thread-safe
+// but currently is neither, notably if |name| was modified from its parsed
+// value.
+OPENSSL_EXPORT uint32_t X509_NAME_hash(X509_NAME *name);
+
+// X509_NAME_hash_old returns a hash of |name|, or zero on error. This is the
+// legacy hash used by |X509_LOOKUP_hash_dir|, which is still supported for
+// compatibility.
+//
+// TODO(https://crbug.com/boringssl/407): This should be const and thread-safe
+// but currently is neither, notably if |name| was modified from its parsed
+// value.
+OPENSSL_EXPORT uint32_t X509_NAME_hash_old(X509_NAME *name);
 
 OPENSSL_EXPORT int X509_CRL_match(const X509_CRL *a, const X509_CRL *b);