Rewrite crypto_md32_* with templates instead of function pointers Speculative improvement for b/413675390, but there is insufficient information in the bug to really be sure, and benchmarking does not seem to give consistent results. It seems that crypto_md32_* are not getting inlined when being built with the NDK, which means we're not specializing by block size and we're calling the block data functions indirectly. It's unclear what changed here, as this code has been the same for a while. The root cause might have been a compiler change. Either way, switching to templates avoids tempting the compiler into doing this, without the mess of macros that we had a while ago. Inspecting the assembly, this seems to fix the codegen, but benchmarking performance on my test device is very inconsistent. Change-Id: Ib7c1e97d4d7a3e3b82a8cc5f0418b8b1100c330d Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/78989 Reviewed-by: Adam Langley <agl@google.com> Commit-Queue: David Benjamin <davidben@google.com> Auto-Submit: David Benjamin <davidben@google.com>

commit: e88e5069694dd6da6398dab7f020d6c271c3022f [log] [tgz]
author: David Benjamin <davidben@google.com> Wed Apr 30 19:44:56 2025 -0400
committer: Boringssl LUCI CQ <boringssl-scoped@luci-project-accounts.iam.gserviceaccount.com> Thu May 01 12:19:04 2025 -0700
tree: 8d3ab3d62d50274e8496a9412832c65938ea3c3f
parent: 7df46880add8c863b8526e5228b0dc28b9a28f59 [diff]
diff --git a/crypto/fipsmodule/digest/md32_common.h b/crypto/fipsmodule/digest/md32_common.h
index b64f20b..9e237b2 100644
--- a/crypto/fipsmodule/digest/md32_common.h
+++ b/crypto/fipsmodule/digest/md32_common.h

@@ -16,14 +16,13 @@
 #define OPENSSL_HEADER_CRYPTO_FIPSMODULE_DIGEST_MD32_COMMON_H
 
 #include <openssl/base.h>
+#include <openssl/span.h>
 
 #include <assert.h>
 
 #include "../../internal.h"
 
-#if defined(__cplusplus)
-extern "C" {
-#endif
+BSSL_NAMESPACE_BEGIN
 
 
 // This is a generic 32-bit "collector" for message digest algorithms. It
@@ -48,70 +47,80 @@
 // |h| is the hash state and is updated by a function of type
 // |crypto_md32_block_func|. |data| is the partial unprocessed block and has
 // |num| bytes. |Nl| and |Nh| maintain the number of bits processed so far.
+//
+// The template parameter is then a traits struct defined as follows:
+//
+//     struct HashTraits {
+//       // HashContext is the hash type defined above.
+//       using HashContext = <NAME>_CTX;
+//
+//       // kBlockSize is the block size of the hash function.
+//       static constexpr size_t kBlockSize = <block size>;
+//
+//       // kLengthIsBigEndian determines whether the final length is encoded in
+//       // big or little endian.
+//       static constexpr bool kLengthIsBigEndian = ...;
+//
+//       // HashBlocks incorporates |num_blocks| blocks of input from |data|
+//       // into |state|. It is assumed the caller has sized |state| and |data|
+//       // for the hash function.
+//       static void HashBlocks(uint32_t *state, const uint8_t *data,
+//                              size_t num_blocks) {
+//         <name>_block_data_order(state, data, num_blocks);
+//       }
+//     };
+//
+// The reason for this formulation is to encourage the compiler to specialize
+// all the code for the block size and block function.
 
-// A crypto_md32_block_func should incorporate |num_blocks| of input from |data|
-// into |state|. It is assumed the caller has sized |state| and |data| for the
-// hash function.
-typedef void (*crypto_md32_block_func)(uint32_t *state, const uint8_t *data,
-                                       size_t num_blocks);
-
-// crypto_md32_update adds |len| bytes from |in| to the digest. |data| must be a
-// buffer of length |block_size| with the first |*num| bytes containing a
-// partial block. This function combines the partial block with |in| and
-// incorporates any complete blocks into the digest state |h|. It then updates
-// |data| and |*num| with the new partial block and updates |*Nh| and |*Nl| with
-// the data consumed.
-static inline void crypto_md32_update(crypto_md32_block_func block_func,
-                                      uint32_t *h, uint8_t *data,
-                                      size_t block_size, unsigned *num,
-                                      uint32_t *Nh, uint32_t *Nl,
-                                      const uint8_t *in, size_t len) {
-  if (len == 0) {
+// crypto_md32_update hashes |in| to |ctx|.
+template <typename Traits>
+inline void crypto_md32_update(typename Traits::HashContext *ctx,
+                               Span<const uint8_t> in) {
+  static_assert(Traits::kBlockSize == sizeof(ctx->data), "block size is wrong");
+  if (in.empty()) {
     return;
   }
 
-  uint32_t l = *Nl + (((uint32_t)len) << 3);
-  if (l < *Nl) {
+  uint32_t l = ctx->Nl + ((static_cast<uint32_t>(in.size())) << 3);
+  if (l < ctx->Nl) {
     // Handle carries.
-    (*Nh)++;
+    ctx->Nh++;
   }
-  *Nh += (uint32_t)(len >> 29);
-  *Nl = l;
+  ctx->Nh += static_cast<uint32_t>(in.size() >> 29);
+  ctx->Nl = l;
 
-  size_t n = *num;
+  size_t n = ctx->num;
   if (n != 0) {
-    if (len >= block_size || len + n >= block_size) {
-      OPENSSL_memcpy(data + n, in, block_size - n);
-      block_func(h, data, 1);
-      n = block_size - n;
-      in += n;
-      len -= n;
-      *num = 0;
+    if (in.size() >= Traits::kBlockSize ||
+        in.size() + n >= Traits::kBlockSize) {
+      OPENSSL_memcpy(ctx->data + n, in.data(), Traits::kBlockSize - n);
+      Traits::HashBlocks(ctx->h, ctx->data, 1);
+      in = in.subspan(Traits::kBlockSize - n);
+      ctx->num = 0;
       // Keep |data| zeroed when unused.
-      OPENSSL_memset(data, 0, block_size);
+      OPENSSL_memset(ctx->data, 0, Traits::kBlockSize);
     } else {
-      OPENSSL_memcpy(data + n, in, len);
-      *num += (unsigned)len;
+      OPENSSL_memcpy(ctx->data + n, in.data(), in.size());
+      ctx->num += static_cast<unsigned>(in.size());
       return;
     }
   }
 
-  n = len / block_size;
+  n = in.size() / Traits::kBlockSize;
   if (n > 0) {
-    block_func(h, in, n);
-    n *= block_size;
-    in += n;
-    len -= n;
+    Traits::HashBlocks(ctx->h, in.data(), n);
+    in = in.subspan(n * Traits::kBlockSize);
   }
 
-  if (len != 0) {
-    *num = (unsigned)len;
-    OPENSSL_memcpy(data, in, len);
+  if (!in.empty()) {
+    ctx->num = static_cast<unsigned>(in.size());
+    OPENSSL_memcpy(ctx->data, in.data(), in.size());
   }
 }
 
 // crypto_md32_final incorporates the partial block and trailing length into the
-// digest state |h|. The trailing length is encoded in little-endian if
+// digest state in |ctx|. The trailing length is encoded in little-endian if
 // |is_big_endian| is zero and big-endian otherwise. |data| must be a buffer of
 // length |block_size| with the first |*num| bytes containing a partial block.
 // |Nh| and |Nl| contain the total number of bits processed. On return, this
@@ -120,42 +129,38 @@
 //
 // This function does not serialize |h| into a final digest. This is the
 // responsibility of the caller.
-static inline void crypto_md32_final(crypto_md32_block_func block_func,
-                                     uint32_t *h, uint8_t *data,
-                                     size_t block_size, unsigned *num,
-                                     uint32_t Nh, uint32_t Nl,
-                                     int is_big_endian) {
+template <typename Traits>
+inline void crypto_md32_final(typename Traits::HashContext *ctx) {
+  static_assert(Traits::kBlockSize == sizeof(ctx->data), "block size is wrong");
   // |data| always has room for at least one byte. A full block would have
   // been consumed.
-  size_t n = *num;
-  assert(n < block_size);
-  data[n] = 0x80;
+  size_t n = ctx->num;
+  assert(n < Traits::kBlockSize);
+  ctx->data[n] = 0x80;
   n++;
 
   // Fill the block with zeros if there isn't room for a 64-bit length.
-  if (n > block_size - 8) {
-    OPENSSL_memset(data + n, 0, block_size - n);
+  if (n > Traits::kBlockSize - 8) {
+    OPENSSL_memset(ctx->data + n, 0, Traits::kBlockSize - n);
     n = 0;
-    block_func(h, data, 1);
+    Traits::HashBlocks(ctx->h, ctx->data, 1);
   }
-  OPENSSL_memset(data + n, 0, block_size - 8 - n);
+  OPENSSL_memset(ctx->data + n, 0, Traits::kBlockSize - 8 - n);
 
   // Append a 64-bit length to the block and process it.
-  if (is_big_endian) {
-    CRYPTO_store_u32_be(data + block_size - 8, Nh);
-    CRYPTO_store_u32_be(data + block_size - 4, Nl);
+  if constexpr (Traits::kLengthIsBigEndian) {
+    CRYPTO_store_u32_be(ctx->data + Traits::kBlockSize - 8, ctx->Nh);
+    CRYPTO_store_u32_be(ctx->data + Traits::kBlockSize - 4, ctx->Nl);
   } else {
-    CRYPTO_store_u32_le(data + block_size - 8, Nl);
-    CRYPTO_store_u32_le(data + block_size - 4, Nh);
+    CRYPTO_store_u32_le(ctx->data + Traits::kBlockSize - 8, ctx->Nl);
+    CRYPTO_store_u32_le(ctx->data + Traits::kBlockSize - 4, ctx->Nh);
   }
-  block_func(h, data, 1);
-  *num = 0;
-  OPENSSL_memset(data, 0, block_size);
+  Traits::HashBlocks(ctx->h, ctx->data, 1);
+  ctx->num = 0;
+  OPENSSL_memset(ctx->data, 0, Traits::kBlockSize);
 }
 
 
-#if defined(__cplusplus)
-}  // extern C
-#endif
+BSSL_NAMESPACE_END
 
 #endif  // OPENSSL_HEADER_CRYPTO_FIPSMODULE_DIGEST_MD32_COMMON_H

diff --git a/crypto/fipsmodule/sha/sha1.cc.inc b/crypto/fipsmodule/sha/sha1.cc.inc
index ca591fd..530ab93 100644
--- a/crypto/fipsmodule/sha/sha1.cc.inc
+++ b/crypto/fipsmodule/sha/sha1.cc.inc

@@ -15,6 +15,7 @@
 #include <string.h>
 
 #include <openssl/mem.h>
+#include <openssl/span.h>
 
 #include "../../internal.h"
 #include "../bcm_interface.h"
@@ -43,10 +44,21 @@
   return bcm_infallible::approved;
 }
 
+namespace {
+struct SHA1Traits {
+  using HashContext = SHA_CTX;
+  static constexpr size_t kBlockSize = BCM_SHA_CBLOCK;
+  static constexpr bool kLengthIsBigEndian = true;
+  static void HashBlocks(uint32_t *state, const uint8_t *data,
+                         size_t num_blocks) {
+    sha1_block_data_order(state, data, num_blocks);
+  }
+};
+}  // namespace
+
 bcm_infallible BCM_sha1_update(SHA_CTX *c, const void *data, size_t len) {
-  crypto_md32_update(&sha1_block_data_order, c->h, c->data, SHA_CBLOCK, &c->num,
-                     &c->Nh, &c->Nl, reinterpret_cast<const uint8_t *>(data),
-                     len);
+  bssl::crypto_md32_update<SHA1Traits>(
+      c, bssl::Span(static_cast<const uint8_t *>(data), len));
   return bcm_infallible::approved;
 }
 
@@ -60,9 +72,7 @@
 }
 
 bcm_infallible BCM_sha1_final(uint8_t out[SHA_DIGEST_LENGTH], SHA_CTX *c) {
-  crypto_md32_final(&sha1_block_data_order, c->h, c->data, SHA_CBLOCK, &c->num,
-                    c->Nh, c->Nl, /*is_big_endian=*/1);
-
+  bssl::crypto_md32_final<SHA1Traits>(c);
   sha1_output_state(out, c);
   FIPS_service_indicator_update_state();
   return bcm_infallible::approved;

diff --git a/crypto/fipsmodule/sha/sha256.cc.inc b/crypto/fipsmodule/sha/sha256.cc.inc
index 1ce4210..a1a22a9 100644
--- a/crypto/fipsmodule/sha/sha256.cc.inc
+++ b/crypto/fipsmodule/sha/sha256.cc.inc

@@ -15,6 +15,7 @@
 #include <string.h>
 
 #include <openssl/mem.h>
+#include <openssl/span.h>
 
 #include "../../internal.h"
 #include "../bcm_interface.h"
@@ -62,10 +63,21 @@
   return bcm_infallible::approved;
 }
 
+namespace {
+struct SHA256Traits {
+  using HashContext = SHA256_CTX;
+  static constexpr size_t kBlockSize = BCM_SHA256_CBLOCK;
+  static constexpr bool kLengthIsBigEndian = true;
+  static void HashBlocks(uint32_t *state, const uint8_t *data,
+                         size_t num_blocks) {
+    sha256_block_data_order(state, data, num_blocks);
+  }
+};
+}  // namespace
+
 bcm_infallible BCM_sha256_update(SHA256_CTX *c, const void *data, size_t len) {
-  crypto_md32_update(&sha256_block_data_order, c->h, c->data, BCM_SHA256_CBLOCK,
-                     &c->num, &c->Nh, &c->Nl,
-                     reinterpret_cast<const uint8_t *>(data), len);
+  bssl::crypto_md32_update<SHA256Traits>(
+      c, bssl::Span(static_cast<const uint8_t *>(data), len));
   return bcm_infallible::approved;
 }
 
@@ -75,8 +87,7 @@
 }
 
 static void sha256_final_impl(uint8_t *out, size_t md_len, SHA256_CTX *c) {
-  crypto_md32_final(&sha256_block_data_order, c->h, c->data, BCM_SHA256_CBLOCK,
-                    &c->num, c->Nh, c->Nl, /*is_big_endian=*/1);
+  bssl::crypto_md32_final<SHA256Traits>(c);
 
   BSSL_CHECK(md_len <= BCM_SHA256_DIGEST_LENGTH);
 

diff --git a/crypto/md4/md4.cc b/crypto/md4/md4.cc
index ae6dea8..10a8130 100644
--- a/crypto/md4/md4.cc
+++ b/crypto/md4/md4.cc

@@ -17,6 +17,8 @@
 #include <stdlib.h>
 #include <string.h>
 
+#include <openssl/span.h>
+
 #include "../fipsmodule/digest/md32_common.h"
 #include "../internal.h"
 
@@ -48,17 +50,26 @@
   md4_block_data_order(c->h, data, 1);
 }
 
+namespace {
+struct MD4Traits {
+  using HashContext = MD4_CTX;
+  static constexpr size_t kBlockSize = MD4_CBLOCK;
+  static constexpr bool kLengthIsBigEndian = false;
+  static void HashBlocks(uint32_t *state, const uint8_t *data,
+                         size_t num_blocks) {
+    md4_block_data_order(state, data, num_blocks);
+  }
+};
+}  // namespace
+
 int MD4_Update(MD4_CTX *c, const void *data, size_t len) {
-  crypto_md32_update(&md4_block_data_order, c->h, c->data, MD4_CBLOCK, &c->num,
-                     &c->Nh, &c->Nl, reinterpret_cast<const uint8_t *>(data),
-                     len);
+  bssl::crypto_md32_update<MD4Traits>(
+      c, bssl::Span(static_cast<const uint8_t *>(data), len));
   return 1;
 }
 
 int MD4_Final(uint8_t out[MD4_DIGEST_LENGTH], MD4_CTX *c) {
-  crypto_md32_final(&md4_block_data_order, c->h, c->data, MD4_CBLOCK, &c->num,
-                    c->Nh, c->Nl, /*is_big_endian=*/0);
-
+  bssl::crypto_md32_final<MD4Traits>(c);
   CRYPTO_store_u32_le(out, c->h[0]);
   CRYPTO_store_u32_le(out + 4, c->h[1]);
   CRYPTO_store_u32_le(out + 8, c->h[2]);

diff --git a/crypto/md5/md5.cc b/crypto/md5/md5.cc
index 89444e9..3bd2a2b 100644
--- a/crypto/md5/md5.cc
+++ b/crypto/md5/md5.cc

@@ -17,6 +17,7 @@
 #include <string.h>
 
 #include <openssl/mem.h>
+#include <openssl/span.h>
 
 #include "../fipsmodule/digest/md32_common.h"
 #include "../internal.h"
@@ -52,17 +53,26 @@
   md5_block_data_order(c->h, data, 1);
 }
 
+namespace {
+struct MD5Traits {
+  using HashContext = MD5_CTX;
+  static constexpr size_t kBlockSize = MD5_CBLOCK;
+  static constexpr bool kLengthIsBigEndian = false;
+  static void HashBlocks(uint32_t *state, const uint8_t *data,
+                         size_t num_blocks) {
+    md5_block_data_order(state, data, num_blocks);
+  }
+};
+}  // namespace
+
 int MD5_Update(MD5_CTX *c, const void *data, size_t len) {
-  crypto_md32_update(&md5_block_data_order, c->h, c->data, MD5_CBLOCK, &c->num,
-                     &c->Nh, &c->Nl, reinterpret_cast<const uint8_t *>(data),
-                     len);
+  bssl::crypto_md32_update<MD5Traits>(
+      c, bssl::Span(static_cast<const uint8_t *>(data), len));
   return 1;
 }
 
 int MD5_Final(uint8_t out[MD5_DIGEST_LENGTH], MD5_CTX *c) {
-  crypto_md32_final(&md5_block_data_order, c->h, c->data, MD5_CBLOCK, &c->num,
-                    c->Nh, c->Nl, /*is_big_endian=*/0);
-
+  bssl::crypto_md32_final<MD5Traits>(c);
   CRYPTO_store_u32_le(out, c->h[0]);
   CRYPTO_store_u32_le(out + 4, c->h[1]);
   CRYPTO_store_u32_le(out + 8, c->h[2]);

diff --git a/decrepit/ripemd/ripemd.cc b/decrepit/ripemd/ripemd.cc
index 716a14e..dbbb2b3 100644
--- a/decrepit/ripemd/ripemd.cc
+++ b/decrepit/ripemd/ripemd.cc

@@ -44,18 +44,26 @@
   ripemd160_block_data_order(c->h, data, 1);
 }
 
+namespace {
+struct RIPEMD160Traits {
+  using HashContext = RIPEMD160_CTX;
+  static constexpr size_t kBlockSize = RIPEMD160_CBLOCK;
+  static constexpr bool kLengthIsBigEndian = false;
+  static void HashBlocks(uint32_t *state, const uint8_t *data,
+                         size_t num_blocks) {
+    ripemd160_block_data_order(state, data, num_blocks);
+  }
+};
+}  // namespace
+
 int RIPEMD160_Update(RIPEMD160_CTX *c, const void *data, size_t len) {
-  crypto_md32_update(&ripemd160_block_data_order, c->h, c->data,
-                     RIPEMD160_CBLOCK, &c->num, &c->Nh, &c->Nl,
-                     reinterpret_cast<const uint8_t *>(data), len);
+  bssl::crypto_md32_update<RIPEMD160Traits>(
+      c, bssl::Span(static_cast<const uint8_t *>(data), len));
   return 1;
 }
 
 int RIPEMD160_Final(uint8_t out[RIPEMD160_DIGEST_LENGTH], RIPEMD160_CTX *c) {
-  crypto_md32_final(&ripemd160_block_data_order, c->h, c->data,
-                    RIPEMD160_CBLOCK, &c->num, c->Nh, c->Nl,
-                    /*is_big_endian=*/0);
-
+  bssl::crypto_md32_final<RIPEMD160Traits>(c);
   CRYPTO_store_u32_le(out, c->h[0]);
   CRYPTO_store_u32_le(out + 4, c->h[1]);
   CRYPTO_store_u32_le(out + 8, c->h[2]);
commit	e88e5069694dd6da6398dab7f020d6c271c3022f	[log] [tgz]
author	David Benjamin <davidben@google.com>	Wed Apr 30 19:44:56 2025 -0400
committer	Boringssl LUCI CQ <boringssl-scoped@luci-project-accounts.iam.gserviceaccount.com>	Thu May 01 12:19:04 2025 -0700
tree	8d3ab3d62d50274e8496a9412832c65938ea3c3f
parent	7df46880add8c863b8526e5228b0dc28b9a28f59 [diff]