Extract MD5 from bcm

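MD5 is no longer approved as part of FIPS, so we move it out of the FIPS
module (bcm) into crypto/md5, and move the EVP_md5 and EVP_md5_sha1
definitions to digest_extra. As a consequence, the TLS KDF service
indicator no longer reports MD5/SHA-1 (TLS 1.0/1.1) as approved.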

Change-Id: I504c3d0d381cba72345c615209b99d4451886d96
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/70727
Commit-Queue: David Benjamin <davidben@google.com>
Reviewed-by: David Benjamin <davidben@google.com>
Auto-Submit: Bob Beck <bbe@google.com>
diff --git a/build.json b/build.json
index 51023ce..f3ac354 100644
--- a/build.json
+++ b/build.json
@@ -68,7 +68,6 @@
             "crypto/fipsmodule/ecdsa/ecdsa.c.inc",
             "crypto/fipsmodule/hkdf/hkdf.c.inc",
             "crypto/fipsmodule/hmac/hmac.c.inc",
-            "crypto/fipsmodule/md5/md5.c.inc",
             "crypto/fipsmodule/modes/cbc.c.inc",
             "crypto/fipsmodule/modes/cfb.c.inc",
             "crypto/fipsmodule/modes/ctr.c.inc",
@@ -125,7 +124,6 @@
             {"src": "crypto/fipsmodule/bn/asm/co-586.pl"},
             {"src": "crypto/fipsmodule/modes/asm/ghash-ssse3-x86.pl"},
             {"src": "crypto/fipsmodule/modes/asm/ghash-x86.pl"},
-            {"src": "crypto/fipsmodule/md5/asm/md5-586.pl"},
             {"src": "crypto/fipsmodule/sha/asm/sha1-586.pl"},
             {"src": "crypto/fipsmodule/sha/asm/sha256-586.pl"},
             {"src": "crypto/fipsmodule/sha/asm/sha512-586.pl"},
@@ -137,7 +135,6 @@
             {"src": "crypto/fipsmodule/aes/asm/aesni-x86_64.pl"},
             {"src": "crypto/fipsmodule/modes/asm/ghash-ssse3-x86_64.pl"},
             {"src": "crypto/fipsmodule/modes/asm/ghash-x86_64.pl"},
-            {"src": "crypto/fipsmodule/md5/asm/md5-x86_64.pl"},
             {"src": "crypto/fipsmodule/ec/asm/p256_beeu-x86_64-asm.pl"},
             {"src": "crypto/fipsmodule/ec/asm/p256-x86_64-asm.pl"},
             {"src": "crypto/fipsmodule/rand/asm/rdrand-x86_64.pl"},
@@ -267,6 +264,7 @@
             "crypto/kyber/kyber.c",
             "crypto/lhash/lhash.c",
             "crypto/md4/md4.c",
+            "crypto/md5/md5.c",
             "crypto/mem.c",
             "crypto/mldsa/mldsa.c",
             "crypto/mlkem/mlkem.cc",
@@ -511,7 +509,6 @@
             "crypto/fipsmodule/ec/p256-nistz.h",
             "crypto/fipsmodule/ec/p256_table.h",
             "crypto/fipsmodule/ecdsa/internal.h",
-            "crypto/fipsmodule/md5/internal.h",
             "crypto/fipsmodule/modes/internal.h",
             "crypto/fipsmodule/rand/internal.h",
             "crypto/fipsmodule/rsa/internal.h",
@@ -524,6 +521,7 @@
             "crypto/keccak/internal.h",
             "crypto/kyber/internal.h",
             "crypto/lhash/internal.h",
+            "crypto/md5/internal.h",
             "crypto/mldsa/internal.h",
             "crypto/mlkem/internal.h",
             "crypto/obj/obj_dat.h",
@@ -570,12 +568,14 @@
             {"src": "crypto/chacha/asm/chacha-armv4.pl"}
         ],
         "perlasm_x86": [
-            {"src": "crypto/chacha/asm/chacha-x86.pl"}
+            {"src": "crypto/chacha/asm/chacha-x86.pl"},
+            {"src": "crypto/md5/asm/md5-586.pl"}
         ],
         "perlasm_x86_64": [
             {"src": "crypto/chacha/asm/chacha-x86_64.pl"},
             {"src": "crypto/cipher_extra/asm/aes128gcmsiv-x86_64.pl"},
-            {"src": "crypto/cipher_extra/asm/chacha20_poly1305_x86_64.pl"}
+            {"src": "crypto/cipher_extra/asm/chacha20_poly1305_x86_64.pl"},
+            {"src": "crypto/md5/asm/md5-x86_64.pl"}
         ]
     },
     "pki": {
@@ -828,7 +828,6 @@
             "crypto/fipsmodule/ec/p256_test.cc",
             "crypto/fipsmodule/ecdsa/ecdsa_test.cc",
             "crypto/fipsmodule/hkdf/hkdf_test.cc",
-            "crypto/fipsmodule/md5/md5_test.cc",
             "crypto/fipsmodule/modes/gcm_test.cc",
             "crypto/fipsmodule/rand/ctrdrbg_test.cc",
             "crypto/fipsmodule/service_indicator/service_indicator_test.cc",
@@ -840,6 +839,7 @@
             "crypto/keccak/keccak_test.cc",
             "crypto/kyber/kyber_test.cc",
             "crypto/lhash/lhash_test.cc",
+            "crypto/md5/md5_test.cc",
             "crypto/mlkem/mlkem_test.cc",
             "crypto/obj/obj_test.cc",
             "crypto/pem/pem_test.cc",
diff --git a/crypto/digest_extra/digest_extra.c b/crypto/digest_extra/digest_extra.c
index 5b9e26d..7033d9c 100644
--- a/crypto/digest_extra/digest_extra.c
+++ b/crypto/digest_extra/digest_extra.c
@@ -62,6 +62,7 @@
 #include <openssl/bytestring.h>
 #include <openssl/obj.h>
 #include <openssl/md4.h>
+#include <openssl/md5.h>
 #include <openssl/nid.h>
 
 #include "../asn1/internal.h"
@@ -291,3 +292,64 @@
 };
 
 const EVP_MD *EVP_md4(void) { return &evp_md_md4; }
+
+static void md5_init(EVP_MD_CTX *ctx) {
+  BSSL_CHECK(MD5_Init(ctx->md_data));
+}
+
+static void md5_update(EVP_MD_CTX *ctx, const void *data, size_t count) {
+  BSSL_CHECK(MD5_Update(ctx->md_data, data, count));
+}
+
+static void md5_final(EVP_MD_CTX *ctx, uint8_t *out) {
+  BSSL_CHECK(MD5_Final(out, ctx->md_data));
+}
+
+static const EVP_MD evp_md_md5 = {  // method table returned by EVP_md5()
+  NID_md5,            // type
+  MD5_DIGEST_LENGTH,  // md_size
+  0,                  // flags
+  md5_init,           // init
+  md5_update,         // update
+  md5_final,          // final
+  64,                 // block_size
+  sizeof(MD5_CTX),    // ctx_size
+};
+
+const EVP_MD *EVP_md5(void) { return &evp_md_md5; }
+
+typedef struct {  // combined state for the NID_md5_sha1 digest
+  MD5_CTX md5;    // MD5 half; finalized first into the output
+  SHA_CTX sha1;   // SHA-1 half; written after the MD5 digest
+} MD5_SHA1_CTX;
+
+static void md5_sha1_init(EVP_MD_CTX *md_ctx) {
+  MD5_SHA1_CTX *ctx = md_ctx->md_data;
+  BSSL_CHECK(MD5_Init(&ctx->md5) && SHA1_Init(&ctx->sha1));
+}
+
+static void md5_sha1_update(EVP_MD_CTX *md_ctx, const void *data,
+                            size_t count) {
+  MD5_SHA1_CTX *ctx = md_ctx->md_data;
+  BSSL_CHECK(MD5_Update(&ctx->md5, data, count) &&
+        SHA1_Update(&ctx->sha1, data, count));
+}
+
+static void md5_sha1_final(EVP_MD_CTX *md_ctx, uint8_t *out) {
+  MD5_SHA1_CTX *ctx = md_ctx->md_data;  // ctx_size is sizeof(MD5_SHA1_CTX)
+  BSSL_CHECK(MD5_Final(out, &ctx->md5) &&  // MD5 digest goes first in |out|
+        SHA1_Final(out + MD5_DIGEST_LENGTH, &ctx->sha1));  // then SHA-1
+}
+
+static const EVP_MD evp_md_md5_sha1 = {  // file-local; see EVP_md5_sha1()
+  NID_md5_sha1,                            // type
+  MD5_DIGEST_LENGTH + SHA_DIGEST_LENGTH,   // md_size: MD5 || SHA-1
+  0,                                       // flags
+  md5_sha1_init,                           // init
+  md5_sha1_update,                         // update
+  md5_sha1_final,                          // final
+  64,                                      // block_size
+  sizeof(MD5_SHA1_CTX),                    // ctx_size
+};
+
+const EVP_MD *EVP_md5_sha1(void) { return &evp_md_md5_sha1; }
diff --git a/crypto/fipsmodule/bcm.c b/crypto/fipsmodule/bcm.c
index 5921ab6..81e03cb 100644
--- a/crypto/fipsmodule/bcm.c
+++ b/crypto/fipsmodule/bcm.c
@@ -85,7 +85,6 @@
 #include "ec/wnaf.c.inc"
 #include "hkdf/hkdf.c.inc"
 #include "hmac/hmac.c.inc"
-#include "md5/md5.c.inc"
 #include "modes/cbc.c.inc"
 #include "modes/cfb.c.inc"
 #include "modes/ctr.c.inc"
diff --git a/crypto/fipsmodule/digest/digests.c.inc b/crypto/fipsmodule/digest/digests.c.inc
index 638724b..effcbf5 100644
--- a/crypto/fipsmodule/digest/digests.c.inc
+++ b/crypto/fipsmodule/digest/digests.c.inc
@@ -59,7 +59,6 @@
 #include <assert.h>
 #include <string.h>
 
-#include <openssl/md5.h>
 #include <openssl/nid.h>
 #include <openssl/sha.h>
 
@@ -74,30 +73,6 @@
 #endif
 
 
-static void md5_init(EVP_MD_CTX *ctx) {
-  CHECK(MD5_Init(ctx->md_data));
-}
-
-static void md5_update(EVP_MD_CTX *ctx, const void *data, size_t count) {
-  CHECK(MD5_Update(ctx->md_data, data, count));
-}
-
-static void md5_final(EVP_MD_CTX *ctx, uint8_t *out) {
-  CHECK(MD5_Final(out, ctx->md_data));
-}
-
-DEFINE_METHOD_FUNCTION(EVP_MD, EVP_md5) {
-  out->type = NID_md5;
-  out->md_size = MD5_DIGEST_LENGTH;
-  out->flags = 0;
-  out->init = md5_init;
-  out->update = md5_update;
-  out->final = md5_final;
-  out->block_size = 64;
-  out->ctx_size = sizeof(MD5_CTX);
-}
-
-
 static void sha1_init(EVP_MD_CTX *ctx) {
   CHECK(SHA1_Init(ctx->md_data));
 }
@@ -241,39 +216,4 @@
   out->ctx_size = sizeof(SHA512_CTX);
 }
 
-
-typedef struct {
-  MD5_CTX md5;
-  SHA_CTX sha1;
-} MD5_SHA1_CTX;
-
-static void md5_sha1_init(EVP_MD_CTX *md_ctx) {
-  MD5_SHA1_CTX *ctx = md_ctx->md_data;
-  CHECK(MD5_Init(&ctx->md5) && SHA1_Init(&ctx->sha1));
-}
-
-static void md5_sha1_update(EVP_MD_CTX *md_ctx, const void *data,
-                            size_t count) {
-  MD5_SHA1_CTX *ctx = md_ctx->md_data;
-  CHECK(MD5_Update(&ctx->md5, data, count) &&
-        SHA1_Update(&ctx->sha1, data, count));
-}
-
-static void md5_sha1_final(EVP_MD_CTX *md_ctx, uint8_t *out) {
-  MD5_SHA1_CTX *ctx = md_ctx->md_data;
-  CHECK(MD5_Final(out, &ctx->md5) &&
-        SHA1_Final(out + MD5_DIGEST_LENGTH, &ctx->sha1));
-}
-
-DEFINE_METHOD_FUNCTION(EVP_MD, EVP_md5_sha1) {
-  out->type = NID_md5_sha1;
-  out->md_size = MD5_DIGEST_LENGTH + SHA_DIGEST_LENGTH;
-  out->flags = 0;
-  out->init = md5_sha1_init;
-  out->update = md5_sha1_update;
-  out->final = md5_sha1_final;
-  out->block_size = 64;
-  out->ctx_size = sizeof(MD5_SHA1_CTX);
-}
-
 #undef CHECK
diff --git a/crypto/fipsmodule/service_indicator/service_indicator.c.inc b/crypto/fipsmodule/service_indicator/service_indicator.c.inc
index dcbf819..80ab82e 100644
--- a/crypto/fipsmodule/service_indicator/service_indicator.c.inc
+++ b/crypto/fipsmodule/service_indicator/service_indicator.c.inc
@@ -300,12 +300,9 @@
 }
 
 void TLSKDF_verify_service_indicator(const EVP_MD *md) {
-  // HMAC-MD5/HMAC-SHA1 (both used concurrently) is approved for use in the KDF
-  // in TLS 1.0/1.1. HMAC-SHA{256, 384, 512} are approved for use in the KDF in
-  // TLS 1.2. These Key Derivation functions are to be used in the context of
-  // the TLS protocol.
+  // HMAC-SHA{256, 384, 512} are approved for use in the KDF in TLS 1.2. These
+  // Key Derivation functions are to be used in the context of the TLS protocol.
   switch (EVP_MD_type(md)) {
-    case NID_md5_sha1:
     case NID_sha256:
     case NID_sha384:
     case NID_sha512:
diff --git a/crypto/fipsmodule/service_indicator/service_indicator_test.cc b/crypto/fipsmodule/service_indicator/service_indicator_test.cc
index 05943d5..b7febf0 100644
--- a/crypto/fipsmodule/service_indicator/service_indicator_test.cc
+++ b/crypto/fipsmodule/service_indicator/service_indicator_test.cc
@@ -1129,15 +1129,20 @@
     {4096, &EVP_md5, false, FIPSStatus::NOT_APPROVED, FIPSStatus::NOT_APPROVED},
 
     // RSA 1024 is not approved under FIPS 186-5.
-    {1024, &EVP_sha1, false, FIPSStatus::NOT_APPROVED, FIPSStatus::NOT_APPROVED},
-    {1024, &EVP_sha256, false, FIPSStatus::NOT_APPROVED, FIPSStatus::NOT_APPROVED},
-    {1024, &EVP_sha512, false, FIPSStatus::NOT_APPROVED, FIPSStatus::NOT_APPROVED},
+    {1024, &EVP_sha1, false, FIPSStatus::NOT_APPROVED,
+     FIPSStatus::NOT_APPROVED},
+    {1024, &EVP_sha256, false, FIPSStatus::NOT_APPROVED,
+     FIPSStatus::NOT_APPROVED},
+    {1024, &EVP_sha512, false, FIPSStatus::NOT_APPROVED,
+     FIPSStatus::NOT_APPROVED},
     {1024, &EVP_sha1, true, FIPSStatus::NOT_APPROVED, FIPSStatus::NOT_APPROVED},
-    {1024, &EVP_sha256, true, FIPSStatus::NOT_APPROVED, FIPSStatus::NOT_APPROVED},
+    {1024, &EVP_sha256, true, FIPSStatus::NOT_APPROVED,
+     FIPSStatus::NOT_APPROVED},
     // PSS with hashLen == saltLen is not possible for 1024-bit modulus and
     // SHA-512.
 
-    {2048, &EVP_sha1, false, FIPSStatus::NOT_APPROVED, FIPSStatus::NOT_APPROVED},
+    {2048, &EVP_sha1, false, FIPSStatus::NOT_APPROVED,
+     FIPSStatus::NOT_APPROVED},
     {2048, &EVP_sha224, false, FIPSStatus::APPROVED, FIPSStatus::APPROVED},
     {2048, &EVP_sha256, false, FIPSStatus::APPROVED, FIPSStatus::APPROVED},
     {2048, &EVP_sha384, false, FIPSStatus::APPROVED, FIPSStatus::APPROVED},
@@ -1148,7 +1153,8 @@
     {2048, &EVP_sha384, true, FIPSStatus::APPROVED, FIPSStatus::APPROVED},
     {2048, &EVP_sha512, true, FIPSStatus::APPROVED, FIPSStatus::APPROVED},
 
-    {3072, &EVP_sha1, false, FIPSStatus::NOT_APPROVED, FIPSStatus::NOT_APPROVED},
+    {3072, &EVP_sha1, false, FIPSStatus::NOT_APPROVED,
+     FIPSStatus::NOT_APPROVED},
     {3072, &EVP_sha224, false, FIPSStatus::APPROVED, FIPSStatus::APPROVED},
     {3072, &EVP_sha256, false, FIPSStatus::APPROVED, FIPSStatus::APPROVED},
     {3072, &EVP_sha384, false, FIPSStatus::APPROVED, FIPSStatus::APPROVED},
@@ -1159,7 +1165,8 @@
     {3072, &EVP_sha384, true, FIPSStatus::APPROVED, FIPSStatus::APPROVED},
     {3072, &EVP_sha512, true, FIPSStatus::APPROVED, FIPSStatus::APPROVED},
 
-    {4096, &EVP_sha1, false, FIPSStatus::NOT_APPROVED, FIPSStatus::NOT_APPROVED},
+    {4096, &EVP_sha1, false, FIPSStatus::NOT_APPROVED,
+     FIPSStatus::NOT_APPROVED},
     {4096, &EVP_sha224, false, FIPSStatus::APPROVED, FIPSStatus::APPROVED},
     {4096, &EVP_sha256, false, FIPSStatus::APPROVED, FIPSStatus::APPROVED},
     {4096, &EVP_sha384, false, FIPSStatus::APPROVED, FIPSStatus::APPROVED},
@@ -1773,7 +1780,8 @@
   const uint8_t *expected_output;
   const FIPSStatus expect_approved;
 } kKDFTestVectors[] = {
-    {EVP_md5_sha1, kTLSOutput_md5_sha1, FIPSStatus::APPROVED},
+    // TLS 1.0 and 1.1 are no longer an approved part of FIPS.
+    {EVP_md5_sha1, kTLSOutput_md5_sha1, FIPSStatus::NOT_APPROVED},
     {EVP_sha224, kTLSOutput_sha224, FIPSStatus::NOT_APPROVED},
     {EVP_sha256, kTLSOutput_sha256, FIPSStatus::APPROVED},
     {EVP_sha384, kTLSOutput_sha384, FIPSStatus::APPROVED},
@@ -1900,8 +1908,8 @@
 
   std::vector<uint8_t> digest;
 
-  // MD4 is no longer of FIPS - this is retained for now to mimic previous
-  // behavior.
+  // MD4 is no longer part of FIPS - this is retained for now to ensure that
+  // MD4 continues to report itself as not approved.
   digest.resize(MD4_DIGEST_LENGTH);
   MD4_CTX md4_ctx;
   ASSERT_TRUE(CALL_SERVICE_AND_CHECK_APPROVED(approved, MD4_Init(&md4_ctx)));
@@ -1914,6 +1922,8 @@
   EXPECT_EQ(Bytes(kOutput_md4), Bytes(digest));
   EXPECT_EQ(approved, FIPSStatus::NOT_APPROVED);
 
+  // MD5 is no longer part of FIPS - this is retained for now to ensure that
+  // MD5 continues to report itself as not approved.
   digest.resize(MD5_DIGEST_LENGTH);
   MD5_CTX md5_ctx;
   ASSERT_TRUE(CALL_SERVICE_AND_CHECK_APPROVED(approved, MD5_Init(&md5_ctx)));
diff --git a/crypto/fipsmodule/md5/asm/md5-586.pl b/crypto/md5/asm/md5-586.pl
similarity index 99%
rename from crypto/fipsmodule/md5/asm/md5-586.pl
rename to crypto/md5/asm/md5-586.pl
index ec34aad..f849b46 100644
--- a/crypto/fipsmodule/md5/asm/md5-586.pl
+++ b/crypto/md5/asm/md5-586.pl
@@ -8,7 +8,7 @@
 $normal=0;
 
 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
-push(@INC,"${dir}","${dir}../../../perlasm");
+push(@INC,"${dir}","${dir}../../perlasm");
 require "x86asm.pl";
 
 $output=pop;
diff --git a/crypto/fipsmodule/md5/asm/md5-x86_64.pl b/crypto/md5/asm/md5-x86_64.pl
similarity index 99%
rename from crypto/fipsmodule/md5/asm/md5-x86_64.pl
rename to crypto/md5/asm/md5-x86_64.pl
index 655ea09..eee50a3 100644
--- a/crypto/fipsmodule/md5/asm/md5-x86_64.pl
+++ b/crypto/md5/asm/md5-x86_64.pl
@@ -117,7 +117,7 @@
 
 $0 =~ m/(.*[\/\\])[^\/\\]+$/; my $dir=$1; my $xlate;
 ( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
-( $xlate="${dir}../../../perlasm/x86_64-xlate.pl" and -f $xlate) or
+( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
 die "can't locate x86_64-xlate.pl";
 
 open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
diff --git a/crypto/fipsmodule/md5/internal.h b/crypto/md5/internal.h
similarity index 100%
rename from crypto/fipsmodule/md5/internal.h
rename to crypto/md5/internal.h
diff --git a/crypto/fipsmodule/md5/md5.c.inc b/crypto/md5/md5.c
similarity index 98%
rename from crypto/fipsmodule/md5/md5.c.inc
rename to crypto/md5/md5.c
index 1691526..7eca609 100644
--- a/crypto/fipsmodule/md5/md5.c.inc
+++ b/crypto/md5/md5.c
@@ -60,8 +60,8 @@
 
 #include <openssl/mem.h>
 
-#include "../../internal.h"
-#include "../digest/md32_common.h"
+#include "../internal.h"
+#include "../fipsmodule/digest/md32_common.h"
 #include "internal.h"
 
 
diff --git a/crypto/fipsmodule/md5/md5_test.cc b/crypto/md5/md5_test.cc
similarity index 97%
rename from crypto/fipsmodule/md5/md5_test.cc
rename to crypto/md5/md5_test.cc
index 7df5bb2..022b1ea 100644
--- a/crypto/fipsmodule/md5/md5_test.cc
+++ b/crypto/md5/md5_test.cc
@@ -17,7 +17,7 @@
 #include <gtest/gtest.h>
 
 #include "internal.h"
-#include "../../test/abi_test.h"
+#include "../test/abi_test.h"
 
 
 #if defined(MD5_ASM) && defined(SUPPORTS_ABI_TEST)
diff --git a/gen/crypto/md5-586-apple.S b/gen/crypto/md5-586-apple.S
new file mode 100644
index 0000000..986d590
--- /dev/null
+++ b/gen/crypto/md5-586-apple.S
@@ -0,0 +1,684 @@
+// This file is generated from a similarly-named Perl script in the BoringSSL
+// source tree. Do not edit by hand.
+
+#include <openssl/asm_base.h>
+
+#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__)
+.text
+.globl	_md5_block_asm_data_order
+.private_extern	_md5_block_asm_data_order
+.align	4
+_md5_block_asm_data_order:
+L_md5_block_asm_data_order_begin:
+	pushl	%esi
+	pushl	%edi
+	movl	12(%esp),%edi
+	movl	16(%esp),%esi
+	movl	20(%esp),%ecx
+	pushl	%ebp
+	shll	$6,%ecx
+	pushl	%ebx
+	addl	%esi,%ecx
+	subl	$64,%ecx
+	movl	(%edi),%eax
+	pushl	%ecx
+	movl	4(%edi),%ebx
+	movl	8(%edi),%ecx
+	movl	12(%edi),%edx
+L000start:
+
+	# R0 section 
+	movl	%ecx,%edi
+	movl	(%esi),%ebp
+	# R0 0 
+	xorl	%edx,%edi
+	andl	%ebx,%edi
+	leal	3614090360(%eax,%ebp,1),%eax
+	xorl	%edx,%edi
+	addl	%edi,%eax
+	movl	%ebx,%edi
+	roll	$7,%eax
+	movl	4(%esi),%ebp
+	addl	%ebx,%eax
+	# R0 1 
+	xorl	%ecx,%edi
+	andl	%eax,%edi
+	leal	3905402710(%edx,%ebp,1),%edx
+	xorl	%ecx,%edi
+	addl	%edi,%edx
+	movl	%eax,%edi
+	roll	$12,%edx
+	movl	8(%esi),%ebp
+	addl	%eax,%edx
+	# R0 2 
+	xorl	%ebx,%edi
+	andl	%edx,%edi
+	leal	606105819(%ecx,%ebp,1),%ecx
+	xorl	%ebx,%edi
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	roll	$17,%ecx
+	movl	12(%esi),%ebp
+	addl	%edx,%ecx
+	# R0 3 
+	xorl	%eax,%edi
+	andl	%ecx,%edi
+	leal	3250441966(%ebx,%ebp,1),%ebx
+	xorl	%eax,%edi
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	roll	$22,%ebx
+	movl	16(%esi),%ebp
+	addl	%ecx,%ebx
+	# R0 4 
+	xorl	%edx,%edi
+	andl	%ebx,%edi
+	leal	4118548399(%eax,%ebp,1),%eax
+	xorl	%edx,%edi
+	addl	%edi,%eax
+	movl	%ebx,%edi
+	roll	$7,%eax
+	movl	20(%esi),%ebp
+	addl	%ebx,%eax
+	# R0 5 
+	xorl	%ecx,%edi
+	andl	%eax,%edi
+	leal	1200080426(%edx,%ebp,1),%edx
+	xorl	%ecx,%edi
+	addl	%edi,%edx
+	movl	%eax,%edi
+	roll	$12,%edx
+	movl	24(%esi),%ebp
+	addl	%eax,%edx
+	# R0 6 
+	xorl	%ebx,%edi
+	andl	%edx,%edi
+	leal	2821735955(%ecx,%ebp,1),%ecx
+	xorl	%ebx,%edi
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	roll	$17,%ecx
+	movl	28(%esi),%ebp
+	addl	%edx,%ecx
+	# R0 7 
+	xorl	%eax,%edi
+	andl	%ecx,%edi
+	leal	4249261313(%ebx,%ebp,1),%ebx
+	xorl	%eax,%edi
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	roll	$22,%ebx
+	movl	32(%esi),%ebp
+	addl	%ecx,%ebx
+	# R0 8 
+	xorl	%edx,%edi
+	andl	%ebx,%edi
+	leal	1770035416(%eax,%ebp,1),%eax
+	xorl	%edx,%edi
+	addl	%edi,%eax
+	movl	%ebx,%edi
+	roll	$7,%eax
+	movl	36(%esi),%ebp
+	addl	%ebx,%eax
+	# R0 9 
+	xorl	%ecx,%edi
+	andl	%eax,%edi
+	leal	2336552879(%edx,%ebp,1),%edx
+	xorl	%ecx,%edi
+	addl	%edi,%edx
+	movl	%eax,%edi
+	roll	$12,%edx
+	movl	40(%esi),%ebp
+	addl	%eax,%edx
+	# R0 10 
+	xorl	%ebx,%edi
+	andl	%edx,%edi
+	leal	4294925233(%ecx,%ebp,1),%ecx
+	xorl	%ebx,%edi
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	roll	$17,%ecx
+	movl	44(%esi),%ebp
+	addl	%edx,%ecx
+	# R0 11 
+	xorl	%eax,%edi
+	andl	%ecx,%edi
+	leal	2304563134(%ebx,%ebp,1),%ebx
+	xorl	%eax,%edi
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	roll	$22,%ebx
+	movl	48(%esi),%ebp
+	addl	%ecx,%ebx
+	# R0 12 
+	xorl	%edx,%edi
+	andl	%ebx,%edi
+	leal	1804603682(%eax,%ebp,1),%eax
+	xorl	%edx,%edi
+	addl	%edi,%eax
+	movl	%ebx,%edi
+	roll	$7,%eax
+	movl	52(%esi),%ebp
+	addl	%ebx,%eax
+	# R0 13 
+	xorl	%ecx,%edi
+	andl	%eax,%edi
+	leal	4254626195(%edx,%ebp,1),%edx
+	xorl	%ecx,%edi
+	addl	%edi,%edx
+	movl	%eax,%edi
+	roll	$12,%edx
+	movl	56(%esi),%ebp
+	addl	%eax,%edx
+	# R0 14 
+	xorl	%ebx,%edi
+	andl	%edx,%edi
+	leal	2792965006(%ecx,%ebp,1),%ecx
+	xorl	%ebx,%edi
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	roll	$17,%ecx
+	movl	60(%esi),%ebp
+	addl	%edx,%ecx
+	# R0 15 
+	xorl	%eax,%edi
+	andl	%ecx,%edi
+	leal	1236535329(%ebx,%ebp,1),%ebx
+	xorl	%eax,%edi
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	roll	$22,%ebx
+	movl	4(%esi),%ebp
+	addl	%ecx,%ebx
+
+	# R1 section 
+	# R1 16 
+	leal	4129170786(%eax,%ebp,1),%eax
+	xorl	%ebx,%edi
+	andl	%edx,%edi
+	movl	24(%esi),%ebp
+	xorl	%ecx,%edi
+	addl	%edi,%eax
+	movl	%ebx,%edi
+	roll	$5,%eax
+	addl	%ebx,%eax
+	# R1 17 
+	leal	3225465664(%edx,%ebp,1),%edx
+	xorl	%eax,%edi
+	andl	%ecx,%edi
+	movl	44(%esi),%ebp
+	xorl	%ebx,%edi
+	addl	%edi,%edx
+	movl	%eax,%edi
+	roll	$9,%edx
+	addl	%eax,%edx
+	# R1 18 
+	leal	643717713(%ecx,%ebp,1),%ecx
+	xorl	%edx,%edi
+	andl	%ebx,%edi
+	movl	(%esi),%ebp
+	xorl	%eax,%edi
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	roll	$14,%ecx
+	addl	%edx,%ecx
+	# R1 19 
+	leal	3921069994(%ebx,%ebp,1),%ebx
+	xorl	%ecx,%edi
+	andl	%eax,%edi
+	movl	20(%esi),%ebp
+	xorl	%edx,%edi
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	roll	$20,%ebx
+	addl	%ecx,%ebx
+	# R1 20 
+	leal	3593408605(%eax,%ebp,1),%eax
+	xorl	%ebx,%edi
+	andl	%edx,%edi
+	movl	40(%esi),%ebp
+	xorl	%ecx,%edi
+	addl	%edi,%eax
+	movl	%ebx,%edi
+	roll	$5,%eax
+	addl	%ebx,%eax
+	# R1 21 
+	leal	38016083(%edx,%ebp,1),%edx
+	xorl	%eax,%edi
+	andl	%ecx,%edi
+	movl	60(%esi),%ebp
+	xorl	%ebx,%edi
+	addl	%edi,%edx
+	movl	%eax,%edi
+	roll	$9,%edx
+	addl	%eax,%edx
+	# R1 22 
+	leal	3634488961(%ecx,%ebp,1),%ecx
+	xorl	%edx,%edi
+	andl	%ebx,%edi
+	movl	16(%esi),%ebp
+	xorl	%eax,%edi
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	roll	$14,%ecx
+	addl	%edx,%ecx
+	# R1 23 
+	leal	3889429448(%ebx,%ebp,1),%ebx
+	xorl	%ecx,%edi
+	andl	%eax,%edi
+	movl	36(%esi),%ebp
+	xorl	%edx,%edi
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	roll	$20,%ebx
+	addl	%ecx,%ebx
+	# R1 24 
+	leal	568446438(%eax,%ebp,1),%eax
+	xorl	%ebx,%edi
+	andl	%edx,%edi
+	movl	56(%esi),%ebp
+	xorl	%ecx,%edi
+	addl	%edi,%eax
+	movl	%ebx,%edi
+	roll	$5,%eax
+	addl	%ebx,%eax
+	# R1 25 
+	leal	3275163606(%edx,%ebp,1),%edx
+	xorl	%eax,%edi
+	andl	%ecx,%edi
+	movl	12(%esi),%ebp
+	xorl	%ebx,%edi
+	addl	%edi,%edx
+	movl	%eax,%edi
+	roll	$9,%edx
+	addl	%eax,%edx
+	# R1 26 
+	leal	4107603335(%ecx,%ebp,1),%ecx
+	xorl	%edx,%edi
+	andl	%ebx,%edi
+	movl	32(%esi),%ebp
+	xorl	%eax,%edi
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	roll	$14,%ecx
+	addl	%edx,%ecx
+	# R1 27 
+	leal	1163531501(%ebx,%ebp,1),%ebx
+	xorl	%ecx,%edi
+	andl	%eax,%edi
+	movl	52(%esi),%ebp
+	xorl	%edx,%edi
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	roll	$20,%ebx
+	addl	%ecx,%ebx
+	# R1 28 
+	leal	2850285829(%eax,%ebp,1),%eax
+	xorl	%ebx,%edi
+	andl	%edx,%edi
+	movl	8(%esi),%ebp
+	xorl	%ecx,%edi
+	addl	%edi,%eax
+	movl	%ebx,%edi
+	roll	$5,%eax
+	addl	%ebx,%eax
+	# R1 29 
+	leal	4243563512(%edx,%ebp,1),%edx
+	xorl	%eax,%edi
+	andl	%ecx,%edi
+	movl	28(%esi),%ebp
+	xorl	%ebx,%edi
+	addl	%edi,%edx
+	movl	%eax,%edi
+	roll	$9,%edx
+	addl	%eax,%edx
+	# R1 30 
+	leal	1735328473(%ecx,%ebp,1),%ecx
+	xorl	%edx,%edi
+	andl	%ebx,%edi
+	movl	48(%esi),%ebp
+	xorl	%eax,%edi
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	roll	$14,%ecx
+	addl	%edx,%ecx
+	# R1 31 
+	leal	2368359562(%ebx,%ebp,1),%ebx
+	xorl	%ecx,%edi
+	andl	%eax,%edi
+	movl	20(%esi),%ebp
+	xorl	%edx,%edi
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	roll	$20,%ebx
+	addl	%ecx,%ebx
+
+	# R2 section 
+	# R2 32 
+	xorl	%edx,%edi
+	xorl	%ebx,%edi
+	leal	4294588738(%eax,%ebp,1),%eax
+	addl	%edi,%eax
+	roll	$4,%eax
+	movl	32(%esi),%ebp
+	movl	%ebx,%edi
+	# R2 33 
+	leal	2272392833(%edx,%ebp,1),%edx
+	addl	%ebx,%eax
+	xorl	%ecx,%edi
+	xorl	%eax,%edi
+	movl	44(%esi),%ebp
+	addl	%edi,%edx
+	movl	%eax,%edi
+	roll	$11,%edx
+	addl	%eax,%edx
+	# R2 34 
+	xorl	%ebx,%edi
+	xorl	%edx,%edi
+	leal	1839030562(%ecx,%ebp,1),%ecx
+	addl	%edi,%ecx
+	roll	$16,%ecx
+	movl	56(%esi),%ebp
+	movl	%edx,%edi
+	# R2 35 
+	leal	4259657740(%ebx,%ebp,1),%ebx
+	addl	%edx,%ecx
+	xorl	%eax,%edi
+	xorl	%ecx,%edi
+	movl	4(%esi),%ebp
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	roll	$23,%ebx
+	addl	%ecx,%ebx
+	# R2 36 
+	xorl	%edx,%edi
+	xorl	%ebx,%edi
+	leal	2763975236(%eax,%ebp,1),%eax
+	addl	%edi,%eax
+	roll	$4,%eax
+	movl	16(%esi),%ebp
+	movl	%ebx,%edi
+	# R2 37 
+	leal	1272893353(%edx,%ebp,1),%edx
+	addl	%ebx,%eax
+	xorl	%ecx,%edi
+	xorl	%eax,%edi
+	movl	28(%esi),%ebp
+	addl	%edi,%edx
+	movl	%eax,%edi
+	roll	$11,%edx
+	addl	%eax,%edx
+	# R2 38 
+	xorl	%ebx,%edi
+	xorl	%edx,%edi
+	leal	4139469664(%ecx,%ebp,1),%ecx
+	addl	%edi,%ecx
+	roll	$16,%ecx
+	movl	40(%esi),%ebp
+	movl	%edx,%edi
+	# R2 39 
+	leal	3200236656(%ebx,%ebp,1),%ebx
+	addl	%edx,%ecx
+	xorl	%eax,%edi
+	xorl	%ecx,%edi
+	movl	52(%esi),%ebp
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	roll	$23,%ebx
+	addl	%ecx,%ebx
+	# R2 40 
+	xorl	%edx,%edi
+	xorl	%ebx,%edi
+	leal	681279174(%eax,%ebp,1),%eax
+	addl	%edi,%eax
+	roll	$4,%eax
+	movl	(%esi),%ebp
+	movl	%ebx,%edi
+	# R2 41 
+	leal	3936430074(%edx,%ebp,1),%edx
+	addl	%ebx,%eax
+	xorl	%ecx,%edi
+	xorl	%eax,%edi
+	movl	12(%esi),%ebp
+	addl	%edi,%edx
+	movl	%eax,%edi
+	roll	$11,%edx
+	addl	%eax,%edx
+	# R2 42 
+	xorl	%ebx,%edi
+	xorl	%edx,%edi
+	leal	3572445317(%ecx,%ebp,1),%ecx
+	addl	%edi,%ecx
+	roll	$16,%ecx
+	movl	24(%esi),%ebp
+	movl	%edx,%edi
+	# R2 43 
+	leal	76029189(%ebx,%ebp,1),%ebx
+	addl	%edx,%ecx
+	xorl	%eax,%edi
+	xorl	%ecx,%edi
+	movl	36(%esi),%ebp
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	roll	$23,%ebx
+	addl	%ecx,%ebx
+	# R2 44 
+	xorl	%edx,%edi
+	xorl	%ebx,%edi
+	leal	3654602809(%eax,%ebp,1),%eax
+	addl	%edi,%eax
+	roll	$4,%eax
+	movl	48(%esi),%ebp
+	movl	%ebx,%edi
+	# R2 45 
+	leal	3873151461(%edx,%ebp,1),%edx
+	addl	%ebx,%eax
+	xorl	%ecx,%edi
+	xorl	%eax,%edi
+	movl	60(%esi),%ebp
+	addl	%edi,%edx
+	movl	%eax,%edi
+	roll	$11,%edx
+	addl	%eax,%edx
+	# R2 46 
+	xorl	%ebx,%edi
+	xorl	%edx,%edi
+	leal	530742520(%ecx,%ebp,1),%ecx
+	addl	%edi,%ecx
+	roll	$16,%ecx
+	movl	8(%esi),%ebp
+	movl	%edx,%edi
+	# R2 47 
+	leal	3299628645(%ebx,%ebp,1),%ebx
+	addl	%edx,%ecx
+	xorl	%eax,%edi
+	xorl	%ecx,%edi
+	movl	(%esi),%ebp
+	addl	%edi,%ebx
+	movl	$-1,%edi
+	roll	$23,%ebx
+	addl	%ecx,%ebx
+
+	# R3 section 
+	# R3 48 
+	xorl	%edx,%edi
+	orl	%ebx,%edi
+	leal	4096336452(%eax,%ebp,1),%eax
+	xorl	%ecx,%edi
+	movl	28(%esi),%ebp
+	addl	%edi,%eax
+	movl	$-1,%edi
+	roll	$6,%eax
+	xorl	%ecx,%edi
+	addl	%ebx,%eax
+	# R3 49 
+	orl	%eax,%edi
+	leal	1126891415(%edx,%ebp,1),%edx
+	xorl	%ebx,%edi
+	movl	56(%esi),%ebp
+	addl	%edi,%edx
+	movl	$-1,%edi
+	roll	$10,%edx
+	xorl	%ebx,%edi
+	addl	%eax,%edx
+	# R3 50 
+	orl	%edx,%edi
+	leal	2878612391(%ecx,%ebp,1),%ecx
+	xorl	%eax,%edi
+	movl	20(%esi),%ebp
+	addl	%edi,%ecx
+	movl	$-1,%edi
+	roll	$15,%ecx
+	xorl	%eax,%edi
+	addl	%edx,%ecx
+	# R3 51 
+	orl	%ecx,%edi
+	leal	4237533241(%ebx,%ebp,1),%ebx
+	xorl	%edx,%edi
+	movl	48(%esi),%ebp
+	addl	%edi,%ebx
+	movl	$-1,%edi
+	roll	$21,%ebx
+	xorl	%edx,%edi
+	addl	%ecx,%ebx
+	# R3 52 
+	orl	%ebx,%edi
+	leal	1700485571(%eax,%ebp,1),%eax
+	xorl	%ecx,%edi
+	movl	12(%esi),%ebp
+	addl	%edi,%eax
+	movl	$-1,%edi
+	roll	$6,%eax
+	xorl	%ecx,%edi
+	addl	%ebx,%eax
+	# R3 53 
+	orl	%eax,%edi
+	leal	2399980690(%edx,%ebp,1),%edx
+	xorl	%ebx,%edi
+	movl	40(%esi),%ebp
+	addl	%edi,%edx
+	movl	$-1,%edi
+	roll	$10,%edx
+	xorl	%ebx,%edi
+	addl	%eax,%edx
+	# R3 54 
+	orl	%edx,%edi
+	leal	4293915773(%ecx,%ebp,1),%ecx
+	xorl	%eax,%edi
+	movl	4(%esi),%ebp
+	addl	%edi,%ecx
+	movl	$-1,%edi
+	roll	$15,%ecx
+	xorl	%eax,%edi
+	addl	%edx,%ecx
+	# R3 55 
+	orl	%ecx,%edi
+	leal	2240044497(%ebx,%ebp,1),%ebx
+	xorl	%edx,%edi
+	movl	32(%esi),%ebp
+	addl	%edi,%ebx
+	movl	$-1,%edi
+	roll	$21,%ebx
+	xorl	%edx,%edi
+	addl	%ecx,%ebx
+	# R3 56 
+	orl	%ebx,%edi
+	leal	1873313359(%eax,%ebp,1),%eax
+	xorl	%ecx,%edi
+	movl	60(%esi),%ebp
+	addl	%edi,%eax
+	movl	$-1,%edi
+	roll	$6,%eax
+	xorl	%ecx,%edi
+	addl	%ebx,%eax
+	# R3 57 
+	orl	%eax,%edi
+	leal	4264355552(%edx,%ebp,1),%edx
+	xorl	%ebx,%edi
+	movl	24(%esi),%ebp
+	addl	%edi,%edx
+	movl	$-1,%edi
+	roll	$10,%edx
+	xorl	%ebx,%edi
+	addl	%eax,%edx
+	# R3 58 
+	orl	%edx,%edi
+	leal	2734768916(%ecx,%ebp,1),%ecx
+	xorl	%eax,%edi
+	movl	52(%esi),%ebp
+	addl	%edi,%ecx
+	movl	$-1,%edi
+	roll	$15,%ecx
+	xorl	%eax,%edi
+	addl	%edx,%ecx
+	# R3 59 
+	orl	%ecx,%edi
+	leal	1309151649(%ebx,%ebp,1),%ebx
+	xorl	%edx,%edi
+	movl	16(%esi),%ebp
+	addl	%edi,%ebx
+	movl	$-1,%edi
+	roll	$21,%ebx
+	xorl	%edx,%edi
+	addl	%ecx,%ebx
+	# R3 60 
+	orl	%ebx,%edi
+	leal	4149444226(%eax,%ebp,1),%eax
+	xorl	%ecx,%edi
+	movl	44(%esi),%ebp
+	addl	%edi,%eax
+	movl	$-1,%edi
+	roll	$6,%eax
+	xorl	%ecx,%edi
+	addl	%ebx,%eax
+	# R3 61 
+	orl	%eax,%edi
+	leal	3174756917(%edx,%ebp,1),%edx
+	xorl	%ebx,%edi
+	movl	8(%esi),%ebp
+	addl	%edi,%edx
+	movl	$-1,%edi
+	roll	$10,%edx
+	xorl	%ebx,%edi
+	addl	%eax,%edx
+	# R3 62 
+	orl	%edx,%edi
+	leal	718787259(%ecx,%ebp,1),%ecx
+	xorl	%eax,%edi
+	movl	36(%esi),%ebp
+	addl	%edi,%ecx
+	movl	$-1,%edi
+	roll	$15,%ecx
+	xorl	%eax,%edi
+	addl	%edx,%ecx
+	# R3 63 
+	orl	%ecx,%edi
+	leal	3951481745(%ebx,%ebp,1),%ebx
+	xorl	%edx,%edi
+	movl	24(%esp),%ebp
+	addl	%edi,%ebx
+	addl	$64,%esi
+	roll	$21,%ebx
+	movl	(%ebp),%edi
+	addl	%ecx,%ebx
+	addl	%edi,%eax
+	movl	4(%ebp),%edi
+	addl	%edi,%ebx
+	movl	8(%ebp),%edi
+	addl	%edi,%ecx
+	movl	12(%ebp),%edi
+	addl	%edi,%edx
+	movl	%eax,(%ebp)
+	movl	%ebx,4(%ebp)
+	movl	(%esp),%edi
+	movl	%ecx,8(%ebp)
+	movl	%edx,12(%ebp)
+	cmpl	%esi,%edi
+	jae	L000start
+	popl	%eax
+	popl	%ebx
+	popl	%ebp
+	popl	%edi
+	popl	%esi
+	ret
+#endif  // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__)
diff --git a/gen/crypto/md5-586-linux.S b/gen/crypto/md5-586-linux.S
new file mode 100644
index 0000000..a297f2b
--- /dev/null
+++ b/gen/crypto/md5-586-linux.S
@@ -0,0 +1,686 @@
+// This file is generated from a similarly-named Perl script in the BoringSSL
+// source tree. Do not edit by hand.
+
+#include <openssl/asm_base.h>
+
+#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__)
+.text
+.globl	md5_block_asm_data_order
+.hidden	md5_block_asm_data_order
+.type	md5_block_asm_data_order,@function
+.align	16
+md5_block_asm_data_order:	// stack args: state pointer (four dwords, updated in place), data pointer, count of 64-byte blocks
+.L_md5_block_asm_data_order_begin:
+	pushl	%esi	// esi, edi, ebp, ebx are saved here and restored before ret
+	pushl	%edi
+	movl	12(%esp),%edi	// edi = 1st stack argument: pointer to the four-dword state
+	movl	16(%esp),%esi	// esi = 2nd stack argument: pointer to the input data
+	movl	20(%esp),%ecx	// ecx = 3rd stack argument: count of 64-byte blocks
+	pushl	%ebp
+	shll	$6,%ecx	// count * 64 = input length in bytes
+	pushl	%ebx
+	addl	%esi,%ecx
+	subl	$64,%ecx	// ecx = address of the last block to process
+	movl	(%edi),%eax	// eax, ebx, ecx, edx = working copy of the four state dwords
+	pushl	%ecx	// keep the last-block address at (%esp) for the loop test
+	movl	4(%edi),%ebx
+	movl	8(%edi),%ecx
+	movl	12(%edi),%edx
+.L000start:	// NOTE(review): the loop body runs before the block count is tested, so a count of 0 still consumes one block - confirm callers never pass 0
+	// First 16 steps: mix = ((c ^ d) & b) ^ d, rotate counts 7, 12, 17, 22. a, b, c, d start as eax, ebx, ecx, edx; the updated register cycles eax, edx, ecx, ebx.
+	// R0 section
+	movl	%ecx,%edi
+	movl	(%esi),%ebp
+	// R0 0: eax = ebx + rol(eax + mix + input dword 0 + 3614090360, 7)
+	xorl	%edx,%edi
+	andl	%ebx,%edi
+	leal	3614090360(%eax,%ebp,1),%eax
+	xorl	%edx,%edi
+	addl	%edi,%eax
+	movl	%ebx,%edi
+	roll	$7,%eax
+	movl	4(%esi),%ebp
+	addl	%ebx,%eax
+	// R0 1
+	xorl	%ecx,%edi
+	andl	%eax,%edi
+	leal	3905402710(%edx,%ebp,1),%edx
+	xorl	%ecx,%edi
+	addl	%edi,%edx
+	movl	%eax,%edi
+	roll	$12,%edx
+	movl	8(%esi),%ebp
+	addl	%eax,%edx
+	// R0 2
+	xorl	%ebx,%edi
+	andl	%edx,%edi
+	leal	606105819(%ecx,%ebp,1),%ecx
+	xorl	%ebx,%edi
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	roll	$17,%ecx
+	movl	12(%esi),%ebp
+	addl	%edx,%ecx
+	// R0 3
+	xorl	%eax,%edi
+	andl	%ecx,%edi
+	leal	3250441966(%ebx,%ebp,1),%ebx
+	xorl	%eax,%edi
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	roll	$22,%ebx
+	movl	16(%esi),%ebp
+	addl	%ecx,%ebx
+	// R0 4
+	xorl	%edx,%edi
+	andl	%ebx,%edi
+	leal	4118548399(%eax,%ebp,1),%eax
+	xorl	%edx,%edi
+	addl	%edi,%eax
+	movl	%ebx,%edi
+	roll	$7,%eax
+	movl	20(%esi),%ebp
+	addl	%ebx,%eax
+	// R0 5
+	xorl	%ecx,%edi
+	andl	%eax,%edi
+	leal	1200080426(%edx,%ebp,1),%edx
+	xorl	%ecx,%edi
+	addl	%edi,%edx
+	movl	%eax,%edi
+	roll	$12,%edx
+	movl	24(%esi),%ebp
+	addl	%eax,%edx
+	// R0 6
+	xorl	%ebx,%edi
+	andl	%edx,%edi
+	leal	2821735955(%ecx,%ebp,1),%ecx
+	xorl	%ebx,%edi
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	roll	$17,%ecx
+	movl	28(%esi),%ebp
+	addl	%edx,%ecx
+	// R0 7
+	xorl	%eax,%edi
+	andl	%ecx,%edi
+	leal	4249261313(%ebx,%ebp,1),%ebx
+	xorl	%eax,%edi
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	roll	$22,%ebx
+	movl	32(%esi),%ebp
+	addl	%ecx,%ebx
+	// R0 8
+	xorl	%edx,%edi
+	andl	%ebx,%edi
+	leal	1770035416(%eax,%ebp,1),%eax
+	xorl	%edx,%edi
+	addl	%edi,%eax
+	movl	%ebx,%edi
+	roll	$7,%eax
+	movl	36(%esi),%ebp
+	addl	%ebx,%eax
+	// R0 9
+	xorl	%ecx,%edi
+	andl	%eax,%edi
+	leal	2336552879(%edx,%ebp,1),%edx
+	xorl	%ecx,%edi
+	addl	%edi,%edx
+	movl	%eax,%edi
+	roll	$12,%edx
+	movl	40(%esi),%ebp
+	addl	%eax,%edx
+	// R0 10
+	xorl	%ebx,%edi
+	andl	%edx,%edi
+	leal	4294925233(%ecx,%ebp,1),%ecx
+	xorl	%ebx,%edi
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	roll	$17,%ecx
+	movl	44(%esi),%ebp
+	addl	%edx,%ecx
+	// R0 11
+	xorl	%eax,%edi
+	andl	%ecx,%edi
+	leal	2304563134(%ebx,%ebp,1),%ebx
+	xorl	%eax,%edi
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	roll	$22,%ebx
+	movl	48(%esi),%ebp
+	addl	%ecx,%ebx
+	// R0 12
+	xorl	%edx,%edi
+	andl	%ebx,%edi
+	leal	1804603682(%eax,%ebp,1),%eax
+	xorl	%edx,%edi
+	addl	%edi,%eax
+	movl	%ebx,%edi
+	roll	$7,%eax
+	movl	52(%esi),%ebp
+	addl	%ebx,%eax
+	// R0 13
+	xorl	%ecx,%edi
+	andl	%eax,%edi
+	leal	4254626195(%edx,%ebp,1),%edx
+	xorl	%ecx,%edi
+	addl	%edi,%edx
+	movl	%eax,%edi
+	roll	$12,%edx
+	movl	56(%esi),%ebp
+	addl	%eax,%edx
+	// R0 14
+	xorl	%ebx,%edi
+	andl	%edx,%edi
+	leal	2792965006(%ecx,%ebp,1),%ecx
+	xorl	%ebx,%edi
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	roll	$17,%ecx
+	movl	60(%esi),%ebp
+	addl	%edx,%ecx
+	// R0 15
+	xorl	%eax,%edi
+	andl	%ecx,%edi
+	leal	1236535329(%ebx,%ebp,1),%ebx
+	xorl	%eax,%edi
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	roll	$22,%ebx
+	movl	4(%esi),%ebp
+	addl	%ecx,%ebx
+	// Next 16 steps: mix = ((b ^ c) & d) ^ c, rotate counts 5, 9, 14, 20.
+	// R1 section
+	// R1 16
+	leal	4129170786(%eax,%ebp,1),%eax
+	xorl	%ebx,%edi
+	andl	%edx,%edi
+	movl	24(%esi),%ebp
+	xorl	%ecx,%edi
+	addl	%edi,%eax
+	movl	%ebx,%edi
+	roll	$5,%eax
+	addl	%ebx,%eax
+	// R1 17
+	leal	3225465664(%edx,%ebp,1),%edx
+	xorl	%eax,%edi
+	andl	%ecx,%edi
+	movl	44(%esi),%ebp
+	xorl	%ebx,%edi
+	addl	%edi,%edx
+	movl	%eax,%edi
+	roll	$9,%edx
+	addl	%eax,%edx
+	// R1 18
+	leal	643717713(%ecx,%ebp,1),%ecx
+	xorl	%edx,%edi
+	andl	%ebx,%edi
+	movl	(%esi),%ebp
+	xorl	%eax,%edi
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	roll	$14,%ecx
+	addl	%edx,%ecx
+	// R1 19
+	leal	3921069994(%ebx,%ebp,1),%ebx
+	xorl	%ecx,%edi
+	andl	%eax,%edi
+	movl	20(%esi),%ebp
+	xorl	%edx,%edi
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	roll	$20,%ebx
+	addl	%ecx,%ebx
+	// R1 20
+	leal	3593408605(%eax,%ebp,1),%eax
+	xorl	%ebx,%edi
+	andl	%edx,%edi
+	movl	40(%esi),%ebp
+	xorl	%ecx,%edi
+	addl	%edi,%eax
+	movl	%ebx,%edi
+	roll	$5,%eax
+	addl	%ebx,%eax
+	// R1 21
+	leal	38016083(%edx,%ebp,1),%edx
+	xorl	%eax,%edi
+	andl	%ecx,%edi
+	movl	60(%esi),%ebp
+	xorl	%ebx,%edi
+	addl	%edi,%edx
+	movl	%eax,%edi
+	roll	$9,%edx
+	addl	%eax,%edx
+	// R1 22
+	leal	3634488961(%ecx,%ebp,1),%ecx
+	xorl	%edx,%edi
+	andl	%ebx,%edi
+	movl	16(%esi),%ebp
+	xorl	%eax,%edi
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	roll	$14,%ecx
+	addl	%edx,%ecx
+	// R1 23
+	leal	3889429448(%ebx,%ebp,1),%ebx
+	xorl	%ecx,%edi
+	andl	%eax,%edi
+	movl	36(%esi),%ebp
+	xorl	%edx,%edi
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	roll	$20,%ebx
+	addl	%ecx,%ebx
+	// R1 24
+	leal	568446438(%eax,%ebp,1),%eax
+	xorl	%ebx,%edi
+	andl	%edx,%edi
+	movl	56(%esi),%ebp
+	xorl	%ecx,%edi
+	addl	%edi,%eax
+	movl	%ebx,%edi
+	roll	$5,%eax
+	addl	%ebx,%eax
+	// R1 25
+	leal	3275163606(%edx,%ebp,1),%edx
+	xorl	%eax,%edi
+	andl	%ecx,%edi
+	movl	12(%esi),%ebp
+	xorl	%ebx,%edi
+	addl	%edi,%edx
+	movl	%eax,%edi
+	roll	$9,%edx
+	addl	%eax,%edx
+	// R1 26
+	leal	4107603335(%ecx,%ebp,1),%ecx
+	xorl	%edx,%edi
+	andl	%ebx,%edi
+	movl	32(%esi),%ebp
+	xorl	%eax,%edi
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	roll	$14,%ecx
+	addl	%edx,%ecx
+	// R1 27
+	leal	1163531501(%ebx,%ebp,1),%ebx
+	xorl	%ecx,%edi
+	andl	%eax,%edi
+	movl	52(%esi),%ebp
+	xorl	%edx,%edi
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	roll	$20,%ebx
+	addl	%ecx,%ebx
+	// R1 28
+	leal	2850285829(%eax,%ebp,1),%eax
+	xorl	%ebx,%edi
+	andl	%edx,%edi
+	movl	8(%esi),%ebp
+	xorl	%ecx,%edi
+	addl	%edi,%eax
+	movl	%ebx,%edi
+	roll	$5,%eax
+	addl	%ebx,%eax
+	// R1 29
+	leal	4243563512(%edx,%ebp,1),%edx
+	xorl	%eax,%edi
+	andl	%ecx,%edi
+	movl	28(%esi),%ebp
+	xorl	%ebx,%edi
+	addl	%edi,%edx
+	movl	%eax,%edi
+	roll	$9,%edx
+	addl	%eax,%edx
+	// R1 30
+	leal	1735328473(%ecx,%ebp,1),%ecx
+	xorl	%edx,%edi
+	andl	%ebx,%edi
+	movl	48(%esi),%ebp
+	xorl	%eax,%edi
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	roll	$14,%ecx
+	addl	%edx,%ecx
+	// R1 31
+	leal	2368359562(%ebx,%ebp,1),%ebx
+	xorl	%ecx,%edi
+	andl	%eax,%edi
+	movl	20(%esi),%ebp
+	xorl	%edx,%edi
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	roll	$20,%ebx
+	addl	%ecx,%ebx
+	// Next 16 steps: mix = b ^ c ^ d, rotate counts 4, 11, 16, 23.
+	// R2 section
+	// R2 32
+	xorl	%edx,%edi
+	xorl	%ebx,%edi
+	leal	4294588738(%eax,%ebp,1),%eax
+	addl	%edi,%eax
+	roll	$4,%eax
+	movl	32(%esi),%ebp
+	movl	%ebx,%edi
+	// R2 33
+	leal	2272392833(%edx,%ebp,1),%edx
+	addl	%ebx,%eax
+	xorl	%ecx,%edi
+	xorl	%eax,%edi
+	movl	44(%esi),%ebp
+	addl	%edi,%edx
+	movl	%eax,%edi
+	roll	$11,%edx
+	addl	%eax,%edx
+	// R2 34
+	xorl	%ebx,%edi
+	xorl	%edx,%edi
+	leal	1839030562(%ecx,%ebp,1),%ecx
+	addl	%edi,%ecx
+	roll	$16,%ecx
+	movl	56(%esi),%ebp
+	movl	%edx,%edi
+	// R2 35
+	leal	4259657740(%ebx,%ebp,1),%ebx
+	addl	%edx,%ecx
+	xorl	%eax,%edi
+	xorl	%ecx,%edi
+	movl	4(%esi),%ebp
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	roll	$23,%ebx
+	addl	%ecx,%ebx
+	// R2 36
+	xorl	%edx,%edi
+	xorl	%ebx,%edi
+	leal	2763975236(%eax,%ebp,1),%eax
+	addl	%edi,%eax
+	roll	$4,%eax
+	movl	16(%esi),%ebp
+	movl	%ebx,%edi
+	// R2 37
+	leal	1272893353(%edx,%ebp,1),%edx
+	addl	%ebx,%eax
+	xorl	%ecx,%edi
+	xorl	%eax,%edi
+	movl	28(%esi),%ebp
+	addl	%edi,%edx
+	movl	%eax,%edi
+	roll	$11,%edx
+	addl	%eax,%edx
+	// R2 38
+	xorl	%ebx,%edi
+	xorl	%edx,%edi
+	leal	4139469664(%ecx,%ebp,1),%ecx
+	addl	%edi,%ecx
+	roll	$16,%ecx
+	movl	40(%esi),%ebp
+	movl	%edx,%edi
+	// R2 39
+	leal	3200236656(%ebx,%ebp,1),%ebx
+	addl	%edx,%ecx
+	xorl	%eax,%edi
+	xorl	%ecx,%edi
+	movl	52(%esi),%ebp
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	roll	$23,%ebx
+	addl	%ecx,%ebx
+	// R2 40
+	xorl	%edx,%edi
+	xorl	%ebx,%edi
+	leal	681279174(%eax,%ebp,1),%eax
+	addl	%edi,%eax
+	roll	$4,%eax
+	movl	(%esi),%ebp
+	movl	%ebx,%edi
+	// R2 41
+	leal	3936430074(%edx,%ebp,1),%edx
+	addl	%ebx,%eax
+	xorl	%ecx,%edi
+	xorl	%eax,%edi
+	movl	12(%esi),%ebp
+	addl	%edi,%edx
+	movl	%eax,%edi
+	roll	$11,%edx
+	addl	%eax,%edx
+	// R2 42
+	xorl	%ebx,%edi
+	xorl	%edx,%edi
+	leal	3572445317(%ecx,%ebp,1),%ecx
+	addl	%edi,%ecx
+	roll	$16,%ecx
+	movl	24(%esi),%ebp
+	movl	%edx,%edi
+	// R2 43
+	leal	76029189(%ebx,%ebp,1),%ebx
+	addl	%edx,%ecx
+	xorl	%eax,%edi
+	xorl	%ecx,%edi
+	movl	36(%esi),%ebp
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	roll	$23,%ebx
+	addl	%ecx,%ebx
+	// R2 44
+	xorl	%edx,%edi
+	xorl	%ebx,%edi
+	leal	3654602809(%eax,%ebp,1),%eax
+	addl	%edi,%eax
+	roll	$4,%eax
+	movl	48(%esi),%ebp
+	movl	%ebx,%edi
+	// R2 45
+	leal	3873151461(%edx,%ebp,1),%edx
+	addl	%ebx,%eax
+	xorl	%ecx,%edi
+	xorl	%eax,%edi
+	movl	60(%esi),%ebp
+	addl	%edi,%edx
+	movl	%eax,%edi
+	roll	$11,%edx
+	addl	%eax,%edx
+	// R2 46
+	xorl	%ebx,%edi
+	xorl	%edx,%edi
+	leal	530742520(%ecx,%ebp,1),%ecx
+	addl	%edi,%ecx
+	roll	$16,%ecx
+	movl	8(%esi),%ebp
+	movl	%edx,%edi
+	// R2 47
+	leal	3299628645(%ebx,%ebp,1),%ebx
+	addl	%edx,%ecx
+	xorl	%eax,%edi
+	xorl	%ecx,%edi
+	movl	(%esi),%ebp
+	addl	%edi,%ebx
+	movl	$-1,%edi
+	roll	$23,%ebx
+	addl	%ecx,%ebx
+	// Last 16 steps: mix = (b | ~d) ^ c, rotate counts 6, 10, 15, 21; edi is reloaded with -1 so that xor yields the complement.
+	// R3 section
+	// R3 48
+	xorl	%edx,%edi
+	orl	%ebx,%edi
+	leal	4096336452(%eax,%ebp,1),%eax
+	xorl	%ecx,%edi
+	movl	28(%esi),%ebp
+	addl	%edi,%eax
+	movl	$-1,%edi
+	roll	$6,%eax
+	xorl	%ecx,%edi
+	addl	%ebx,%eax
+	// R3 49
+	orl	%eax,%edi
+	leal	1126891415(%edx,%ebp,1),%edx
+	xorl	%ebx,%edi
+	movl	56(%esi),%ebp
+	addl	%edi,%edx
+	movl	$-1,%edi
+	roll	$10,%edx
+	xorl	%ebx,%edi
+	addl	%eax,%edx
+	// R3 50
+	orl	%edx,%edi
+	leal	2878612391(%ecx,%ebp,1),%ecx
+	xorl	%eax,%edi
+	movl	20(%esi),%ebp
+	addl	%edi,%ecx
+	movl	$-1,%edi
+	roll	$15,%ecx
+	xorl	%eax,%edi
+	addl	%edx,%ecx
+	// R3 51
+	orl	%ecx,%edi
+	leal	4237533241(%ebx,%ebp,1),%ebx
+	xorl	%edx,%edi
+	movl	48(%esi),%ebp
+	addl	%edi,%ebx
+	movl	$-1,%edi
+	roll	$21,%ebx
+	xorl	%edx,%edi
+	addl	%ecx,%ebx
+	// R3 52
+	orl	%ebx,%edi
+	leal	1700485571(%eax,%ebp,1),%eax
+	xorl	%ecx,%edi
+	movl	12(%esi),%ebp
+	addl	%edi,%eax
+	movl	$-1,%edi
+	roll	$6,%eax
+	xorl	%ecx,%edi
+	addl	%ebx,%eax
+	// R3 53
+	orl	%eax,%edi
+	leal	2399980690(%edx,%ebp,1),%edx
+	xorl	%ebx,%edi
+	movl	40(%esi),%ebp
+	addl	%edi,%edx
+	movl	$-1,%edi
+	roll	$10,%edx
+	xorl	%ebx,%edi
+	addl	%eax,%edx
+	// R3 54
+	orl	%edx,%edi
+	leal	4293915773(%ecx,%ebp,1),%ecx
+	xorl	%eax,%edi
+	movl	4(%esi),%ebp
+	addl	%edi,%ecx
+	movl	$-1,%edi
+	roll	$15,%ecx
+	xorl	%eax,%edi
+	addl	%edx,%ecx
+	// R3 55
+	orl	%ecx,%edi
+	leal	2240044497(%ebx,%ebp,1),%ebx
+	xorl	%edx,%edi
+	movl	32(%esi),%ebp
+	addl	%edi,%ebx
+	movl	$-1,%edi
+	roll	$21,%ebx
+	xorl	%edx,%edi
+	addl	%ecx,%ebx
+	// R3 56
+	orl	%ebx,%edi
+	leal	1873313359(%eax,%ebp,1),%eax
+	xorl	%ecx,%edi
+	movl	60(%esi),%ebp
+	addl	%edi,%eax
+	movl	$-1,%edi
+	roll	$6,%eax
+	xorl	%ecx,%edi
+	addl	%ebx,%eax
+	// R3 57
+	orl	%eax,%edi
+	leal	4264355552(%edx,%ebp,1),%edx
+	xorl	%ebx,%edi
+	movl	24(%esi),%ebp
+	addl	%edi,%edx
+	movl	$-1,%edi
+	roll	$10,%edx
+	xorl	%ebx,%edi
+	addl	%eax,%edx
+	// R3 58
+	orl	%edx,%edi
+	leal	2734768916(%ecx,%ebp,1),%ecx
+	xorl	%eax,%edi
+	movl	52(%esi),%ebp
+	addl	%edi,%ecx
+	movl	$-1,%edi
+	roll	$15,%ecx
+	xorl	%eax,%edi
+	addl	%edx,%ecx
+	// R3 59
+	orl	%ecx,%edi
+	leal	1309151649(%ebx,%ebp,1),%ebx
+	xorl	%edx,%edi
+	movl	16(%esi),%ebp
+	addl	%edi,%ebx
+	movl	$-1,%edi
+	roll	$21,%ebx
+	xorl	%edx,%edi
+	addl	%ecx,%ebx
+	// R3 60
+	orl	%ebx,%edi
+	leal	4149444226(%eax,%ebp,1),%eax
+	xorl	%ecx,%edi
+	movl	44(%esi),%ebp
+	addl	%edi,%eax
+	movl	$-1,%edi
+	roll	$6,%eax
+	xorl	%ecx,%edi
+	addl	%ebx,%eax
+	// R3 61
+	orl	%eax,%edi
+	leal	3174756917(%edx,%ebp,1),%edx
+	xorl	%ebx,%edi
+	movl	8(%esi),%ebp
+	addl	%edi,%edx
+	movl	$-1,%edi
+	roll	$10,%edx
+	xorl	%ebx,%edi
+	addl	%eax,%edx
+	// R3 62
+	orl	%edx,%edi
+	leal	718787259(%ecx,%ebp,1),%ecx
+	xorl	%eax,%edi
+	movl	36(%esi),%ebp
+	addl	%edi,%ecx
+	movl	$-1,%edi
+	roll	$15,%ecx
+	xorl	%eax,%edi
+	addl	%edx,%ecx
+	// R3 63, interleaved with the state update and the loop test
+	orl	%ecx,%edi
+	leal	3951481745(%ebx,%ebp,1),%ebx
+	xorl	%edx,%edi
+	movl	24(%esp),%ebp	// ebp = state pointer: the 1st argument, 24 bytes up after five pushes
+	addl	%edi,%ebx
+	addl	$64,%esi	// advance the data pointer to the next 64-byte block
+	roll	$21,%ebx
+	movl	(%ebp),%edi
+	addl	%ecx,%ebx
+	addl	%edi,%eax	// add the state from before this block into the working registers
+	movl	4(%ebp),%edi
+	addl	%edi,%ebx
+	movl	8(%ebp),%edi
+	addl	%edi,%ecx
+	movl	12(%ebp),%edi
+	addl	%edi,%edx
+	movl	%eax,(%ebp)	// store the updated state back through the state pointer
+	movl	%ebx,4(%ebp)
+	movl	(%esp),%edi	// edi = last-block address saved in the prologue
+	movl	%ecx,8(%ebp)
+	movl	%edx,12(%ebp)
+	cmpl	%esi,%edi
+	jae	.L000start	// loop while last-block address >= next data pointer (unsigned compare)
+	popl	%eax	// discard the saved last-block address
+	popl	%ebx
+	popl	%ebp
+	popl	%edi
+	popl	%esi
+	ret
+.size	md5_block_asm_data_order,.-.L_md5_block_asm_data_order_begin
+#endif  // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__)
diff --git a/gen/crypto/md5-586-win.asm b/gen/crypto/md5-586-win.asm
new file mode 100644
index 0000000..25592b8
--- /dev/null
+++ b/gen/crypto/md5-586-win.asm
@@ -0,0 +1,694 @@
+; This file is generated from a similarly-named Perl script in the BoringSSL
+; source tree. Do not edit by hand.
+
+%ifdef BORINGSSL_PREFIX
+%include "boringssl_prefix_symbols_nasm.inc"
+%endif
+%ifidn __OUTPUT_FORMAT__, win32
+%ifidn __OUTPUT_FORMAT__,obj
+section	code	use32 class=code align=64
+%elifidn __OUTPUT_FORMAT__,win32
+$@feat.00 equ 1
+section	.text	code align=64
+%else
+section	.text	code
+%endif
+global	_md5_block_asm_data_order
+align	16
+_md5_block_asm_data_order:	; stack args: state pointer (four dwords, updated in place), data pointer, count of 64-byte blocks
+L$_md5_block_asm_data_order_begin:
+	push	esi	; esi, edi, ebp, ebx are saved here and restored before ret
+	push	edi
+	mov	edi,DWORD [12+esp]	; edi = 1st stack argument: pointer to the four-dword state
+	mov	esi,DWORD [16+esp]	; esi = 2nd stack argument: pointer to the input data
+	mov	ecx,DWORD [20+esp]	; ecx = 3rd stack argument: count of 64-byte blocks
+	push	ebp
+	shl	ecx,6	; count * 64 = input length in bytes
+	push	ebx
+	add	ecx,esi
+	sub	ecx,64	; ecx = address of the last block to process
+	mov	eax,DWORD [edi]	; eax, ebx, ecx, edx = working copy of the four state dwords
+	push	ecx	; keep the last-block address at [esp] for the loop test
+	mov	ebx,DWORD [4+edi]
+	mov	ecx,DWORD [8+edi]
+	mov	edx,DWORD [12+edi]
+L$000start:	; NOTE(review): the loop body runs before the block count is tested, so a count of 0 still consumes one block - confirm callers never pass 0
+	; First 16 steps: mix = ((c ^ d) & b) ^ d, rotate counts 7, 12, 17, 22. a, b, c, d start as eax, ebx, ecx, edx; the updated register cycles eax, edx, ecx, ebx.
+	; R0 section
+	mov	edi,ecx
+	mov	ebp,DWORD [esi]
+	; R0 0: eax = ebx + rol(eax + mix + input dword 0 + 3614090360, 7)
+	xor	edi,edx
+	and	edi,ebx
+	lea	eax,[3614090360+ebp*1+eax]
+	xor	edi,edx
+	add	eax,edi
+	mov	edi,ebx
+	rol	eax,7
+	mov	ebp,DWORD [4+esi]
+	add	eax,ebx
+	; R0 1
+	xor	edi,ecx
+	and	edi,eax
+	lea	edx,[3905402710+ebp*1+edx]
+	xor	edi,ecx
+	add	edx,edi
+	mov	edi,eax
+	rol	edx,12
+	mov	ebp,DWORD [8+esi]
+	add	edx,eax
+	; R0 2
+	xor	edi,ebx
+	and	edi,edx
+	lea	ecx,[606105819+ebp*1+ecx]
+	xor	edi,ebx
+	add	ecx,edi
+	mov	edi,edx
+	rol	ecx,17
+	mov	ebp,DWORD [12+esi]
+	add	ecx,edx
+	; R0 3
+	xor	edi,eax
+	and	edi,ecx
+	lea	ebx,[3250441966+ebp*1+ebx]
+	xor	edi,eax
+	add	ebx,edi
+	mov	edi,ecx
+	rol	ebx,22
+	mov	ebp,DWORD [16+esi]
+	add	ebx,ecx
+	; R0 4
+	xor	edi,edx
+	and	edi,ebx
+	lea	eax,[4118548399+ebp*1+eax]
+	xor	edi,edx
+	add	eax,edi
+	mov	edi,ebx
+	rol	eax,7
+	mov	ebp,DWORD [20+esi]
+	add	eax,ebx
+	; R0 5
+	xor	edi,ecx
+	and	edi,eax
+	lea	edx,[1200080426+ebp*1+edx]
+	xor	edi,ecx
+	add	edx,edi
+	mov	edi,eax
+	rol	edx,12
+	mov	ebp,DWORD [24+esi]
+	add	edx,eax
+	; R0 6
+	xor	edi,ebx
+	and	edi,edx
+	lea	ecx,[2821735955+ebp*1+ecx]
+	xor	edi,ebx
+	add	ecx,edi
+	mov	edi,edx
+	rol	ecx,17
+	mov	ebp,DWORD [28+esi]
+	add	ecx,edx
+	; R0 7
+	xor	edi,eax
+	and	edi,ecx
+	lea	ebx,[4249261313+ebp*1+ebx]
+	xor	edi,eax
+	add	ebx,edi
+	mov	edi,ecx
+	rol	ebx,22
+	mov	ebp,DWORD [32+esi]
+	add	ebx,ecx
+	; R0 8
+	xor	edi,edx
+	and	edi,ebx
+	lea	eax,[1770035416+ebp*1+eax]
+	xor	edi,edx
+	add	eax,edi
+	mov	edi,ebx
+	rol	eax,7
+	mov	ebp,DWORD [36+esi]
+	add	eax,ebx
+	; R0 9
+	xor	edi,ecx
+	and	edi,eax
+	lea	edx,[2336552879+ebp*1+edx]
+	xor	edi,ecx
+	add	edx,edi
+	mov	edi,eax
+	rol	edx,12
+	mov	ebp,DWORD [40+esi]
+	add	edx,eax
+	; R0 10
+	xor	edi,ebx
+	and	edi,edx
+	lea	ecx,[4294925233+ebp*1+ecx]
+	xor	edi,ebx
+	add	ecx,edi
+	mov	edi,edx
+	rol	ecx,17
+	mov	ebp,DWORD [44+esi]
+	add	ecx,edx
+	; R0 11
+	xor	edi,eax
+	and	edi,ecx
+	lea	ebx,[2304563134+ebp*1+ebx]
+	xor	edi,eax
+	add	ebx,edi
+	mov	edi,ecx
+	rol	ebx,22
+	mov	ebp,DWORD [48+esi]
+	add	ebx,ecx
+	; R0 12
+	xor	edi,edx
+	and	edi,ebx
+	lea	eax,[1804603682+ebp*1+eax]
+	xor	edi,edx
+	add	eax,edi
+	mov	edi,ebx
+	rol	eax,7
+	mov	ebp,DWORD [52+esi]
+	add	eax,ebx
+	; R0 13
+	xor	edi,ecx
+	and	edi,eax
+	lea	edx,[4254626195+ebp*1+edx]
+	xor	edi,ecx
+	add	edx,edi
+	mov	edi,eax
+	rol	edx,12
+	mov	ebp,DWORD [56+esi]
+	add	edx,eax
+	; R0 14
+	xor	edi,ebx
+	and	edi,edx
+	lea	ecx,[2792965006+ebp*1+ecx]
+	xor	edi,ebx
+	add	ecx,edi
+	mov	edi,edx
+	rol	ecx,17
+	mov	ebp,DWORD [60+esi]
+	add	ecx,edx
+	; R0 15
+	xor	edi,eax
+	and	edi,ecx
+	lea	ebx,[1236535329+ebp*1+ebx]
+	xor	edi,eax
+	add	ebx,edi
+	mov	edi,ecx
+	rol	ebx,22
+	mov	ebp,DWORD [4+esi]
+	add	ebx,ecx
+	; Next 16 steps: mix = ((b ^ c) & d) ^ c, rotate counts 5, 9, 14, 20.
+	; R1 section
+	; R1 16
+	lea	eax,[4129170786+ebp*1+eax]
+	xor	edi,ebx
+	and	edi,edx
+	mov	ebp,DWORD [24+esi]
+	xor	edi,ecx
+	add	eax,edi
+	mov	edi,ebx
+	rol	eax,5
+	add	eax,ebx
+	; R1 17
+	lea	edx,[3225465664+ebp*1+edx]
+	xor	edi,eax
+	and	edi,ecx
+	mov	ebp,DWORD [44+esi]
+	xor	edi,ebx
+	add	edx,edi
+	mov	edi,eax
+	rol	edx,9
+	add	edx,eax
+	; R1 18
+	lea	ecx,[643717713+ebp*1+ecx]
+	xor	edi,edx
+	and	edi,ebx
+	mov	ebp,DWORD [esi]
+	xor	edi,eax
+	add	ecx,edi
+	mov	edi,edx
+	rol	ecx,14
+	add	ecx,edx
+	; R1 19
+	lea	ebx,[3921069994+ebp*1+ebx]
+	xor	edi,ecx
+	and	edi,eax
+	mov	ebp,DWORD [20+esi]
+	xor	edi,edx
+	add	ebx,edi
+	mov	edi,ecx
+	rol	ebx,20
+	add	ebx,ecx
+	; R1 20
+	lea	eax,[3593408605+ebp*1+eax]
+	xor	edi,ebx
+	and	edi,edx
+	mov	ebp,DWORD [40+esi]
+	xor	edi,ecx
+	add	eax,edi
+	mov	edi,ebx
+	rol	eax,5
+	add	eax,ebx
+	; R1 21
+	lea	edx,[38016083+ebp*1+edx]
+	xor	edi,eax
+	and	edi,ecx
+	mov	ebp,DWORD [60+esi]
+	xor	edi,ebx
+	add	edx,edi
+	mov	edi,eax
+	rol	edx,9
+	add	edx,eax
+	; R1 22
+	lea	ecx,[3634488961+ebp*1+ecx]
+	xor	edi,edx
+	and	edi,ebx
+	mov	ebp,DWORD [16+esi]
+	xor	edi,eax
+	add	ecx,edi
+	mov	edi,edx
+	rol	ecx,14
+	add	ecx,edx
+	; R1 23
+	lea	ebx,[3889429448+ebp*1+ebx]
+	xor	edi,ecx
+	and	edi,eax
+	mov	ebp,DWORD [36+esi]
+	xor	edi,edx
+	add	ebx,edi
+	mov	edi,ecx
+	rol	ebx,20
+	add	ebx,ecx
+	; R1 24
+	lea	eax,[568446438+ebp*1+eax]
+	xor	edi,ebx
+	and	edi,edx
+	mov	ebp,DWORD [56+esi]
+	xor	edi,ecx
+	add	eax,edi
+	mov	edi,ebx
+	rol	eax,5
+	add	eax,ebx
+	; R1 25
+	lea	edx,[3275163606+ebp*1+edx]
+	xor	edi,eax
+	and	edi,ecx
+	mov	ebp,DWORD [12+esi]
+	xor	edi,ebx
+	add	edx,edi
+	mov	edi,eax
+	rol	edx,9
+	add	edx,eax
+	; R1 26
+	lea	ecx,[4107603335+ebp*1+ecx]
+	xor	edi,edx
+	and	edi,ebx
+	mov	ebp,DWORD [32+esi]
+	xor	edi,eax
+	add	ecx,edi
+	mov	edi,edx
+	rol	ecx,14
+	add	ecx,edx
+	; R1 27
+	lea	ebx,[1163531501+ebp*1+ebx]
+	xor	edi,ecx
+	and	edi,eax
+	mov	ebp,DWORD [52+esi]
+	xor	edi,edx
+	add	ebx,edi
+	mov	edi,ecx
+	rol	ebx,20
+	add	ebx,ecx
+	; R1 28
+	lea	eax,[2850285829+ebp*1+eax]
+	xor	edi,ebx
+	and	edi,edx
+	mov	ebp,DWORD [8+esi]
+	xor	edi,ecx
+	add	eax,edi
+	mov	edi,ebx
+	rol	eax,5
+	add	eax,ebx
+	; R1 29
+	lea	edx,[4243563512+ebp*1+edx]
+	xor	edi,eax
+	and	edi,ecx
+	mov	ebp,DWORD [28+esi]
+	xor	edi,ebx
+	add	edx,edi
+	mov	edi,eax
+	rol	edx,9
+	add	edx,eax
+	; R1 30
+	lea	ecx,[1735328473+ebp*1+ecx]
+	xor	edi,edx
+	and	edi,ebx
+	mov	ebp,DWORD [48+esi]
+	xor	edi,eax
+	add	ecx,edi
+	mov	edi,edx
+	rol	ecx,14
+	add	ecx,edx
+	; R1 31
+	lea	ebx,[2368359562+ebp*1+ebx]
+	xor	edi,ecx
+	and	edi,eax
+	mov	ebp,DWORD [20+esi]
+	xor	edi,edx
+	add	ebx,edi
+	mov	edi,ecx
+	rol	ebx,20
+	add	ebx,ecx
+	; Next 16 steps: mix = b ^ c ^ d, rotate counts 4, 11, 16, 23.
+	; R2 section
+	; R2 32
+	xor	edi,edx
+	xor	edi,ebx
+	lea	eax,[4294588738+ebp*1+eax]
+	add	eax,edi
+	rol	eax,4
+	mov	ebp,DWORD [32+esi]
+	mov	edi,ebx
+	; R2 33
+	lea	edx,[2272392833+ebp*1+edx]
+	add	eax,ebx
+	xor	edi,ecx
+	xor	edi,eax
+	mov	ebp,DWORD [44+esi]
+	add	edx,edi
+	mov	edi,eax
+	rol	edx,11
+	add	edx,eax
+	; R2 34
+	xor	edi,ebx
+	xor	edi,edx
+	lea	ecx,[1839030562+ebp*1+ecx]
+	add	ecx,edi
+	rol	ecx,16
+	mov	ebp,DWORD [56+esi]
+	mov	edi,edx
+	; R2 35
+	lea	ebx,[4259657740+ebp*1+ebx]
+	add	ecx,edx
+	xor	edi,eax
+	xor	edi,ecx
+	mov	ebp,DWORD [4+esi]
+	add	ebx,edi
+	mov	edi,ecx
+	rol	ebx,23
+	add	ebx,ecx
+	; R2 36
+	xor	edi,edx
+	xor	edi,ebx
+	lea	eax,[2763975236+ebp*1+eax]
+	add	eax,edi
+	rol	eax,4
+	mov	ebp,DWORD [16+esi]
+	mov	edi,ebx
+	; R2 37
+	lea	edx,[1272893353+ebp*1+edx]
+	add	eax,ebx
+	xor	edi,ecx
+	xor	edi,eax
+	mov	ebp,DWORD [28+esi]
+	add	edx,edi
+	mov	edi,eax
+	rol	edx,11
+	add	edx,eax
+	; R2 38
+	xor	edi,ebx
+	xor	edi,edx
+	lea	ecx,[4139469664+ebp*1+ecx]
+	add	ecx,edi
+	rol	ecx,16
+	mov	ebp,DWORD [40+esi]
+	mov	edi,edx
+	; R2 39
+	lea	ebx,[3200236656+ebp*1+ebx]
+	add	ecx,edx
+	xor	edi,eax
+	xor	edi,ecx
+	mov	ebp,DWORD [52+esi]
+	add	ebx,edi
+	mov	edi,ecx
+	rol	ebx,23
+	add	ebx,ecx
+	; R2 40
+	xor	edi,edx
+	xor	edi,ebx
+	lea	eax,[681279174+ebp*1+eax]
+	add	eax,edi
+	rol	eax,4
+	mov	ebp,DWORD [esi]
+	mov	edi,ebx
+	; R2 41
+	lea	edx,[3936430074+ebp*1+edx]
+	add	eax,ebx
+	xor	edi,ecx
+	xor	edi,eax
+	mov	ebp,DWORD [12+esi]
+	add	edx,edi
+	mov	edi,eax
+	rol	edx,11
+	add	edx,eax
+	; R2 42
+	xor	edi,ebx
+	xor	edi,edx
+	lea	ecx,[3572445317+ebp*1+ecx]
+	add	ecx,edi
+	rol	ecx,16
+	mov	ebp,DWORD [24+esi]
+	mov	edi,edx
+	; R2 43
+	lea	ebx,[76029189+ebp*1+ebx]
+	add	ecx,edx
+	xor	edi,eax
+	xor	edi,ecx
+	mov	ebp,DWORD [36+esi]
+	add	ebx,edi
+	mov	edi,ecx
+	rol	ebx,23
+	add	ebx,ecx
+	; R2 44
+	xor	edi,edx
+	xor	edi,ebx
+	lea	eax,[3654602809+ebp*1+eax]
+	add	eax,edi
+	rol	eax,4
+	mov	ebp,DWORD [48+esi]
+	mov	edi,ebx
+	; R2 45
+	lea	edx,[3873151461+ebp*1+edx]
+	add	eax,ebx
+	xor	edi,ecx
+	xor	edi,eax
+	mov	ebp,DWORD [60+esi]
+	add	edx,edi
+	mov	edi,eax
+	rol	edx,11
+	add	edx,eax
+	; R2 46
+	xor	edi,ebx
+	xor	edi,edx
+	lea	ecx,[530742520+ebp*1+ecx]
+	add	ecx,edi
+	rol	ecx,16
+	mov	ebp,DWORD [8+esi]
+	mov	edi,edx
+	; R2 47
+	lea	ebx,[3299628645+ebp*1+ebx]
+	add	ecx,edx
+	xor	edi,eax
+	xor	edi,ecx
+	mov	ebp,DWORD [esi]
+	add	ebx,edi
+	mov	edi,-1
+	rol	ebx,23
+	add	ebx,ecx
+	; Last 16 steps: mix = (b | ~d) ^ c, rotate counts 6, 10, 15, 21; edi is reloaded with -1 so that xor yields the complement.
+	; R3 section
+	; R3 48
+	xor	edi,edx
+	or	edi,ebx
+	lea	eax,[4096336452+ebp*1+eax]
+	xor	edi,ecx
+	mov	ebp,DWORD [28+esi]
+	add	eax,edi
+	mov	edi,-1
+	rol	eax,6
+	xor	edi,ecx
+	add	eax,ebx
+	; R3 49
+	or	edi,eax
+	lea	edx,[1126891415+ebp*1+edx]
+	xor	edi,ebx
+	mov	ebp,DWORD [56+esi]
+	add	edx,edi
+	mov	edi,-1
+	rol	edx,10
+	xor	edi,ebx
+	add	edx,eax
+	; R3 50
+	or	edi,edx
+	lea	ecx,[2878612391+ebp*1+ecx]
+	xor	edi,eax
+	mov	ebp,DWORD [20+esi]
+	add	ecx,edi
+	mov	edi,-1
+	rol	ecx,15
+	xor	edi,eax
+	add	ecx,edx
+	; R3 51
+	or	edi,ecx
+	lea	ebx,[4237533241+ebp*1+ebx]
+	xor	edi,edx
+	mov	ebp,DWORD [48+esi]
+	add	ebx,edi
+	mov	edi,-1
+	rol	ebx,21
+	xor	edi,edx
+	add	ebx,ecx
+	; R3 52
+	or	edi,ebx
+	lea	eax,[1700485571+ebp*1+eax]
+	xor	edi,ecx
+	mov	ebp,DWORD [12+esi]
+	add	eax,edi
+	mov	edi,-1
+	rol	eax,6
+	xor	edi,ecx
+	add	eax,ebx
+	; R3 53
+	or	edi,eax
+	lea	edx,[2399980690+ebp*1+edx]
+	xor	edi,ebx
+	mov	ebp,DWORD [40+esi]
+	add	edx,edi
+	mov	edi,-1
+	rol	edx,10
+	xor	edi,ebx
+	add	edx,eax
+	; R3 54
+	or	edi,edx
+	lea	ecx,[4293915773+ebp*1+ecx]
+	xor	edi,eax
+	mov	ebp,DWORD [4+esi]
+	add	ecx,edi
+	mov	edi,-1
+	rol	ecx,15
+	xor	edi,eax
+	add	ecx,edx
+	; R3 55
+	or	edi,ecx
+	lea	ebx,[2240044497+ebp*1+ebx]
+	xor	edi,edx
+	mov	ebp,DWORD [32+esi]
+	add	ebx,edi
+	mov	edi,-1
+	rol	ebx,21
+	xor	edi,edx
+	add	ebx,ecx
+	; R3 56
+	or	edi,ebx
+	lea	eax,[1873313359+ebp*1+eax]
+	xor	edi,ecx
+	mov	ebp,DWORD [60+esi]
+	add	eax,edi
+	mov	edi,-1
+	rol	eax,6
+	xor	edi,ecx
+	add	eax,ebx
+	; R3 57
+	or	edi,eax
+	lea	edx,[4264355552+ebp*1+edx]
+	xor	edi,ebx
+	mov	ebp,DWORD [24+esi]
+	add	edx,edi
+	mov	edi,-1
+	rol	edx,10
+	xor	edi,ebx
+	add	edx,eax
+	; R3 58
+	or	edi,edx
+	lea	ecx,[2734768916+ebp*1+ecx]
+	xor	edi,eax
+	mov	ebp,DWORD [52+esi]
+	add	ecx,edi
+	mov	edi,-1
+	rol	ecx,15
+	xor	edi,eax
+	add	ecx,edx
+	; R3 59
+	or	edi,ecx
+	lea	ebx,[1309151649+ebp*1+ebx]
+	xor	edi,edx
+	mov	ebp,DWORD [16+esi]
+	add	ebx,edi
+	mov	edi,-1
+	rol	ebx,21
+	xor	edi,edx
+	add	ebx,ecx
+	; R3 60
+	or	edi,ebx
+	lea	eax,[4149444226+ebp*1+eax]
+	xor	edi,ecx
+	mov	ebp,DWORD [44+esi]
+	add	eax,edi
+	mov	edi,-1
+	rol	eax,6
+	xor	edi,ecx
+	add	eax,ebx
+	; R3 61
+	or	edi,eax
+	lea	edx,[3174756917+ebp*1+edx]
+	xor	edi,ebx
+	mov	ebp,DWORD [8+esi]
+	add	edx,edi
+	mov	edi,-1
+	rol	edx,10
+	xor	edi,ebx
+	add	edx,eax
+	; R3 62
+	or	edi,edx
+	lea	ecx,[718787259+ebp*1+ecx]
+	xor	edi,eax
+	mov	ebp,DWORD [36+esi]
+	add	ecx,edi
+	mov	edi,-1
+	rol	ecx,15
+	xor	edi,eax
+	add	ecx,edx
+	; R3 63
+	or	edi,ecx
+	lea	ebx,[3951481745+ebp*1+ebx]
+	xor	edi,edx
+	mov	ebp,DWORD [24+esp]	; ebp = state pointer: the 1st argument, 24 bytes up after five pushes
+	add	ebx,edi
+	add	esi,64	; advance the data pointer to the next 64-byte block
+	rol	ebx,21
+	mov	edi,DWORD [ebp]
+	add	ebx,ecx
+	add	eax,edi	; add the state from before this block into the working registers
+	mov	edi,DWORD [4+ebp]
+	add	ebx,edi
+	mov	edi,DWORD [8+ebp]
+	add	ecx,edi
+	mov	edi,DWORD [12+ebp]
+	add	edx,edi
+	mov	DWORD [ebp],eax	; store the updated state back through the state pointer
+	mov	DWORD [4+ebp],ebx
+	mov	edi,DWORD [esp]	; edi = last-block address saved in the prologue
+	mov	DWORD [8+ebp],ecx
+	mov	DWORD [12+ebp],edx
+	cmp	edi,esi
+	jae	NEAR L$000start	; loop while last-block address >= next data pointer (unsigned compare)
+	pop	eax	; discard the saved last-block address
+	pop	ebx
+	pop	ebp
+	pop	edi
+	pop	esi
+	ret
+%else
+; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738
+ret
+%endif
diff --git a/gen/crypto/md5-x86_64-apple.S b/gen/crypto/md5-x86_64-apple.S
new file mode 100644
index 0000000..e4c0241
--- /dev/null
+++ b/gen/crypto/md5-x86_64-apple.S
@@ -0,0 +1,690 @@
+// This file is generated from a similarly-named Perl script in the BoringSSL
+// source tree. Do not edit by hand.
+
+#include <openssl/asm_base.h>
+
+#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__)
+.text	
+.p2align	4
+
+.globl	_md5_block_asm_data_order
+.private_extern _md5_block_asm_data_order
+
+_md5_block_asm_data_order:
+
+_CET_ENDBR
+	pushq	%rbp
+
+	pushq	%rbx
+
+	pushq	%r12
+
+	pushq	%r14
+
+	pushq	%r15
+
+L$prologue:
+
+
+
+
+	movq	%rdi,%rbp
+	shlq	$6,%rdx
+	leaq	(%rsi,%rdx,1),%rdi
+	movl	0(%rbp),%eax
+	movl	4(%rbp),%ebx
+	movl	8(%rbp),%ecx
+	movl	12(%rbp),%edx
+
+
+
+
+
+
+
+	cmpq	%rdi,%rsi
+	je	L$end
+
+
+L$loop:
+	movl	%eax,%r8d
+	movl	%ebx,%r9d
+	movl	%ecx,%r14d
+	movl	%edx,%r15d
+	movl	0(%rsi),%r10d
+	movl	%edx,%r11d
+	xorl	%ecx,%r11d
+	leal	-680876936(%rax,%r10,1),%eax
+	andl	%ebx,%r11d
+	xorl	%edx,%r11d
+	movl	4(%rsi),%r10d
+	addl	%r11d,%eax
+	roll	$7,%eax
+	movl	%ecx,%r11d
+	addl	%ebx,%eax
+	xorl	%ebx,%r11d
+	leal	-389564586(%rdx,%r10,1),%edx
+	andl	%eax,%r11d
+	xorl	%ecx,%r11d
+	movl	8(%rsi),%r10d
+	addl	%r11d,%edx
+	roll	$12,%edx
+	movl	%ebx,%r11d
+	addl	%eax,%edx
+	xorl	%eax,%r11d
+	leal	606105819(%rcx,%r10,1),%ecx
+	andl	%edx,%r11d
+	xorl	%ebx,%r11d
+	movl	12(%rsi),%r10d
+	addl	%r11d,%ecx
+	roll	$17,%ecx
+	movl	%eax,%r11d
+	addl	%edx,%ecx
+	xorl	%edx,%r11d
+	leal	-1044525330(%rbx,%r10,1),%ebx
+	andl	%ecx,%r11d
+	xorl	%eax,%r11d
+	movl	16(%rsi),%r10d
+	addl	%r11d,%ebx
+	roll	$22,%ebx
+	movl	%edx,%r11d
+	addl	%ecx,%ebx
+	xorl	%ecx,%r11d
+	leal	-176418897(%rax,%r10,1),%eax
+	andl	%ebx,%r11d
+	xorl	%edx,%r11d
+	movl	20(%rsi),%r10d
+	addl	%r11d,%eax
+	roll	$7,%eax
+	movl	%ecx,%r11d
+	addl	%ebx,%eax
+	xorl	%ebx,%r11d
+	leal	1200080426(%rdx,%r10,1),%edx
+	andl	%eax,%r11d
+	xorl	%ecx,%r11d
+	movl	24(%rsi),%r10d
+	addl	%r11d,%edx
+	roll	$12,%edx
+	movl	%ebx,%r11d
+	addl	%eax,%edx
+	xorl	%eax,%r11d
+	leal	-1473231341(%rcx,%r10,1),%ecx
+	andl	%edx,%r11d
+	xorl	%ebx,%r11d
+	movl	28(%rsi),%r10d
+	addl	%r11d,%ecx
+	roll	$17,%ecx
+	movl	%eax,%r11d
+	addl	%edx,%ecx
+	xorl	%edx,%r11d
+	leal	-45705983(%rbx,%r10,1),%ebx
+	andl	%ecx,%r11d
+	xorl	%eax,%r11d
+	movl	32(%rsi),%r10d
+	addl	%r11d,%ebx
+	roll	$22,%ebx
+	movl	%edx,%r11d
+	addl	%ecx,%ebx
+	xorl	%ecx,%r11d
+	leal	1770035416(%rax,%r10,1),%eax
+	andl	%ebx,%r11d
+	xorl	%edx,%r11d
+	movl	36(%rsi),%r10d
+	addl	%r11d,%eax
+	roll	$7,%eax
+	movl	%ecx,%r11d
+	addl	%ebx,%eax
+	xorl	%ebx,%r11d
+	leal	-1958414417(%rdx,%r10,1),%edx
+	andl	%eax,%r11d
+	xorl	%ecx,%r11d
+	movl	40(%rsi),%r10d
+	addl	%r11d,%edx
+	roll	$12,%edx
+	movl	%ebx,%r11d
+	addl	%eax,%edx
+	xorl	%eax,%r11d
+	leal	-42063(%rcx,%r10,1),%ecx
+	andl	%edx,%r11d
+	xorl	%ebx,%r11d
+	movl	44(%rsi),%r10d
+	addl	%r11d,%ecx
+	roll	$17,%ecx
+	movl	%eax,%r11d
+	addl	%edx,%ecx
+	xorl	%edx,%r11d
+	leal	-1990404162(%rbx,%r10,1),%ebx
+	andl	%ecx,%r11d
+	xorl	%eax,%r11d
+	movl	48(%rsi),%r10d
+	addl	%r11d,%ebx
+	roll	$22,%ebx
+	movl	%edx,%r11d
+	addl	%ecx,%ebx
+	xorl	%ecx,%r11d
+	leal	1804603682(%rax,%r10,1),%eax
+	andl	%ebx,%r11d
+	xorl	%edx,%r11d
+	movl	52(%rsi),%r10d
+	addl	%r11d,%eax
+	roll	$7,%eax
+	movl	%ecx,%r11d
+	addl	%ebx,%eax
+	xorl	%ebx,%r11d
+	leal	-40341101(%rdx,%r10,1),%edx
+	andl	%eax,%r11d
+	xorl	%ecx,%r11d
+	movl	56(%rsi),%r10d
+	addl	%r11d,%edx
+	roll	$12,%edx
+	movl	%ebx,%r11d
+	addl	%eax,%edx
+	xorl	%eax,%r11d
+	leal	-1502002290(%rcx,%r10,1),%ecx
+	andl	%edx,%r11d
+	xorl	%ebx,%r11d
+	movl	60(%rsi),%r10d
+	addl	%r11d,%ecx
+	roll	$17,%ecx
+	movl	%eax,%r11d
+	addl	%edx,%ecx
+	xorl	%edx,%r11d
+	leal	1236535329(%rbx,%r10,1),%ebx
+	andl	%ecx,%r11d
+	xorl	%eax,%r11d
+	movl	0(%rsi),%r10d
+	addl	%r11d,%ebx
+	roll	$22,%ebx
+	movl	%edx,%r11d
+	addl	%ecx,%ebx
+	movl	4(%rsi),%r10d
+	movl	%edx,%r11d
+	movl	%edx,%r12d
+	notl	%r11d
+	leal	-165796510(%rax,%r10,1),%eax
+	andl	%ebx,%r12d
+	andl	%ecx,%r11d
+	movl	24(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%ecx,%r11d
+	addl	%r12d,%eax
+	movl	%ecx,%r12d
+	roll	$5,%eax
+	addl	%ebx,%eax
+	notl	%r11d
+	leal	-1069501632(%rdx,%r10,1),%edx
+	andl	%eax,%r12d
+	andl	%ebx,%r11d
+	movl	44(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%ebx,%r11d
+	addl	%r12d,%edx
+	movl	%ebx,%r12d
+	roll	$9,%edx
+	addl	%eax,%edx
+	notl	%r11d
+	leal	643717713(%rcx,%r10,1),%ecx
+	andl	%edx,%r12d
+	andl	%eax,%r11d
+	movl	0(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%eax,%r11d
+	addl	%r12d,%ecx
+	movl	%eax,%r12d
+	roll	$14,%ecx
+	addl	%edx,%ecx
+	notl	%r11d
+	leal	-373897302(%rbx,%r10,1),%ebx
+	andl	%ecx,%r12d
+	andl	%edx,%r11d
+	movl	20(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%edx,%r11d
+	addl	%r12d,%ebx
+	movl	%edx,%r12d
+	roll	$20,%ebx
+	addl	%ecx,%ebx
+	notl	%r11d
+	leal	-701558691(%rax,%r10,1),%eax
+	andl	%ebx,%r12d
+	andl	%ecx,%r11d
+	movl	40(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%ecx,%r11d
+	addl	%r12d,%eax
+	movl	%ecx,%r12d
+	roll	$5,%eax
+	addl	%ebx,%eax
+	notl	%r11d
+	leal	38016083(%rdx,%r10,1),%edx
+	andl	%eax,%r12d
+	andl	%ebx,%r11d
+	movl	60(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%ebx,%r11d
+	addl	%r12d,%edx
+	movl	%ebx,%r12d
+	roll	$9,%edx
+	addl	%eax,%edx
+	notl	%r11d
+	leal	-660478335(%rcx,%r10,1),%ecx
+	andl	%edx,%r12d
+	andl	%eax,%r11d
+	movl	16(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%eax,%r11d
+	addl	%r12d,%ecx
+	movl	%eax,%r12d
+	roll	$14,%ecx
+	addl	%edx,%ecx
+	notl	%r11d
+	leal	-405537848(%rbx,%r10,1),%ebx
+	andl	%ecx,%r12d
+	andl	%edx,%r11d
+	movl	36(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%edx,%r11d
+	addl	%r12d,%ebx
+	movl	%edx,%r12d
+	roll	$20,%ebx
+	addl	%ecx,%ebx
+	notl	%r11d
+	leal	568446438(%rax,%r10,1),%eax
+	andl	%ebx,%r12d
+	andl	%ecx,%r11d
+	movl	56(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%ecx,%r11d
+	addl	%r12d,%eax
+	movl	%ecx,%r12d
+	roll	$5,%eax
+	addl	%ebx,%eax
+	notl	%r11d
+	leal	-1019803690(%rdx,%r10,1),%edx
+	andl	%eax,%r12d
+	andl	%ebx,%r11d
+	movl	12(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%ebx,%r11d
+	addl	%r12d,%edx
+	movl	%ebx,%r12d
+	roll	$9,%edx
+	addl	%eax,%edx
+	notl	%r11d
+	leal	-187363961(%rcx,%r10,1),%ecx
+	andl	%edx,%r12d
+	andl	%eax,%r11d
+	movl	32(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%eax,%r11d
+	addl	%r12d,%ecx
+	movl	%eax,%r12d
+	roll	$14,%ecx
+	addl	%edx,%ecx
+	notl	%r11d
+	leal	1163531501(%rbx,%r10,1),%ebx
+	andl	%ecx,%r12d
+	andl	%edx,%r11d
+	movl	52(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%edx,%r11d
+	addl	%r12d,%ebx
+	movl	%edx,%r12d
+	roll	$20,%ebx
+	addl	%ecx,%ebx
+	notl	%r11d
+	leal	-1444681467(%rax,%r10,1),%eax
+	andl	%ebx,%r12d
+	andl	%ecx,%r11d
+	movl	8(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%ecx,%r11d
+	addl	%r12d,%eax
+	movl	%ecx,%r12d
+	roll	$5,%eax
+	addl	%ebx,%eax
+	notl	%r11d
+	leal	-51403784(%rdx,%r10,1),%edx
+	andl	%eax,%r12d
+	andl	%ebx,%r11d
+	movl	28(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%ebx,%r11d
+	addl	%r12d,%edx
+	movl	%ebx,%r12d
+	roll	$9,%edx
+	addl	%eax,%edx
+	notl	%r11d
+	leal	1735328473(%rcx,%r10,1),%ecx
+	andl	%edx,%r12d
+	andl	%eax,%r11d
+	movl	48(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%eax,%r11d
+	addl	%r12d,%ecx
+	movl	%eax,%r12d
+	roll	$14,%ecx
+	addl	%edx,%ecx
+	notl	%r11d
+	leal	-1926607734(%rbx,%r10,1),%ebx
+	andl	%ecx,%r12d
+	andl	%edx,%r11d
+	movl	0(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%edx,%r11d
+	addl	%r12d,%ebx
+	movl	%edx,%r12d
+	roll	$20,%ebx
+	addl	%ecx,%ebx
+	movl	20(%rsi),%r10d
+	movl	%ecx,%r11d
+	leal	-378558(%rax,%r10,1),%eax
+	movl	32(%rsi),%r10d
+	xorl	%edx,%r11d
+	xorl	%ebx,%r11d
+	addl	%r11d,%eax
+	roll	$4,%eax
+	movl	%ebx,%r11d
+	addl	%ebx,%eax
+	leal	-2022574463(%rdx,%r10,1),%edx
+	movl	44(%rsi),%r10d
+	xorl	%ecx,%r11d
+	xorl	%eax,%r11d
+	addl	%r11d,%edx
+	roll	$11,%edx
+	movl	%eax,%r11d
+	addl	%eax,%edx
+	leal	1839030562(%rcx,%r10,1),%ecx
+	movl	56(%rsi),%r10d
+	xorl	%ebx,%r11d
+	xorl	%edx,%r11d
+	addl	%r11d,%ecx
+	roll	$16,%ecx
+	movl	%edx,%r11d
+	addl	%edx,%ecx
+	leal	-35309556(%rbx,%r10,1),%ebx
+	movl	4(%rsi),%r10d
+	xorl	%eax,%r11d
+	xorl	%ecx,%r11d
+	addl	%r11d,%ebx
+	roll	$23,%ebx
+	movl	%ecx,%r11d
+	addl	%ecx,%ebx
+	leal	-1530992060(%rax,%r10,1),%eax
+	movl	16(%rsi),%r10d
+	xorl	%edx,%r11d
+	xorl	%ebx,%r11d
+	addl	%r11d,%eax
+	roll	$4,%eax
+	movl	%ebx,%r11d
+	addl	%ebx,%eax
+	leal	1272893353(%rdx,%r10,1),%edx
+	movl	28(%rsi),%r10d
+	xorl	%ecx,%r11d
+	xorl	%eax,%r11d
+	addl	%r11d,%edx
+	roll	$11,%edx
+	movl	%eax,%r11d
+	addl	%eax,%edx
+	leal	-155497632(%rcx,%r10,1),%ecx
+	movl	40(%rsi),%r10d
+	xorl	%ebx,%r11d
+	xorl	%edx,%r11d
+	addl	%r11d,%ecx
+	roll	$16,%ecx
+	movl	%edx,%r11d
+	addl	%edx,%ecx
+	leal	-1094730640(%rbx,%r10,1),%ebx
+	movl	52(%rsi),%r10d
+	xorl	%eax,%r11d
+	xorl	%ecx,%r11d
+	addl	%r11d,%ebx
+	roll	$23,%ebx
+	movl	%ecx,%r11d
+	addl	%ecx,%ebx
+	leal	681279174(%rax,%r10,1),%eax
+	movl	0(%rsi),%r10d
+	xorl	%edx,%r11d
+	xorl	%ebx,%r11d
+	addl	%r11d,%eax
+	roll	$4,%eax
+	movl	%ebx,%r11d
+	addl	%ebx,%eax
+	leal	-358537222(%rdx,%r10,1),%edx
+	movl	12(%rsi),%r10d
+	xorl	%ecx,%r11d
+	xorl	%eax,%r11d
+	addl	%r11d,%edx
+	roll	$11,%edx
+	movl	%eax,%r11d
+	addl	%eax,%edx
+	leal	-722521979(%rcx,%r10,1),%ecx
+	movl	24(%rsi),%r10d
+	xorl	%ebx,%r11d
+	xorl	%edx,%r11d
+	addl	%r11d,%ecx
+	roll	$16,%ecx
+	movl	%edx,%r11d
+	addl	%edx,%ecx
+	leal	76029189(%rbx,%r10,1),%ebx
+	movl	36(%rsi),%r10d
+	xorl	%eax,%r11d
+	xorl	%ecx,%r11d
+	addl	%r11d,%ebx
+	roll	$23,%ebx
+	movl	%ecx,%r11d
+	addl	%ecx,%ebx
+	leal	-640364487(%rax,%r10,1),%eax
+	movl	48(%rsi),%r10d
+	xorl	%edx,%r11d
+	xorl	%ebx,%r11d
+	addl	%r11d,%eax
+	roll	$4,%eax
+	movl	%ebx,%r11d
+	addl	%ebx,%eax
+	leal	-421815835(%rdx,%r10,1),%edx
+	movl	60(%rsi),%r10d
+	xorl	%ecx,%r11d
+	xorl	%eax,%r11d
+	addl	%r11d,%edx
+	roll	$11,%edx
+	movl	%eax,%r11d
+	addl	%eax,%edx
+	leal	530742520(%rcx,%r10,1),%ecx
+	movl	8(%rsi),%r10d
+	xorl	%ebx,%r11d
+	xorl	%edx,%r11d
+	addl	%r11d,%ecx
+	roll	$16,%ecx
+	movl	%edx,%r11d
+	addl	%edx,%ecx
+	leal	-995338651(%rbx,%r10,1),%ebx
+	movl	0(%rsi),%r10d
+	xorl	%eax,%r11d
+	xorl	%ecx,%r11d
+	addl	%r11d,%ebx
+	roll	$23,%ebx
+	movl	%ecx,%r11d
+	addl	%ecx,%ebx
+	movl	0(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	xorl	%edx,%r11d
+	leal	-198630844(%rax,%r10,1),%eax
+	orl	%ebx,%r11d
+	xorl	%ecx,%r11d
+	addl	%r11d,%eax
+	movl	28(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$6,%eax
+	xorl	%ecx,%r11d
+	addl	%ebx,%eax
+	leal	1126891415(%rdx,%r10,1),%edx
+	orl	%eax,%r11d
+	xorl	%ebx,%r11d
+	addl	%r11d,%edx
+	movl	56(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$10,%edx
+	xorl	%ebx,%r11d
+	addl	%eax,%edx
+	leal	-1416354905(%rcx,%r10,1),%ecx
+	orl	%edx,%r11d
+	xorl	%eax,%r11d
+	addl	%r11d,%ecx
+	movl	20(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$15,%ecx
+	xorl	%eax,%r11d
+	addl	%edx,%ecx
+	leal	-57434055(%rbx,%r10,1),%ebx
+	orl	%ecx,%r11d
+	xorl	%edx,%r11d
+	addl	%r11d,%ebx
+	movl	48(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$21,%ebx
+	xorl	%edx,%r11d
+	addl	%ecx,%ebx
+	leal	1700485571(%rax,%r10,1),%eax
+	orl	%ebx,%r11d
+	xorl	%ecx,%r11d
+	addl	%r11d,%eax
+	movl	12(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$6,%eax
+	xorl	%ecx,%r11d
+	addl	%ebx,%eax
+	leal	-1894986606(%rdx,%r10,1),%edx
+	orl	%eax,%r11d
+	xorl	%ebx,%r11d
+	addl	%r11d,%edx
+	movl	40(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$10,%edx
+	xorl	%ebx,%r11d
+	addl	%eax,%edx
+	leal	-1051523(%rcx,%r10,1),%ecx
+	orl	%edx,%r11d
+	xorl	%eax,%r11d
+	addl	%r11d,%ecx
+	movl	4(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$15,%ecx
+	xorl	%eax,%r11d
+	addl	%edx,%ecx
+	leal	-2054922799(%rbx,%r10,1),%ebx
+	orl	%ecx,%r11d
+	xorl	%edx,%r11d
+	addl	%r11d,%ebx
+	movl	32(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$21,%ebx
+	xorl	%edx,%r11d
+	addl	%ecx,%ebx
+	leal	1873313359(%rax,%r10,1),%eax
+	orl	%ebx,%r11d
+	xorl	%ecx,%r11d
+	addl	%r11d,%eax
+	movl	60(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$6,%eax
+	xorl	%ecx,%r11d
+	addl	%ebx,%eax
+	leal	-30611744(%rdx,%r10,1),%edx
+	orl	%eax,%r11d
+	xorl	%ebx,%r11d
+	addl	%r11d,%edx
+	movl	24(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$10,%edx
+	xorl	%ebx,%r11d
+	addl	%eax,%edx
+	leal	-1560198380(%rcx,%r10,1),%ecx
+	orl	%edx,%r11d
+	xorl	%eax,%r11d
+	addl	%r11d,%ecx
+	movl	52(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$15,%ecx
+	xorl	%eax,%r11d
+	addl	%edx,%ecx
+	leal	1309151649(%rbx,%r10,1),%ebx
+	orl	%ecx,%r11d
+	xorl	%edx,%r11d
+	addl	%r11d,%ebx
+	movl	16(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$21,%ebx
+	xorl	%edx,%r11d
+	addl	%ecx,%ebx
+	leal	-145523070(%rax,%r10,1),%eax
+	orl	%ebx,%r11d
+	xorl	%ecx,%r11d
+	addl	%r11d,%eax
+	movl	44(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$6,%eax
+	xorl	%ecx,%r11d
+	addl	%ebx,%eax
+	leal	-1120210379(%rdx,%r10,1),%edx
+	orl	%eax,%r11d
+	xorl	%ebx,%r11d
+	addl	%r11d,%edx
+	movl	8(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$10,%edx
+	xorl	%ebx,%r11d
+	addl	%eax,%edx
+	leal	718787259(%rcx,%r10,1),%ecx
+	orl	%edx,%r11d
+	xorl	%eax,%r11d
+	addl	%r11d,%ecx
+	movl	36(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$15,%ecx
+	xorl	%eax,%r11d
+	addl	%edx,%ecx
+	leal	-343485551(%rbx,%r10,1),%ebx
+	orl	%ecx,%r11d
+	xorl	%edx,%r11d
+	addl	%r11d,%ebx
+	movl	0(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$21,%ebx
+	xorl	%edx,%r11d
+	addl	%ecx,%ebx
+
+	addl	%r8d,%eax
+	addl	%r9d,%ebx
+	addl	%r14d,%ecx
+	addl	%r15d,%edx
+
+
+	addq	$64,%rsi
+	cmpq	%rdi,%rsi
+	jb	L$loop
+
+
+L$end:
+	movl	%eax,0(%rbp)
+	movl	%ebx,4(%rbp)
+	movl	%ecx,8(%rbp)
+	movl	%edx,12(%rbp)
+
+	movq	(%rsp),%r15
+
+	movq	8(%rsp),%r14
+
+	movq	16(%rsp),%r12
+
+	movq	24(%rsp),%rbx
+
+	movq	32(%rsp),%rbp
+
+	addq	$40,%rsp
+
+L$epilogue:
+	ret
+
+
+#endif
diff --git a/gen/crypto/md5-x86_64-linux.S b/gen/crypto/md5-x86_64-linux.S
new file mode 100644
index 0000000..7b93662
--- /dev/null
+++ b/gen/crypto/md5-x86_64-linux.S
@@ -0,0 +1,695 @@
+// This file is generated from a similarly-named Perl script in the BoringSSL
+// source tree. Do not edit by hand.
+
+#include <openssl/asm_base.h>
+
+#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__)
+.text	
+.align	16
+
+.globl	md5_block_asm_data_order
+.hidden md5_block_asm_data_order
+.type	md5_block_asm_data_order,@function
+md5_block_asm_data_order:	// in: rdi = four-word hash state (read, then written back), rsi = input, rdx = number of 64-byte blocks
+.cfi_startproc	
+_CET_ENDBR
+	pushq	%rbp	// save the registers the body overwrites: rbp, rbx, r12, r14, r15
+.cfi_adjust_cfa_offset	8
+.cfi_offset	rbp,-16
+	pushq	%rbx
+.cfi_adjust_cfa_offset	8
+.cfi_offset	rbx,-24
+	pushq	%r12
+.cfi_adjust_cfa_offset	8
+.cfi_offset	r12,-32
+	pushq	%r14
+.cfi_adjust_cfa_offset	8
+.cfi_offset	r14,-40
+	pushq	%r15
+.cfi_adjust_cfa_offset	8
+.cfi_offset	r15,-48
+.Lprologue:
+
+
+
+
+	movq	%rdi,%rbp	// rbp = state pointer; rdi is reused below as the end-of-input pointer
+	shlq	$6,%rdx	// rdx = block count * 64 = input length in bytes
+	leaq	(%rsi,%rdx,1),%rdi	// rdi = one past the last input byte
+	movl	0(%rbp),%eax	// load the four 32-bit state words into eax, ebx, ecx, edx
+	movl	4(%rbp),%ebx
+	movl	8(%rbp),%ecx
+	movl	12(%rbp),%edx
+
+
+
+
+
+
+
+	cmpq	%rdi,%rsi	// zero blocks: skip the loop and store the state back unchanged
+	je	.Lend
+
+
+.Lloop:	// one iteration per 64-byte block at (%rsi); X[k] below means the dword at 4*k(%rsi)
+	movl	%eax,%r8d	// keep the incoming state in r8d/r9d/r14d/r15d for the add after step 64
+	movl	%ebx,%r9d
+	movl	%ecx,%r14d
+	movl	%edx,%r15d
+	movl	0(%rsi),%r10d	// round 1 (steps 1-16): r10d = X[0]
+	movl	%edx,%r11d
+	xorl	%ecx,%r11d	// r11d = ecx ^ edx
+	leal	-680876936(%rax,%r10,1),%eax	// step 1: eax += X[0] + constant
+	andl	%ebx,%r11d
+	xorl	%edx,%r11d	// r11d = ((ecx ^ edx) & ebx) ^ edx: bitwise select of ecx/edx by ebx
+	movl	4(%rsi),%r10d	// preload the next step's message word
+	addl	%r11d,%eax
+	roll	$7,%eax
+	movl	%ecx,%r11d	// begin the next step's select function
+	addl	%ebx,%eax	// eax = ebx + rotl(sum, 7)
+	xorl	%ebx,%r11d
+	leal	-389564586(%rdx,%r10,1),%edx	// step 2: X[1]
+	andl	%eax,%r11d
+	xorl	%ecx,%r11d
+	movl	8(%rsi),%r10d
+	addl	%r11d,%edx
+	roll	$12,%edx
+	movl	%ebx,%r11d
+	addl	%eax,%edx
+	xorl	%eax,%r11d
+	leal	606105819(%rcx,%r10,1),%ecx	// step 3: X[2]
+	andl	%edx,%r11d
+	xorl	%ebx,%r11d
+	movl	12(%rsi),%r10d
+	addl	%r11d,%ecx
+	roll	$17,%ecx
+	movl	%eax,%r11d
+	addl	%edx,%ecx
+	xorl	%edx,%r11d
+	leal	-1044525330(%rbx,%r10,1),%ebx	// step 4: X[3]
+	andl	%ecx,%r11d
+	xorl	%eax,%r11d
+	movl	16(%rsi),%r10d
+	addl	%r11d,%ebx
+	roll	$22,%ebx
+	movl	%edx,%r11d
+	addl	%ecx,%ebx
+	xorl	%ecx,%r11d
+	leal	-176418897(%rax,%r10,1),%eax	// step 5: X[4]
+	andl	%ebx,%r11d
+	xorl	%edx,%r11d
+	movl	20(%rsi),%r10d
+	addl	%r11d,%eax
+	roll	$7,%eax
+	movl	%ecx,%r11d
+	addl	%ebx,%eax
+	xorl	%ebx,%r11d
+	leal	1200080426(%rdx,%r10,1),%edx	// step 6: X[5]
+	andl	%eax,%r11d
+	xorl	%ecx,%r11d
+	movl	24(%rsi),%r10d
+	addl	%r11d,%edx
+	roll	$12,%edx
+	movl	%ebx,%r11d
+	addl	%eax,%edx
+	xorl	%eax,%r11d
+	leal	-1473231341(%rcx,%r10,1),%ecx	// step 7: X[6]
+	andl	%edx,%r11d
+	xorl	%ebx,%r11d
+	movl	28(%rsi),%r10d
+	addl	%r11d,%ecx
+	roll	$17,%ecx
+	movl	%eax,%r11d
+	addl	%edx,%ecx
+	xorl	%edx,%r11d
+	leal	-45705983(%rbx,%r10,1),%ebx	// step 8: X[7]
+	andl	%ecx,%r11d
+	xorl	%eax,%r11d
+	movl	32(%rsi),%r10d
+	addl	%r11d,%ebx
+	roll	$22,%ebx
+	movl	%edx,%r11d
+	addl	%ecx,%ebx
+	xorl	%ecx,%r11d
+	leal	1770035416(%rax,%r10,1),%eax	// step 9: X[8]
+	andl	%ebx,%r11d
+	xorl	%edx,%r11d
+	movl	36(%rsi),%r10d
+	addl	%r11d,%eax
+	roll	$7,%eax
+	movl	%ecx,%r11d
+	addl	%ebx,%eax
+	xorl	%ebx,%r11d
+	leal	-1958414417(%rdx,%r10,1),%edx	// step 10: X[9]
+	andl	%eax,%r11d
+	xorl	%ecx,%r11d
+	movl	40(%rsi),%r10d
+	addl	%r11d,%edx
+	roll	$12,%edx
+	movl	%ebx,%r11d
+	addl	%eax,%edx
+	xorl	%eax,%r11d
+	leal	-42063(%rcx,%r10,1),%ecx	// step 11: X[10]
+	andl	%edx,%r11d
+	xorl	%ebx,%r11d
+	movl	44(%rsi),%r10d
+	addl	%r11d,%ecx
+	roll	$17,%ecx
+	movl	%eax,%r11d
+	addl	%edx,%ecx
+	xorl	%edx,%r11d
+	leal	-1990404162(%rbx,%r10,1),%ebx	// step 12: X[11]
+	andl	%ecx,%r11d
+	xorl	%eax,%r11d
+	movl	48(%rsi),%r10d
+	addl	%r11d,%ebx
+	roll	$22,%ebx
+	movl	%edx,%r11d
+	addl	%ecx,%ebx
+	xorl	%ecx,%r11d
+	leal	1804603682(%rax,%r10,1),%eax	// step 13: X[12]
+	andl	%ebx,%r11d
+	xorl	%edx,%r11d
+	movl	52(%rsi),%r10d
+	addl	%r11d,%eax
+	roll	$7,%eax
+	movl	%ecx,%r11d
+	addl	%ebx,%eax
+	xorl	%ebx,%r11d
+	leal	-40341101(%rdx,%r10,1),%edx	// step 14: X[13]
+	andl	%eax,%r11d
+	xorl	%ecx,%r11d
+	movl	56(%rsi),%r10d
+	addl	%r11d,%edx
+	roll	$12,%edx
+	movl	%ebx,%r11d
+	addl	%eax,%edx
+	xorl	%eax,%r11d
+	leal	-1502002290(%rcx,%r10,1),%ecx	// step 15: X[14]
+	andl	%edx,%r11d
+	xorl	%ebx,%r11d
+	movl	60(%rsi),%r10d
+	addl	%r11d,%ecx
+	roll	$17,%ecx
+	movl	%eax,%r11d
+	addl	%edx,%ecx
+	xorl	%edx,%r11d
+	leal	1236535329(%rbx,%r10,1),%ebx	// step 16: X[15]
+	andl	%ecx,%r11d
+	xorl	%eax,%r11d
+	movl	0(%rsi),%r10d	// r10d is reloaded before its next use, so this load is redundant
+	addl	%r11d,%ebx
+	roll	$22,%ebx
+	movl	%edx,%r11d
+	addl	%ecx,%ebx
+	movl	4(%rsi),%r10d	// round 2 (steps 17-32): r10d = X[1]
+	movl	%edx,%r11d
+	movl	%edx,%r12d
+	notl	%r11d	// r11d = ~edx
+	leal	-165796510(%rax,%r10,1),%eax	// step 17: eax += X[1] + constant
+	andl	%ebx,%r12d	// r12d = ebx & edx
+	andl	%ecx,%r11d	// r11d = ecx & ~edx
+	movl	24(%rsi),%r10d	// preload the next step's message word
+	orl	%r11d,%r12d	// r12d = (ebx & edx) | (ecx & ~edx): bitwise select of ebx/ecx by edx
+	movl	%ecx,%r11d	// begin the next step's select function
+	addl	%r12d,%eax
+	movl	%ecx,%r12d
+	roll	$5,%eax
+	addl	%ebx,%eax	// eax = ebx + rotl(sum, 5)
+	notl	%r11d
+	leal	-1069501632(%rdx,%r10,1),%edx	// step 18: X[6]
+	andl	%eax,%r12d
+	andl	%ebx,%r11d
+	movl	44(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%ebx,%r11d
+	addl	%r12d,%edx
+	movl	%ebx,%r12d
+	roll	$9,%edx
+	addl	%eax,%edx
+	notl	%r11d
+	leal	643717713(%rcx,%r10,1),%ecx	// step 19: X[11]
+	andl	%edx,%r12d
+	andl	%eax,%r11d
+	movl	0(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%eax,%r11d
+	addl	%r12d,%ecx
+	movl	%eax,%r12d
+	roll	$14,%ecx
+	addl	%edx,%ecx
+	notl	%r11d
+	leal	-373897302(%rbx,%r10,1),%ebx	// step 20: X[0]
+	andl	%ecx,%r12d
+	andl	%edx,%r11d
+	movl	20(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%edx,%r11d
+	addl	%r12d,%ebx
+	movl	%edx,%r12d
+	roll	$20,%ebx
+	addl	%ecx,%ebx
+	notl	%r11d
+	leal	-701558691(%rax,%r10,1),%eax	// step 21: X[5]
+	andl	%ebx,%r12d
+	andl	%ecx,%r11d
+	movl	40(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%ecx,%r11d
+	addl	%r12d,%eax
+	movl	%ecx,%r12d
+	roll	$5,%eax
+	addl	%ebx,%eax
+	notl	%r11d
+	leal	38016083(%rdx,%r10,1),%edx	// step 22: X[10]
+	andl	%eax,%r12d
+	andl	%ebx,%r11d
+	movl	60(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%ebx,%r11d
+	addl	%r12d,%edx
+	movl	%ebx,%r12d
+	roll	$9,%edx
+	addl	%eax,%edx
+	notl	%r11d
+	leal	-660478335(%rcx,%r10,1),%ecx	// step 23: X[15]
+	andl	%edx,%r12d
+	andl	%eax,%r11d
+	movl	16(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%eax,%r11d
+	addl	%r12d,%ecx
+	movl	%eax,%r12d
+	roll	$14,%ecx
+	addl	%edx,%ecx
+	notl	%r11d
+	leal	-405537848(%rbx,%r10,1),%ebx	// step 24: X[4]
+	andl	%ecx,%r12d
+	andl	%edx,%r11d
+	movl	36(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%edx,%r11d
+	addl	%r12d,%ebx
+	movl	%edx,%r12d
+	roll	$20,%ebx
+	addl	%ecx,%ebx
+	notl	%r11d
+	leal	568446438(%rax,%r10,1),%eax	// step 25: X[9]
+	andl	%ebx,%r12d
+	andl	%ecx,%r11d
+	movl	56(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%ecx,%r11d
+	addl	%r12d,%eax
+	movl	%ecx,%r12d
+	roll	$5,%eax
+	addl	%ebx,%eax
+	notl	%r11d
+	leal	-1019803690(%rdx,%r10,1),%edx	// step 26: X[14]
+	andl	%eax,%r12d
+	andl	%ebx,%r11d
+	movl	12(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%ebx,%r11d
+	addl	%r12d,%edx
+	movl	%ebx,%r12d
+	roll	$9,%edx
+	addl	%eax,%edx
+	notl	%r11d
+	leal	-187363961(%rcx,%r10,1),%ecx	// step 27: X[3]
+	andl	%edx,%r12d
+	andl	%eax,%r11d
+	movl	32(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%eax,%r11d
+	addl	%r12d,%ecx
+	movl	%eax,%r12d
+	roll	$14,%ecx
+	addl	%edx,%ecx
+	notl	%r11d
+	leal	1163531501(%rbx,%r10,1),%ebx	// step 28: X[8]
+	andl	%ecx,%r12d
+	andl	%edx,%r11d
+	movl	52(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%edx,%r11d
+	addl	%r12d,%ebx
+	movl	%edx,%r12d
+	roll	$20,%ebx
+	addl	%ecx,%ebx
+	notl	%r11d
+	leal	-1444681467(%rax,%r10,1),%eax	// step 29: X[13]
+	andl	%ebx,%r12d
+	andl	%ecx,%r11d
+	movl	8(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%ecx,%r11d
+	addl	%r12d,%eax
+	movl	%ecx,%r12d
+	roll	$5,%eax
+	addl	%ebx,%eax
+	notl	%r11d
+	leal	-51403784(%rdx,%r10,1),%edx	// step 30: X[2]
+	andl	%eax,%r12d
+	andl	%ebx,%r11d
+	movl	28(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%ebx,%r11d
+	addl	%r12d,%edx
+	movl	%ebx,%r12d
+	roll	$9,%edx
+	addl	%eax,%edx
+	notl	%r11d
+	leal	1735328473(%rcx,%r10,1),%ecx	// step 31: X[7]
+	andl	%edx,%r12d
+	andl	%eax,%r11d
+	movl	48(%rsi),%r10d
+	orl	%r11d,%r12d
+	movl	%eax,%r11d
+	addl	%r12d,%ecx
+	movl	%eax,%r12d
+	roll	$14,%ecx
+	addl	%edx,%ecx
+	notl	%r11d
+	leal	-1926607734(%rbx,%r10,1),%ebx	// step 32: X[12]
+	andl	%ecx,%r12d
+	andl	%edx,%r11d
+	movl	0(%rsi),%r10d	// r10d is reloaded before its next use, so this load is redundant
+	orl	%r11d,%r12d
+	movl	%edx,%r11d
+	addl	%r12d,%ebx
+	movl	%edx,%r12d
+	roll	$20,%ebx
+	addl	%ecx,%ebx
+	movl	20(%rsi),%r10d	// round 3 (steps 33-48): r10d = X[5]
+	movl	%ecx,%r11d
+	leal	-378558(%rax,%r10,1),%eax	// step 33: eax += X[5] + constant
+	movl	32(%rsi),%r10d	// preload the next step's message word
+	xorl	%edx,%r11d
+	xorl	%ebx,%r11d	// r11d = ebx ^ ecx ^ edx (parity function)
+	addl	%r11d,%eax
+	roll	$4,%eax
+	movl	%ebx,%r11d	// begin the next step's parity function
+	addl	%ebx,%eax	// eax = ebx + rotl(sum, 4)
+	leal	-2022574463(%rdx,%r10,1),%edx	// step 34: X[8]
+	movl	44(%rsi),%r10d
+	xorl	%ecx,%r11d
+	xorl	%eax,%r11d
+	addl	%r11d,%edx
+	roll	$11,%edx
+	movl	%eax,%r11d
+	addl	%eax,%edx
+	leal	1839030562(%rcx,%r10,1),%ecx	// step 35: X[11]
+	movl	56(%rsi),%r10d
+	xorl	%ebx,%r11d
+	xorl	%edx,%r11d
+	addl	%r11d,%ecx
+	roll	$16,%ecx
+	movl	%edx,%r11d
+	addl	%edx,%ecx
+	leal	-35309556(%rbx,%r10,1),%ebx	// step 36: X[14]
+	movl	4(%rsi),%r10d
+	xorl	%eax,%r11d
+	xorl	%ecx,%r11d
+	addl	%r11d,%ebx
+	roll	$23,%ebx
+	movl	%ecx,%r11d
+	addl	%ecx,%ebx
+	leal	-1530992060(%rax,%r10,1),%eax	// step 37: X[1]
+	movl	16(%rsi),%r10d
+	xorl	%edx,%r11d
+	xorl	%ebx,%r11d
+	addl	%r11d,%eax
+	roll	$4,%eax
+	movl	%ebx,%r11d
+	addl	%ebx,%eax
+	leal	1272893353(%rdx,%r10,1),%edx	// step 38: X[4]
+	movl	28(%rsi),%r10d
+	xorl	%ecx,%r11d
+	xorl	%eax,%r11d
+	addl	%r11d,%edx
+	roll	$11,%edx
+	movl	%eax,%r11d
+	addl	%eax,%edx
+	leal	-155497632(%rcx,%r10,1),%ecx	// step 39: X[7]
+	movl	40(%rsi),%r10d
+	xorl	%ebx,%r11d
+	xorl	%edx,%r11d
+	addl	%r11d,%ecx
+	roll	$16,%ecx
+	movl	%edx,%r11d
+	addl	%edx,%ecx
+	leal	-1094730640(%rbx,%r10,1),%ebx	// step 40: X[10]
+	movl	52(%rsi),%r10d
+	xorl	%eax,%r11d
+	xorl	%ecx,%r11d
+	addl	%r11d,%ebx
+	roll	$23,%ebx
+	movl	%ecx,%r11d
+	addl	%ecx,%ebx
+	leal	681279174(%rax,%r10,1),%eax	// step 41: X[13]
+	movl	0(%rsi),%r10d
+	xorl	%edx,%r11d
+	xorl	%ebx,%r11d
+	addl	%r11d,%eax
+	roll	$4,%eax
+	movl	%ebx,%r11d
+	addl	%ebx,%eax
+	leal	-358537222(%rdx,%r10,1),%edx	// step 42: X[0]
+	movl	12(%rsi),%r10d
+	xorl	%ecx,%r11d
+	xorl	%eax,%r11d
+	addl	%r11d,%edx
+	roll	$11,%edx
+	movl	%eax,%r11d
+	addl	%eax,%edx
+	leal	-722521979(%rcx,%r10,1),%ecx	// step 43: X[3]
+	movl	24(%rsi),%r10d
+	xorl	%ebx,%r11d
+	xorl	%edx,%r11d
+	addl	%r11d,%ecx
+	roll	$16,%ecx
+	movl	%edx,%r11d
+	addl	%edx,%ecx
+	leal	76029189(%rbx,%r10,1),%ebx	// step 44: X[6]
+	movl	36(%rsi),%r10d
+	xorl	%eax,%r11d
+	xorl	%ecx,%r11d
+	addl	%r11d,%ebx
+	roll	$23,%ebx
+	movl	%ecx,%r11d
+	addl	%ecx,%ebx
+	leal	-640364487(%rax,%r10,1),%eax	// step 45: X[9]
+	movl	48(%rsi),%r10d
+	xorl	%edx,%r11d
+	xorl	%ebx,%r11d
+	addl	%r11d,%eax
+	roll	$4,%eax
+	movl	%ebx,%r11d
+	addl	%ebx,%eax
+	leal	-421815835(%rdx,%r10,1),%edx	// step 46: X[12]
+	movl	60(%rsi),%r10d
+	xorl	%ecx,%r11d
+	xorl	%eax,%r11d
+	addl	%r11d,%edx
+	roll	$11,%edx
+	movl	%eax,%r11d
+	addl	%eax,%edx
+	leal	530742520(%rcx,%r10,1),%ecx	// step 47: X[15]
+	movl	8(%rsi),%r10d
+	xorl	%ebx,%r11d
+	xorl	%edx,%r11d
+	addl	%r11d,%ecx
+	roll	$16,%ecx
+	movl	%edx,%r11d
+	addl	%edx,%ecx
+	leal	-995338651(%rbx,%r10,1),%ebx	// step 48: X[2]
+	movl	0(%rsi),%r10d	// r10d is reloaded before its next use, so this load is redundant
+	xorl	%eax,%r11d
+	xorl	%ecx,%r11d
+	addl	%r11d,%ebx
+	roll	$23,%ebx
+	movl	%ecx,%r11d
+	addl	%ecx,%ebx
+	movl	0(%rsi),%r10d	// round 4 (steps 49-64): r10d = X[0]
+	movl	$0xffffffff,%r11d
+	xorl	%edx,%r11d	// r11d = ~edx
+	leal	-198630844(%rax,%r10,1),%eax	// step 49: eax += X[0] + constant
+	orl	%ebx,%r11d	// r11d = ebx | ~edx
+	xorl	%ecx,%r11d	// r11d = ecx ^ (ebx | ~edx)
+	addl	%r11d,%eax
+	movl	28(%rsi),%r10d	// preload the next step's message word
+	movl	$0xffffffff,%r11d	// begin the next step's function
+	roll	$6,%eax
+	xorl	%ecx,%r11d
+	addl	%ebx,%eax	// eax = ebx + rotl(sum, 6)
+	leal	1126891415(%rdx,%r10,1),%edx	// step 50: X[7]
+	orl	%eax,%r11d
+	xorl	%ebx,%r11d
+	addl	%r11d,%edx
+	movl	56(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$10,%edx
+	xorl	%ebx,%r11d
+	addl	%eax,%edx
+	leal	-1416354905(%rcx,%r10,1),%ecx	// step 51: X[14]
+	orl	%edx,%r11d
+	xorl	%eax,%r11d
+	addl	%r11d,%ecx
+	movl	20(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$15,%ecx
+	xorl	%eax,%r11d
+	addl	%edx,%ecx
+	leal	-57434055(%rbx,%r10,1),%ebx	// step 52: X[5]
+	orl	%ecx,%r11d
+	xorl	%edx,%r11d
+	addl	%r11d,%ebx
+	movl	48(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$21,%ebx
+	xorl	%edx,%r11d
+	addl	%ecx,%ebx
+	leal	1700485571(%rax,%r10,1),%eax	// step 53: X[12]
+	orl	%ebx,%r11d
+	xorl	%ecx,%r11d
+	addl	%r11d,%eax
+	movl	12(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$6,%eax
+	xorl	%ecx,%r11d
+	addl	%ebx,%eax
+	leal	-1894986606(%rdx,%r10,1),%edx	// step 54: X[3]
+	orl	%eax,%r11d
+	xorl	%ebx,%r11d
+	addl	%r11d,%edx
+	movl	40(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$10,%edx
+	xorl	%ebx,%r11d
+	addl	%eax,%edx
+	leal	-1051523(%rcx,%r10,1),%ecx	// step 55: X[10]
+	orl	%edx,%r11d
+	xorl	%eax,%r11d
+	addl	%r11d,%ecx
+	movl	4(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$15,%ecx
+	xorl	%eax,%r11d
+	addl	%edx,%ecx
+	leal	-2054922799(%rbx,%r10,1),%ebx	// step 56: X[1]
+	orl	%ecx,%r11d
+	xorl	%edx,%r11d
+	addl	%r11d,%ebx
+	movl	32(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$21,%ebx
+	xorl	%edx,%r11d
+	addl	%ecx,%ebx
+	leal	1873313359(%rax,%r10,1),%eax	// step 57: X[8]
+	orl	%ebx,%r11d
+	xorl	%ecx,%r11d
+	addl	%r11d,%eax
+	movl	60(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$6,%eax
+	xorl	%ecx,%r11d
+	addl	%ebx,%eax
+	leal	-30611744(%rdx,%r10,1),%edx	// step 58: X[15]
+	orl	%eax,%r11d
+	xorl	%ebx,%r11d
+	addl	%r11d,%edx
+	movl	24(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$10,%edx
+	xorl	%ebx,%r11d
+	addl	%eax,%edx
+	leal	-1560198380(%rcx,%r10,1),%ecx	// step 59: X[6]
+	orl	%edx,%r11d
+	xorl	%eax,%r11d
+	addl	%r11d,%ecx
+	movl	52(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$15,%ecx
+	xorl	%eax,%r11d
+	addl	%edx,%ecx
+	leal	1309151649(%rbx,%r10,1),%ebx	// step 60: X[13]
+	orl	%ecx,%r11d
+	xorl	%edx,%r11d
+	addl	%r11d,%ebx
+	movl	16(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$21,%ebx
+	xorl	%edx,%r11d
+	addl	%ecx,%ebx
+	leal	-145523070(%rax,%r10,1),%eax	// step 61: X[4]
+	orl	%ebx,%r11d
+	xorl	%ecx,%r11d
+	addl	%r11d,%eax
+	movl	44(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$6,%eax
+	xorl	%ecx,%r11d
+	addl	%ebx,%eax
+	leal	-1120210379(%rdx,%r10,1),%edx	// step 62: X[11]
+	orl	%eax,%r11d
+	xorl	%ebx,%r11d
+	addl	%r11d,%edx
+	movl	8(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$10,%edx
+	xorl	%ebx,%r11d
+	addl	%eax,%edx
+	leal	718787259(%rcx,%r10,1),%ecx	// step 63: X[2]
+	orl	%edx,%r11d
+	xorl	%eax,%r11d
+	addl	%r11d,%ecx
+	movl	36(%rsi),%r10d
+	movl	$0xffffffff,%r11d
+	roll	$15,%ecx
+	xorl	%eax,%r11d
+	addl	%edx,%ecx
+	leal	-343485551(%rbx,%r10,1),%ebx	// step 64: X[9]
+	orl	%ecx,%r11d
+	xorl	%edx,%r11d
+	addl	%r11d,%ebx
+	movl	0(%rsi),%r10d	// r10d and r11d are both rewritten before their next use
+	movl	$0xffffffff,%r11d
+	roll	$21,%ebx
+	xorl	%edx,%r11d
+	addl	%ecx,%ebx
+
+	addl	%r8d,%eax	// add the state saved at the top of the loop back into the working registers
+	addl	%r9d,%ebx
+	addl	%r14d,%ecx
+	addl	%r15d,%edx
+
+
+	addq	$64,%rsi	// advance to the next 64-byte block
+	cmpq	%rdi,%rsi	// rdi = end of input, computed in the prologue
+	jb	.Lloop	// repeat while rsi is below the end pointer
+
+
+.Lend:	// reached after the last block, or directly from the prologue when no blocks were requested
+	movl	%eax,0(%rbp)	// store the four state words back through the saved state pointer
+	movl	%ebx,4(%rbp)
+	movl	%ecx,8(%rbp)
+	movl	%edx,12(%rbp)
+
+	movq	(%rsp),%r15	// reload the five registers pushed in the prologue (last pushed is at the lowest address)
+.cfi_restore	r15
+	movq	8(%rsp),%r14
+.cfi_restore	r14
+	movq	16(%rsp),%r12
+.cfi_restore	r12
+	movq	24(%rsp),%rbx
+.cfi_restore	rbx
+	movq	32(%rsp),%rbp
+.cfi_restore	rbp
+	addq	$40,%rsp	// release the 5 * 8 bytes of saved registers
+.cfi_adjust_cfa_offset	-40
+.Lepilogue:
+	ret
+.cfi_endproc	
+.size	md5_block_asm_data_order,.-md5_block_asm_data_order
+#endif
diff --git a/gen/crypto/md5-x86_64-win.asm b/gen/crypto/md5-x86_64-win.asm
new file mode 100644
index 0000000..f6c5b62
--- /dev/null
+++ b/gen/crypto/md5-x86_64-win.asm
@@ -0,0 +1,803 @@
+; This file is generated from a similarly-named Perl script in the BoringSSL
+; source tree. Do not edit by hand.
+
+%ifidn __OUTPUT_FORMAT__, win64
+default	rel
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+%define _CET_ENDBR
+
+%ifdef BORINGSSL_PREFIX
+%include "boringssl_prefix_symbols_nasm.inc"
+%endif
+section	.text code align=64
+
+ALIGN	16
+
+global	md5_block_asm_data_order
+
+md5_block_asm_data_order:	; in: rcx = four-word hash state (read, then written back), rdx = input, r8 = number of 64-byte blocks
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi	; rdi and rsi are stashed above the return address and reloaded in the epilogue
+	mov	rax,rsp	; rax = rsp at entry; se_handler reads it when the fault is before $L$prologue
+$L$SEH_begin_md5_block_asm_data_order:
+	mov	rdi,rcx	; move the three arguments into rdi, rsi, rdx, the registers the body below uses
+	mov	rsi,rdx
+	mov	rdx,r8
+
+
+
+_CET_ENDBR
+	push	rbp	; save the registers the body overwrites: rbp, rbx, r12, r14, r15
+
+	push	rbx
+
+	push	r12
+
+	push	r14
+
+	push	r15
+
+$L$prologue:
+
+
+
+
+	mov	rbp,rdi	; rbp = state pointer; rdi is reused below as the end-of-input pointer
+	shl	rdx,6	; rdx = block count * 64 = input length in bytes
+	lea	rdi,[rdx*1+rsi]	; rdi = one past the last input byte
+	mov	eax,DWORD[rbp]	; load the four 32-bit state words into eax, ebx, ecx, edx
+	mov	ebx,DWORD[4+rbp]
+	mov	ecx,DWORD[8+rbp]
+	mov	edx,DWORD[12+rbp]
+
+
+
+
+
+
+
+	cmp	rsi,rdi	; zero blocks: skip the loop and store the state back unchanged
+	je	NEAR $L$end
+
+
+$L$loop:	; one iteration per 64-byte block at [rsi]; X[k] below means the dword at [4*k+rsi]
+	mov	r8d,eax	; keep the incoming state in r8d/r9d/r14d/r15d for the add after step 64
+	mov	r9d,ebx
+	mov	r14d,ecx
+	mov	r15d,edx
+	mov	r10d,DWORD[rsi]	; round 1 (steps 1-16): r10d = X[0]
+	mov	r11d,edx
+	xor	r11d,ecx	; r11d = ecx ^ edx
+	lea	eax,[((-680876936))+r10*1+rax]	; step 1: eax += X[0] + constant
+	and	r11d,ebx
+	xor	r11d,edx	; r11d = ((ecx ^ edx) & ebx) ^ edx: bitwise select of ecx/edx by ebx
+	mov	r10d,DWORD[4+rsi]	; preload the next step's message word
+	add	eax,r11d
+	rol	eax,7
+	mov	r11d,ecx	; begin the next step's select function
+	add	eax,ebx	; eax = ebx + rotl(sum, 7)
+	xor	r11d,ebx
+	lea	edx,[((-389564586))+r10*1+rdx]	; step 2: X[1]
+	and	r11d,eax
+	xor	r11d,ecx
+	mov	r10d,DWORD[8+rsi]
+	add	edx,r11d
+	rol	edx,12
+	mov	r11d,ebx
+	add	edx,eax
+	xor	r11d,eax
+	lea	ecx,[606105819+r10*1+rcx]	; step 3: X[2]
+	and	r11d,edx
+	xor	r11d,ebx
+	mov	r10d,DWORD[12+rsi]
+	add	ecx,r11d
+	rol	ecx,17
+	mov	r11d,eax
+	add	ecx,edx
+	xor	r11d,edx
+	lea	ebx,[((-1044525330))+r10*1+rbx]	; step 4: X[3]
+	and	r11d,ecx
+	xor	r11d,eax
+	mov	r10d,DWORD[16+rsi]
+	add	ebx,r11d
+	rol	ebx,22
+	mov	r11d,edx
+	add	ebx,ecx
+	xor	r11d,ecx
+	lea	eax,[((-176418897))+r10*1+rax]	; step 5: X[4]
+	and	r11d,ebx
+	xor	r11d,edx
+	mov	r10d,DWORD[20+rsi]
+	add	eax,r11d
+	rol	eax,7
+	mov	r11d,ecx
+	add	eax,ebx
+	xor	r11d,ebx
+	lea	edx,[1200080426+r10*1+rdx]	; step 6: X[5]
+	and	r11d,eax
+	xor	r11d,ecx
+	mov	r10d,DWORD[24+rsi]
+	add	edx,r11d
+	rol	edx,12
+	mov	r11d,ebx
+	add	edx,eax
+	xor	r11d,eax
+	lea	ecx,[((-1473231341))+r10*1+rcx]	; step 7: X[6]
+	and	r11d,edx
+	xor	r11d,ebx
+	mov	r10d,DWORD[28+rsi]
+	add	ecx,r11d
+	rol	ecx,17
+	mov	r11d,eax
+	add	ecx,edx
+	xor	r11d,edx
+	lea	ebx,[((-45705983))+r10*1+rbx]	; step 8: X[7]
+	and	r11d,ecx
+	xor	r11d,eax
+	mov	r10d,DWORD[32+rsi]
+	add	ebx,r11d
+	rol	ebx,22
+	mov	r11d,edx
+	add	ebx,ecx
+	xor	r11d,ecx
+	lea	eax,[1770035416+r10*1+rax]	; step 9: X[8]
+	and	r11d,ebx
+	xor	r11d,edx
+	mov	r10d,DWORD[36+rsi]
+	add	eax,r11d
+	rol	eax,7
+	mov	r11d,ecx
+	add	eax,ebx
+	xor	r11d,ebx
+	lea	edx,[((-1958414417))+r10*1+rdx]	; step 10: X[9]
+	and	r11d,eax
+	xor	r11d,ecx
+	mov	r10d,DWORD[40+rsi]
+	add	edx,r11d
+	rol	edx,12
+	mov	r11d,ebx
+	add	edx,eax
+	xor	r11d,eax
+	lea	ecx,[((-42063))+r10*1+rcx]	; step 11: X[10]
+	and	r11d,edx
+	xor	r11d,ebx
+	mov	r10d,DWORD[44+rsi]
+	add	ecx,r11d
+	rol	ecx,17
+	mov	r11d,eax
+	add	ecx,edx
+	xor	r11d,edx
+	lea	ebx,[((-1990404162))+r10*1+rbx]	; step 12: X[11]
+	and	r11d,ecx
+	xor	r11d,eax
+	mov	r10d,DWORD[48+rsi]
+	add	ebx,r11d
+	rol	ebx,22
+	mov	r11d,edx
+	add	ebx,ecx
+	xor	r11d,ecx
+	lea	eax,[1804603682+r10*1+rax]	; step 13: X[12]
+	and	r11d,ebx
+	xor	r11d,edx
+	mov	r10d,DWORD[52+rsi]
+	add	eax,r11d
+	rol	eax,7
+	mov	r11d,ecx
+	add	eax,ebx
+	xor	r11d,ebx
+	lea	edx,[((-40341101))+r10*1+rdx]	; step 14: X[13]
+	and	r11d,eax
+	xor	r11d,ecx
+	mov	r10d,DWORD[56+rsi]
+	add	edx,r11d
+	rol	edx,12
+	mov	r11d,ebx
+	add	edx,eax
+	xor	r11d,eax
+	lea	ecx,[((-1502002290))+r10*1+rcx]	; step 15: X[14]
+	and	r11d,edx
+	xor	r11d,ebx
+	mov	r10d,DWORD[60+rsi]
+	add	ecx,r11d
+	rol	ecx,17
+	mov	r11d,eax
+	add	ecx,edx
+	xor	r11d,edx
+	lea	ebx,[1236535329+r10*1+rbx]	; step 16: X[15]
+	and	r11d,ecx
+	xor	r11d,eax
+	mov	r10d,DWORD[rsi]	; r10d is reloaded before its next use, so this load is redundant
+	add	ebx,r11d
+	rol	ebx,22
+	mov	r11d,edx
+	add	ebx,ecx
+	mov	r10d,DWORD[4+rsi]	; round 2 (steps 17-32): r10d = X[1]
+	mov	r11d,edx
+	mov	r12d,edx
+	not	r11d	; r11d = ~edx
+	lea	eax,[((-165796510))+r10*1+rax]	; step 17: eax += X[1] + constant
+	and	r12d,ebx	; r12d = ebx & edx
+	and	r11d,ecx	; r11d = ecx & ~edx
+	mov	r10d,DWORD[24+rsi]	; preload the next step's message word
+	or	r12d,r11d	; r12d = (ebx & edx) | (ecx & ~edx): bitwise select of ebx/ecx by edx
+	mov	r11d,ecx	; begin the next step's select function
+	add	eax,r12d
+	mov	r12d,ecx
+	rol	eax,5
+	add	eax,ebx	; eax = ebx + rotl(sum, 5)
+	not	r11d
+	lea	edx,[((-1069501632))+r10*1+rdx]	; step 18: X[6]
+	and	r12d,eax
+	and	r11d,ebx
+	mov	r10d,DWORD[44+rsi]
+	or	r12d,r11d
+	mov	r11d,ebx
+	add	edx,r12d
+	mov	r12d,ebx
+	rol	edx,9
+	add	edx,eax
+	not	r11d
+	lea	ecx,[643717713+r10*1+rcx]	; step 19: X[11]
+	and	r12d,edx
+	and	r11d,eax
+	mov	r10d,DWORD[rsi]
+	or	r12d,r11d
+	mov	r11d,eax
+	add	ecx,r12d
+	mov	r12d,eax
+	rol	ecx,14
+	add	ecx,edx
+	not	r11d
+	lea	ebx,[((-373897302))+r10*1+rbx]	; step 20: X[0]
+	and	r12d,ecx
+	and	r11d,edx
+	mov	r10d,DWORD[20+rsi]
+	or	r12d,r11d
+	mov	r11d,edx
+	add	ebx,r12d
+	mov	r12d,edx
+	rol	ebx,20
+	add	ebx,ecx
+	not	r11d
+	lea	eax,[((-701558691))+r10*1+rax]	; step 21: X[5]
+	and	r12d,ebx
+	and	r11d,ecx
+	mov	r10d,DWORD[40+rsi]
+	or	r12d,r11d
+	mov	r11d,ecx
+	add	eax,r12d
+	mov	r12d,ecx
+	rol	eax,5
+	add	eax,ebx
+	not	r11d
+	lea	edx,[38016083+r10*1+rdx]	; step 22: X[10]
+	and	r12d,eax
+	and	r11d,ebx
+	mov	r10d,DWORD[60+rsi]
+	or	r12d,r11d
+	mov	r11d,ebx
+	add	edx,r12d
+	mov	r12d,ebx
+	rol	edx,9
+	add	edx,eax
+	not	r11d
+	lea	ecx,[((-660478335))+r10*1+rcx]	; step 23: X[15]
+	and	r12d,edx
+	and	r11d,eax
+	mov	r10d,DWORD[16+rsi]
+	or	r12d,r11d
+	mov	r11d,eax
+	add	ecx,r12d
+	mov	r12d,eax
+	rol	ecx,14
+	add	ecx,edx
+	not	r11d
+	lea	ebx,[((-405537848))+r10*1+rbx]	; step 24: X[4]
+	and	r12d,ecx
+	and	r11d,edx
+	mov	r10d,DWORD[36+rsi]
+	or	r12d,r11d
+	mov	r11d,edx
+	add	ebx,r12d
+	mov	r12d,edx
+	rol	ebx,20
+	add	ebx,ecx
+	not	r11d
+	lea	eax,[568446438+r10*1+rax]	; step 25: X[9]
+	and	r12d,ebx
+	and	r11d,ecx
+	mov	r10d,DWORD[56+rsi]
+	or	r12d,r11d
+	mov	r11d,ecx
+	add	eax,r12d
+	mov	r12d,ecx
+	rol	eax,5
+	add	eax,ebx
+	not	r11d
+	lea	edx,[((-1019803690))+r10*1+rdx]	; step 26: X[14]
+	and	r12d,eax
+	and	r11d,ebx
+	mov	r10d,DWORD[12+rsi]
+	or	r12d,r11d
+	mov	r11d,ebx
+	add	edx,r12d
+	mov	r12d,ebx
+	rol	edx,9
+	add	edx,eax
+	not	r11d
+	lea	ecx,[((-187363961))+r10*1+rcx]	; step 27: X[3]
+	and	r12d,edx
+	and	r11d,eax
+	mov	r10d,DWORD[32+rsi]
+	or	r12d,r11d
+	mov	r11d,eax
+	add	ecx,r12d
+	mov	r12d,eax
+	rol	ecx,14
+	add	ecx,edx
+	not	r11d
+	lea	ebx,[1163531501+r10*1+rbx]	; step 28: X[8]
+	and	r12d,ecx
+	and	r11d,edx
+	mov	r10d,DWORD[52+rsi]
+	or	r12d,r11d
+	mov	r11d,edx
+	add	ebx,r12d
+	mov	r12d,edx
+	rol	ebx,20
+	add	ebx,ecx
+	not	r11d
+	lea	eax,[((-1444681467))+r10*1+rax]	; step 29: X[13]
+	and	r12d,ebx
+	and	r11d,ecx
+	mov	r10d,DWORD[8+rsi]
+	or	r12d,r11d
+	mov	r11d,ecx
+	add	eax,r12d
+	mov	r12d,ecx
+	rol	eax,5
+	add	eax,ebx
+	not	r11d
+	lea	edx,[((-51403784))+r10*1+rdx]	; step 30: X[2]
+	and	r12d,eax
+	and	r11d,ebx
+	mov	r10d,DWORD[28+rsi]
+	or	r12d,r11d
+	mov	r11d,ebx
+	add	edx,r12d
+	mov	r12d,ebx
+	rol	edx,9
+	add	edx,eax
+	not	r11d
+	lea	ecx,[1735328473+r10*1+rcx]	; step 31: X[7]
+	and	r12d,edx
+	and	r11d,eax
+	mov	r10d,DWORD[48+rsi]
+	or	r12d,r11d
+	mov	r11d,eax
+	add	ecx,r12d
+	mov	r12d,eax
+	rol	ecx,14
+	add	ecx,edx
+	not	r11d
+	lea	ebx,[((-1926607734))+r10*1+rbx]	; step 32: X[12]
+	and	r12d,ecx
+	and	r11d,edx
+	mov	r10d,DWORD[rsi]	; r10d is reloaded before its next use, so this load is redundant
+	or	r12d,r11d
+	mov	r11d,edx
+	add	ebx,r12d
+	mov	r12d,edx
+	rol	ebx,20
+	add	ebx,ecx
+	mov	r10d,DWORD[20+rsi]	; round 3 (steps 33-48): r10d = X[5]
+	mov	r11d,ecx
+	lea	eax,[((-378558))+r10*1+rax]	; step 33: eax += X[5] + constant
+	mov	r10d,DWORD[32+rsi]	; preload the next step's message word
+	xor	r11d,edx
+	xor	r11d,ebx	; r11d = ebx ^ ecx ^ edx (parity function)
+	add	eax,r11d
+	rol	eax,4
+	mov	r11d,ebx	; begin the next step's parity function
+	add	eax,ebx	; eax = ebx + rotl(sum, 4)
+	lea	edx,[((-2022574463))+r10*1+rdx]	; step 34: X[8]
+	mov	r10d,DWORD[44+rsi]
+	xor	r11d,ecx
+	xor	r11d,eax
+	add	edx,r11d
+	rol	edx,11
+	mov	r11d,eax
+	add	edx,eax
+	lea	ecx,[1839030562+r10*1+rcx]	; step 35: X[11]
+	mov	r10d,DWORD[56+rsi]
+	xor	r11d,ebx
+	xor	r11d,edx
+	add	ecx,r11d
+	rol	ecx,16
+	mov	r11d,edx
+	add	ecx,edx
+	lea	ebx,[((-35309556))+r10*1+rbx]	; step 36: X[14]
+	mov	r10d,DWORD[4+rsi]
+	xor	r11d,eax
+	xor	r11d,ecx
+	add	ebx,r11d
+	rol	ebx,23
+	mov	r11d,ecx
+	add	ebx,ecx
+	lea	eax,[((-1530992060))+r10*1+rax]	; step 37: X[1]
+	mov	r10d,DWORD[16+rsi]
+	xor	r11d,edx
+	xor	r11d,ebx
+	add	eax,r11d
+	rol	eax,4
+	mov	r11d,ebx
+	add	eax,ebx
+	lea	edx,[1272893353+r10*1+rdx]	; step 38: X[4]
+	mov	r10d,DWORD[28+rsi]
+	xor	r11d,ecx
+	xor	r11d,eax
+	add	edx,r11d
+	rol	edx,11
+	mov	r11d,eax
+	add	edx,eax
+	lea	ecx,[((-155497632))+r10*1+rcx]	; step 39: X[7]
+	mov	r10d,DWORD[40+rsi]
+	xor	r11d,ebx
+	xor	r11d,edx
+	add	ecx,r11d
+	rol	ecx,16
+	mov	r11d,edx
+	add	ecx,edx
+	lea	ebx,[((-1094730640))+r10*1+rbx]	; step 40: X[10]
+	mov	r10d,DWORD[52+rsi]
+	xor	r11d,eax
+	xor	r11d,ecx
+	add	ebx,r11d
+	rol	ebx,23
+	mov	r11d,ecx
+	add	ebx,ecx
+	lea	eax,[681279174+r10*1+rax]	; step 41: X[13]
+	mov	r10d,DWORD[rsi]
+	xor	r11d,edx
+	xor	r11d,ebx
+	add	eax,r11d
+	rol	eax,4
+	mov	r11d,ebx
+	add	eax,ebx
+	lea	edx,[((-358537222))+r10*1+rdx]	; step 42: X[0]
+	mov	r10d,DWORD[12+rsi]
+	xor	r11d,ecx
+	xor	r11d,eax
+	add	edx,r11d
+	rol	edx,11
+	mov	r11d,eax
+	add	edx,eax
+	lea	ecx,[((-722521979))+r10*1+rcx]	; step 43: X[3]
+	mov	r10d,DWORD[24+rsi]
+	xor	r11d,ebx
+	xor	r11d,edx
+	add	ecx,r11d
+	rol	ecx,16
+	mov	r11d,edx
+	add	ecx,edx
+	lea	ebx,[76029189+r10*1+rbx]	; step 44: X[6]
+	mov	r10d,DWORD[36+rsi]
+	xor	r11d,eax
+	xor	r11d,ecx
+	add	ebx,r11d
+	rol	ebx,23
+	mov	r11d,ecx
+	add	ebx,ecx
+	lea	eax,[((-640364487))+r10*1+rax]	; step 45: X[9]
+	mov	r10d,DWORD[48+rsi]
+	xor	r11d,edx
+	xor	r11d,ebx
+	add	eax,r11d
+	rol	eax,4
+	mov	r11d,ebx
+	add	eax,ebx
+	lea	edx,[((-421815835))+r10*1+rdx]	; step 46: X[12]
+	mov	r10d,DWORD[60+rsi]
+	xor	r11d,ecx
+	xor	r11d,eax
+	add	edx,r11d
+	rol	edx,11
+	mov	r11d,eax
+	add	edx,eax
+	lea	ecx,[530742520+r10*1+rcx]	; step 47: X[15]
+	mov	r10d,DWORD[8+rsi]
+	xor	r11d,ebx
+	xor	r11d,edx
+	add	ecx,r11d
+	rol	ecx,16
+	mov	r11d,edx
+	add	ecx,edx
+	lea	ebx,[((-995338651))+r10*1+rbx]	; step 48: X[2]
+	mov	r10d,DWORD[rsi]	; r10d is reloaded before its next use, so this load is redundant
+	xor	r11d,eax
+	xor	r11d,ecx
+	add	ebx,r11d
+	rol	ebx,23
+	mov	r11d,ecx
+	add	ebx,ecx
+	mov	r10d,DWORD[rsi]	; round 4 (steps 49-64): r10d = X[0]
+	mov	r11d,0xffffffff
+	xor	r11d,edx	; r11d = ~edx
+	lea	eax,[((-198630844))+r10*1+rax]	; step 49: eax += X[0] + constant
+	or	r11d,ebx	; r11d = ebx | ~edx
+	xor	r11d,ecx	; r11d = ecx ^ (ebx | ~edx)
+	add	eax,r11d
+	mov	r10d,DWORD[28+rsi]	; preload the next step's message word
+	mov	r11d,0xffffffff	; begin the next step's function
+	rol	eax,6
+	xor	r11d,ecx
+	add	eax,ebx	; eax = ebx + rotl(sum, 6)
+	lea	edx,[1126891415+r10*1+rdx]	; step 50: X[7]
+	or	r11d,eax
+	xor	r11d,ebx
+	add	edx,r11d
+	mov	r10d,DWORD[56+rsi]
+	mov	r11d,0xffffffff
+	rol	edx,10
+	xor	r11d,ebx
+	add	edx,eax
+	lea	ecx,[((-1416354905))+r10*1+rcx]	; step 51: X[14]
+	or	r11d,edx
+	xor	r11d,eax
+	add	ecx,r11d
+	mov	r10d,DWORD[20+rsi]
+	mov	r11d,0xffffffff
+	rol	ecx,15
+	xor	r11d,eax
+	add	ecx,edx
+	lea	ebx,[((-57434055))+r10*1+rbx]	; step 52: X[5]
+	or	r11d,ecx
+	xor	r11d,edx
+	add	ebx,r11d
+	mov	r10d,DWORD[48+rsi]
+	mov	r11d,0xffffffff
+	rol	ebx,21
+	xor	r11d,edx
+	add	ebx,ecx
+	lea	eax,[1700485571+r10*1+rax]	; step 53: X[12]
+	or	r11d,ebx
+	xor	r11d,ecx
+	add	eax,r11d
+	mov	r10d,DWORD[12+rsi]
+	mov	r11d,0xffffffff
+	rol	eax,6
+	xor	r11d,ecx
+	add	eax,ebx
+	lea	edx,[((-1894986606))+r10*1+rdx]	; step 54: X[3]
+	or	r11d,eax
+	xor	r11d,ebx
+	add	edx,r11d
+	mov	r10d,DWORD[40+rsi]
+	mov	r11d,0xffffffff
+	rol	edx,10
+	xor	r11d,ebx
+	add	edx,eax
+	lea	ecx,[((-1051523))+r10*1+rcx]	; step 55: X[10]
+	or	r11d,edx
+	xor	r11d,eax
+	add	ecx,r11d
+	mov	r10d,DWORD[4+rsi]
+	mov	r11d,0xffffffff
+	rol	ecx,15
+	xor	r11d,eax
+	add	ecx,edx
+	lea	ebx,[((-2054922799))+r10*1+rbx]	; step 56: X[1]
+	or	r11d,ecx
+	xor	r11d,edx
+	add	ebx,r11d
+	mov	r10d,DWORD[32+rsi]
+	mov	r11d,0xffffffff
+	rol	ebx,21
+	xor	r11d,edx
+	add	ebx,ecx
+	lea	eax,[1873313359+r10*1+rax]	; step 57: X[8]
+	or	r11d,ebx
+	xor	r11d,ecx
+	add	eax,r11d
+	mov	r10d,DWORD[60+rsi]
+	mov	r11d,0xffffffff
+	rol	eax,6
+	xor	r11d,ecx
+	add	eax,ebx
+	lea	edx,[((-30611744))+r10*1+rdx]	; step 58: X[15]
+	or	r11d,eax
+	xor	r11d,ebx
+	add	edx,r11d
+	mov	r10d,DWORD[24+rsi]
+	mov	r11d,0xffffffff
+	rol	edx,10
+	xor	r11d,ebx
+	add	edx,eax
+	lea	ecx,[((-1560198380))+r10*1+rcx]	; step 59: X[6]
+	or	r11d,edx
+	xor	r11d,eax
+	add	ecx,r11d
+	mov	r10d,DWORD[52+rsi]
+	mov	r11d,0xffffffff
+	rol	ecx,15
+	xor	r11d,eax
+	add	ecx,edx
+	lea	ebx,[1309151649+r10*1+rbx]	; step 60: X[13]
+	or	r11d,ecx
+	xor	r11d,edx
+	add	ebx,r11d
+	mov	r10d,DWORD[16+rsi]
+	mov	r11d,0xffffffff
+	rol	ebx,21
+	xor	r11d,edx
+	add	ebx,ecx
+	lea	eax,[((-145523070))+r10*1+rax]	; step 61: X[4]
+	or	r11d,ebx
+	xor	r11d,ecx
+	add	eax,r11d
+	mov	r10d,DWORD[44+rsi]
+	mov	r11d,0xffffffff
+	rol	eax,6
+	xor	r11d,ecx
+	add	eax,ebx
+	lea	edx,[((-1120210379))+r10*1+rdx]	; step 62: X[11]
+	or	r11d,eax
+	xor	r11d,ebx
+	add	edx,r11d
+	mov	r10d,DWORD[8+rsi]
+	mov	r11d,0xffffffff
+	rol	edx,10
+	xor	r11d,ebx
+	add	edx,eax
+	lea	ecx,[718787259+r10*1+rcx]	; step 63: X[2]
+	or	r11d,edx
+	xor	r11d,eax
+	add	ecx,r11d
+	mov	r10d,DWORD[36+rsi]
+	mov	r11d,0xffffffff
+	rol	ecx,15
+	xor	r11d,eax
+	add	ecx,edx
+	lea	ebx,[((-343485551))+r10*1+rbx]	; step 64: X[9]
+	or	r11d,ecx
+	xor	r11d,edx
+	add	ebx,r11d
+	mov	r10d,DWORD[rsi]	; r10d and r11d are both rewritten before their next use
+	mov	r11d,0xffffffff
+	rol	ebx,21
+	xor	r11d,edx
+	add	ebx,ecx
+
+	add	eax,r8d	; add the state saved at the top of the loop back into the working registers
+	add	ebx,r9d
+	add	ecx,r14d
+	add	edx,r15d
+
+
+	add	rsi,64	; advance to the next 64-byte block
+	cmp	rsi,rdi	; rdi = end of input, computed in the prologue
+	jb	NEAR $L$loop	; repeat while rsi is below the end pointer
+
+
+$L$end:	; reached after the last block, or directly from the prologue when no blocks were requested
+	mov	DWORD[rbp],eax	; store the four state words back through the saved state pointer
+	mov	DWORD[4+rbp],ebx
+	mov	DWORD[8+rbp],ecx
+	mov	DWORD[12+rbp],edx
+
+	mov	r15,QWORD[rsp]	; reload the five registers pushed in the prologue (last pushed is at the lowest address)
+
+	mov	r14,QWORD[8+rsp]
+
+	mov	r12,QWORD[16+rsp]
+
+	mov	rbx,QWORD[24+rsp]
+
+	mov	rbp,QWORD[32+rsp]
+
+	add	rsp,40	; release the 5 * 8 bytes of saved registers
+
+$L$epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]	; rdi/rsi come back from the slots written at function entry
+	ret
+
+$L$SEH_end_md5_block_asm_data_order:
+EXTERN	__imp_RtlVirtualUnwind
+
+ALIGN	16
+se_handler:	; exception handler registered for md5_block_asm_data_order in .xdata; uses r8 as the CONTEXT pointer and r9 as the DISPATCHER_CONTEXT pointer (offsets below follow the Windows x64 layouts - confirm against winnt.h)
+	push	rsi	; preserve every register this handler modifies
+	push	rdi
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	pushfq
+	sub	rsp,64	; stack space for the RtlVirtualUnwind call below
+
+	mov	rax,QWORD[120+r8]	; context->Rax: the function copies its entry rsp into rax before pushing anything
+	mov	rbx,QWORD[248+r8]	; context->Rip
+
+	lea	r10,[$L$prologue]
+	cmp	rbx,r10	; Rip before $L$prologue: the pushes may be incomplete, so use rax as the entry rsp
+	jb	NEAR $L$in_prologue
+
+	mov	rax,QWORD[152+r8]	; context->Rsp
+
+	lea	r10,[$L$epilogue]
+	cmp	rbx,r10	; Rip at or past $L$epilogue: the pushed registers are already reloaded and rsp is back at its entry value
+	jae	NEAR $L$in_prologue
+
+	lea	rax,[40+rax]	; step over the five pushed registers to reach the entry rsp
+
+	mov	rbp,QWORD[((-8))+rax]	; reload the saved registers from the frame, in push order
+	mov	rbx,QWORD[((-16))+rax]
+	mov	r12,QWORD[((-24))+rax]
+	mov	r14,QWORD[((-32))+rax]
+	mov	r15,QWORD[((-40))+rax]
+	mov	QWORD[144+r8],rbx	; context->Rbx
+	mov	QWORD[160+r8],rbp	; context->Rbp
+	mov	QWORD[216+r8],r12	; context->R12
+	mov	QWORD[232+r8],r14	; context->R14
+	mov	QWORD[240+r8],r15	; context->R15
+
+$L$in_prologue:
+	mov	rdi,QWORD[8+rax]	; rdi/rsi as stored by the WIN64 prologue relative to the entry rsp
+	mov	rsi,QWORD[16+rax]
+	mov	QWORD[152+r8],rax	; context->Rsp = entry rsp
+	mov	QWORD[168+r8],rsi	; context->Rsi
+	mov	QWORD[176+r8],rdi	; context->Rdi
+
+	mov	rdi,QWORD[40+r9]	; destination: disp->ContextRecord
+	mov	rsi,r8	; source: the updated context
+	mov	ecx,154	; copy 154 qwords (1232 bytes)
+	DD	0xa548f3fc	; encoded bytes fc f3 48 a5 = cld; rep movsq
+
+	mov	rsi,r9
+	xor	rcx,rcx	; arg1 = 0 (handler type)
+	mov	rdx,QWORD[8+rsi]	; arg2 = disp->ImageBase
+	mov	r8,QWORD[rsi]	; arg3 = disp->ControlPc
+	mov	r9,QWORD[16+rsi]	; arg4 = disp->FunctionEntry
+	mov	r10,QWORD[40+rsi]	; arg5 = disp->ContextRecord
+	lea	r11,[56+rsi]	; arg6 = &disp->HandlerData
+	lea	r12,[24+rsi]	; arg7 = &disp->EstablisherFrame
+	mov	QWORD[32+rsp],r10
+	mov	QWORD[40+rsp],r11
+	mov	QWORD[48+rsp],r12
+	mov	QWORD[56+rsp],rcx	; arg8 = 0
+	call	QWORD[__imp_RtlVirtualUnwind]
+
+	mov	eax,1	; return 1 (ExceptionContinueSearch in the Windows EXCEPTION_DISPOSITION enum)
+	add	rsp,64
+	popfq
+	pop	r15
+	pop	r14
+	pop	r13
+	pop	r12
+	pop	rbp
+	pop	rbx
+	pop	rdi
+	pop	rsi
+	ret
+
+
+section	.pdata rdata align=4
+ALIGN	4
+	DD	$L$SEH_begin_md5_block_asm_data_order wrt ..imagebase	; function table entry: image-relative begin address,
+	DD	$L$SEH_end_md5_block_asm_data_order wrt ..imagebase	; end address,
+	DD	$L$SEH_info_md5_block_asm_data_order wrt ..imagebase	; and unwind-info address
+
+section	.xdata rdata align=8
+ALIGN	8
+$L$SEH_info_md5_block_asm_data_order:
+	DB	9,0,0,0	; unwind-info header: 9 = version 1 with the exception-handler flag (1 | 1<<3); prolog size, unwind-code count and frame register are all 0
+	DD	se_handler wrt ..imagebase	; image-relative address of the handler defined above
+%else
+; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738
+ret
+%endif
diff --git a/gen/sources.bzl b/gen/sources.bzl
index ecb7fb4..07df690 100644
--- a/gen/sources.bzl
+++ b/gen/sources.bzl
@@ -71,7 +71,6 @@
     "crypto/fipsmodule/ecdsa/ecdsa.c.inc",
     "crypto/fipsmodule/hkdf/hkdf.c.inc",
     "crypto/fipsmodule/hmac/hmac.c.inc",
-    "crypto/fipsmodule/md5/md5.c.inc",
     "crypto/fipsmodule/modes/cbc.c.inc",
     "crypto/fipsmodule/modes/cfb.c.inc",
     "crypto/fipsmodule/modes/ctr.c.inc",
@@ -136,10 +135,6 @@
     "gen/bcm/ghashv8-armv8-apple.S",
     "gen/bcm/ghashv8-armv8-linux.S",
     "gen/bcm/ghashv8-armv8-win.S",
-    "gen/bcm/md5-586-apple.S",
-    "gen/bcm/md5-586-linux.S",
-    "gen/bcm/md5-x86_64-apple.S",
-    "gen/bcm/md5-x86_64-linux.S",
     "gen/bcm/p256-armv8-asm-apple.S",
     "gen/bcm/p256-armv8-asm-linux.S",
     "gen/bcm/p256-armv8-asm-win.S",
@@ -206,8 +201,6 @@
     "gen/bcm/ghash-ssse3-x86_64-win.asm",
     "gen/bcm/ghash-x86-win.asm",
     "gen/bcm/ghash-x86_64-win.asm",
-    "gen/bcm/md5-586-win.asm",
-    "gen/bcm/md5-x86_64-win.asm",
     "gen/bcm/p256-x86_64-asm-win.asm",
     "gen/bcm/p256_beeu-x86_64-asm-win.asm",
     "gen/bcm/rdrand-x86_64-win.asm",
@@ -366,6 +359,7 @@
     "crypto/kyber/kyber.c",
     "crypto/lhash/lhash.c",
     "crypto/md4/md4.c",
+    "crypto/md5/md5.c",
     "crypto/mem.c",
     "crypto/mldsa/mldsa.c",
     "crypto/mlkem/mlkem.cc",
@@ -614,7 +608,6 @@
     "crypto/fipsmodule/ec/p256-nistz.h",
     "crypto/fipsmodule/ec/p256_table.h",
     "crypto/fipsmodule/ecdsa/internal.h",
-    "crypto/fipsmodule/md5/internal.h",
     "crypto/fipsmodule/modes/internal.h",
     "crypto/fipsmodule/rand/internal.h",
     "crypto/fipsmodule/rsa/internal.h",
@@ -626,6 +619,7 @@
     "crypto/keccak/internal.h",
     "crypto/kyber/internal.h",
     "crypto/lhash/internal.h",
+    "crypto/md5/internal.h",
     "crypto/mldsa/internal.h",
     "crypto/mlkem/internal.h",
     "crypto/obj/obj_dat.h",
@@ -674,6 +668,10 @@
     "gen/crypto/chacha20_poly1305_armv8-win.S",
     "gen/crypto/chacha20_poly1305_x86_64-apple.S",
     "gen/crypto/chacha20_poly1305_x86_64-linux.S",
+    "gen/crypto/md5-586-apple.S",
+    "gen/crypto/md5-586-linux.S",
+    "gen/crypto/md5-x86_64-apple.S",
+    "gen/crypto/md5-x86_64-linux.S",
     "third_party/fiat/asm/fiat_curve25519_adx_mul.S",
     "third_party/fiat/asm/fiat_curve25519_adx_square.S",
 ]
@@ -683,6 +681,8 @@
     "gen/crypto/chacha-x86-win.asm",
     "gen/crypto/chacha-x86_64-win.asm",
     "gen/crypto/chacha20_poly1305_x86_64-win.asm",
+    "gen/crypto/md5-586-win.asm",
+    "gen/crypto/md5-x86_64-win.asm",
 ]
 
 crypto_test_sources = [
@@ -722,7 +722,6 @@
     "crypto/fipsmodule/ec/p256_test.cc",
     "crypto/fipsmodule/ecdsa/ecdsa_test.cc",
     "crypto/fipsmodule/hkdf/hkdf_test.cc",
-    "crypto/fipsmodule/md5/md5_test.cc",
     "crypto/fipsmodule/modes/gcm_test.cc",
     "crypto/fipsmodule/rand/ctrdrbg_test.cc",
     "crypto/fipsmodule/service_indicator/service_indicator_test.cc",
@@ -734,6 +733,7 @@
     "crypto/keccak/keccak_test.cc",
     "crypto/kyber/kyber_test.cc",
     "crypto/lhash/lhash_test.cc",
+    "crypto/md5/md5_test.cc",
     "crypto/mldsa/mldsa_test.cc",
     "crypto/mlkem/mlkem_test.cc",
     "crypto/obj/obj_test.cc",
diff --git a/gen/sources.cmake b/gen/sources.cmake
index c974c31..c994f60 100644
--- a/gen/sources.cmake
+++ b/gen/sources.cmake
@@ -75,7 +75,6 @@
   crypto/fipsmodule/ecdsa/ecdsa.c.inc
   crypto/fipsmodule/hkdf/hkdf.c.inc
   crypto/fipsmodule/hmac/hmac.c.inc
-  crypto/fipsmodule/md5/md5.c.inc
   crypto/fipsmodule/modes/cbc.c.inc
   crypto/fipsmodule/modes/cfb.c.inc
   crypto/fipsmodule/modes/ctr.c.inc
@@ -142,10 +141,6 @@
   gen/bcm/ghashv8-armv8-apple.S
   gen/bcm/ghashv8-armv8-linux.S
   gen/bcm/ghashv8-armv8-win.S
-  gen/bcm/md5-586-apple.S
-  gen/bcm/md5-586-linux.S
-  gen/bcm/md5-x86_64-apple.S
-  gen/bcm/md5-x86_64-linux.S
   gen/bcm/p256-armv8-asm-apple.S
   gen/bcm/p256-armv8-asm-linux.S
   gen/bcm/p256-armv8-asm-win.S
@@ -214,8 +209,6 @@
   gen/bcm/ghash-ssse3-x86_64-win.asm
   gen/bcm/ghash-x86-win.asm
   gen/bcm/ghash-x86_64-win.asm
-  gen/bcm/md5-586-win.asm
-  gen/bcm/md5-x86_64-win.asm
   gen/bcm/p256-x86_64-asm-win.asm
   gen/bcm/p256_beeu-x86_64-asm-win.asm
   gen/bcm/rdrand-x86_64-win.asm
@@ -380,6 +373,7 @@
   crypto/kyber/kyber.c
   crypto/lhash/lhash.c
   crypto/md4/md4.c
+  crypto/md5/md5.c
   crypto/mem.c
   crypto/mldsa/mldsa.c
   crypto/mlkem/mlkem.cc
@@ -632,7 +626,6 @@
   crypto/fipsmodule/ec/p256-nistz.h
   crypto/fipsmodule/ec/p256_table.h
   crypto/fipsmodule/ecdsa/internal.h
-  crypto/fipsmodule/md5/internal.h
   crypto/fipsmodule/modes/internal.h
   crypto/fipsmodule/rand/internal.h
   crypto/fipsmodule/rsa/internal.h
@@ -644,6 +637,7 @@
   crypto/keccak/internal.h
   crypto/kyber/internal.h
   crypto/lhash/internal.h
+  crypto/md5/internal.h
   crypto/mldsa/internal.h
   crypto/mlkem/internal.h
   crypto/obj/obj_dat.h
@@ -694,6 +688,10 @@
   gen/crypto/chacha20_poly1305_armv8-win.S
   gen/crypto/chacha20_poly1305_x86_64-apple.S
   gen/crypto/chacha20_poly1305_x86_64-linux.S
+  gen/crypto/md5-586-apple.S
+  gen/crypto/md5-586-linux.S
+  gen/crypto/md5-x86_64-apple.S
+  gen/crypto/md5-x86_64-linux.S
   third_party/fiat/asm/fiat_curve25519_adx_mul.S
   third_party/fiat/asm/fiat_curve25519_adx_square.S
 )
@@ -705,6 +703,8 @@
   gen/crypto/chacha-x86-win.asm
   gen/crypto/chacha-x86_64-win.asm
   gen/crypto/chacha20_poly1305_x86_64-win.asm
+  gen/crypto/md5-586-win.asm
+  gen/crypto/md5-x86_64-win.asm
 )
 
 set(
@@ -746,7 +746,6 @@
   crypto/fipsmodule/ec/p256_test.cc
   crypto/fipsmodule/ecdsa/ecdsa_test.cc
   crypto/fipsmodule/hkdf/hkdf_test.cc
-  crypto/fipsmodule/md5/md5_test.cc
   crypto/fipsmodule/modes/gcm_test.cc
   crypto/fipsmodule/rand/ctrdrbg_test.cc
   crypto/fipsmodule/service_indicator/service_indicator_test.cc
@@ -758,6 +757,7 @@
   crypto/keccak/keccak_test.cc
   crypto/kyber/kyber_test.cc
   crypto/lhash/lhash_test.cc
+  crypto/md5/md5_test.cc
   crypto/mldsa/mldsa_test.cc
   crypto/mlkem/mlkem_test.cc
   crypto/obj/obj_test.cc
diff --git a/gen/sources.gni b/gen/sources.gni
index 0eaa36f..34d4acf 100644
--- a/gen/sources.gni
+++ b/gen/sources.gni
@@ -71,7 +71,6 @@
   "crypto/fipsmodule/ecdsa/ecdsa.c.inc",
   "crypto/fipsmodule/hkdf/hkdf.c.inc",
   "crypto/fipsmodule/hmac/hmac.c.inc",
-  "crypto/fipsmodule/md5/md5.c.inc",
   "crypto/fipsmodule/modes/cbc.c.inc",
   "crypto/fipsmodule/modes/cfb.c.inc",
   "crypto/fipsmodule/modes/ctr.c.inc",
@@ -136,10 +135,6 @@
   "gen/bcm/ghashv8-armv8-apple.S",
   "gen/bcm/ghashv8-armv8-linux.S",
   "gen/bcm/ghashv8-armv8-win.S",
-  "gen/bcm/md5-586-apple.S",
-  "gen/bcm/md5-586-linux.S",
-  "gen/bcm/md5-x86_64-apple.S",
-  "gen/bcm/md5-x86_64-linux.S",
   "gen/bcm/p256-armv8-asm-apple.S",
   "gen/bcm/p256-armv8-asm-linux.S",
   "gen/bcm/p256-armv8-asm-win.S",
@@ -206,8 +201,6 @@
   "gen/bcm/ghash-ssse3-x86_64-win.asm",
   "gen/bcm/ghash-x86-win.asm",
   "gen/bcm/ghash-x86_64-win.asm",
-  "gen/bcm/md5-586-win.asm",
-  "gen/bcm/md5-x86_64-win.asm",
   "gen/bcm/p256-x86_64-asm-win.asm",
   "gen/bcm/p256_beeu-x86_64-asm-win.asm",
   "gen/bcm/rdrand-x86_64-win.asm",
@@ -366,6 +359,7 @@
   "crypto/kyber/kyber.c",
   "crypto/lhash/lhash.c",
   "crypto/md4/md4.c",
+  "crypto/md5/md5.c",
   "crypto/mem.c",
   "crypto/mldsa/mldsa.c",
   "crypto/mlkem/mlkem.cc",
@@ -614,7 +608,6 @@
   "crypto/fipsmodule/ec/p256-nistz.h",
   "crypto/fipsmodule/ec/p256_table.h",
   "crypto/fipsmodule/ecdsa/internal.h",
-  "crypto/fipsmodule/md5/internal.h",
   "crypto/fipsmodule/modes/internal.h",
   "crypto/fipsmodule/rand/internal.h",
   "crypto/fipsmodule/rsa/internal.h",
@@ -626,6 +619,7 @@
   "crypto/keccak/internal.h",
   "crypto/kyber/internal.h",
   "crypto/lhash/internal.h",
+  "crypto/md5/internal.h",
   "crypto/mldsa/internal.h",
   "crypto/mlkem/internal.h",
   "crypto/obj/obj_dat.h",
@@ -674,6 +668,10 @@
   "gen/crypto/chacha20_poly1305_armv8-win.S",
   "gen/crypto/chacha20_poly1305_x86_64-apple.S",
   "gen/crypto/chacha20_poly1305_x86_64-linux.S",
+  "gen/crypto/md5-586-apple.S",
+  "gen/crypto/md5-586-linux.S",
+  "gen/crypto/md5-x86_64-apple.S",
+  "gen/crypto/md5-x86_64-linux.S",
   "third_party/fiat/asm/fiat_curve25519_adx_mul.S",
   "third_party/fiat/asm/fiat_curve25519_adx_square.S",
 ]
@@ -683,6 +681,8 @@
   "gen/crypto/chacha-x86-win.asm",
   "gen/crypto/chacha-x86_64-win.asm",
   "gen/crypto/chacha20_poly1305_x86_64-win.asm",
+  "gen/crypto/md5-586-win.asm",
+  "gen/crypto/md5-x86_64-win.asm",
 ]
 
 crypto_test_sources = [
@@ -722,7 +722,6 @@
   "crypto/fipsmodule/ec/p256_test.cc",
   "crypto/fipsmodule/ecdsa/ecdsa_test.cc",
   "crypto/fipsmodule/hkdf/hkdf_test.cc",
-  "crypto/fipsmodule/md5/md5_test.cc",
   "crypto/fipsmodule/modes/gcm_test.cc",
   "crypto/fipsmodule/rand/ctrdrbg_test.cc",
   "crypto/fipsmodule/service_indicator/service_indicator_test.cc",
@@ -734,6 +733,7 @@
   "crypto/keccak/keccak_test.cc",
   "crypto/kyber/kyber_test.cc",
   "crypto/lhash/lhash_test.cc",
+  "crypto/md5/md5_test.cc",
   "crypto/mldsa/mldsa_test.cc",
   "crypto/mlkem/mlkem_test.cc",
   "crypto/obj/obj_test.cc",
diff --git a/gen/sources.json b/gen/sources.json
index f589fc2..a818f8f 100644
--- a/gen/sources.json
+++ b/gen/sources.json
@@ -56,7 +56,6 @@
       "crypto/fipsmodule/ecdsa/ecdsa.c.inc",
       "crypto/fipsmodule/hkdf/hkdf.c.inc",
       "crypto/fipsmodule/hmac/hmac.c.inc",
-      "crypto/fipsmodule/md5/md5.c.inc",
       "crypto/fipsmodule/modes/cbc.c.inc",
       "crypto/fipsmodule/modes/cfb.c.inc",
       "crypto/fipsmodule/modes/ctr.c.inc",
@@ -120,10 +119,6 @@
       "gen/bcm/ghashv8-armv8-apple.S",
       "gen/bcm/ghashv8-armv8-linux.S",
       "gen/bcm/ghashv8-armv8-win.S",
-      "gen/bcm/md5-586-apple.S",
-      "gen/bcm/md5-586-linux.S",
-      "gen/bcm/md5-x86_64-apple.S",
-      "gen/bcm/md5-x86_64-linux.S",
       "gen/bcm/p256-armv8-asm-apple.S",
       "gen/bcm/p256-armv8-asm-linux.S",
       "gen/bcm/p256-armv8-asm-win.S",
@@ -189,8 +184,6 @@
       "gen/bcm/ghash-ssse3-x86_64-win.asm",
       "gen/bcm/ghash-x86-win.asm",
       "gen/bcm/ghash-x86_64-win.asm",
-      "gen/bcm/md5-586-win.asm",
-      "gen/bcm/md5-x86_64-win.asm",
       "gen/bcm/p256-x86_64-asm-win.asm",
       "gen/bcm/p256_beeu-x86_64-asm-win.asm",
       "gen/bcm/rdrand-x86_64-win.asm",
@@ -350,6 +343,7 @@
       "crypto/kyber/kyber.c",
       "crypto/lhash/lhash.c",
       "crypto/md4/md4.c",
+      "crypto/md5/md5.c",
       "crypto/mem.c",
       "crypto/mldsa/mldsa.c",
       "crypto/mlkem/mlkem.cc",
@@ -596,7 +590,6 @@
       "crypto/fipsmodule/ec/p256-nistz.h",
       "crypto/fipsmodule/ec/p256_table.h",
       "crypto/fipsmodule/ecdsa/internal.h",
-      "crypto/fipsmodule/md5/internal.h",
       "crypto/fipsmodule/modes/internal.h",
       "crypto/fipsmodule/rand/internal.h",
       "crypto/fipsmodule/rsa/internal.h",
@@ -608,6 +601,7 @@
       "crypto/keccak/internal.h",
       "crypto/kyber/internal.h",
       "crypto/lhash/internal.h",
+      "crypto/md5/internal.h",
       "crypto/mldsa/internal.h",
       "crypto/mlkem/internal.h",
       "crypto/obj/obj_dat.h",
@@ -655,6 +649,10 @@
       "gen/crypto/chacha20_poly1305_armv8-win.S",
       "gen/crypto/chacha20_poly1305_x86_64-apple.S",
       "gen/crypto/chacha20_poly1305_x86_64-linux.S",
+      "gen/crypto/md5-586-apple.S",
+      "gen/crypto/md5-586-linux.S",
+      "gen/crypto/md5-x86_64-apple.S",
+      "gen/crypto/md5-x86_64-linux.S",
       "third_party/fiat/asm/fiat_curve25519_adx_mul.S",
       "third_party/fiat/asm/fiat_curve25519_adx_square.S"
     ],
@@ -662,7 +660,9 @@
       "gen/crypto/aes128gcmsiv-x86_64-win.asm",
       "gen/crypto/chacha-x86-win.asm",
       "gen/crypto/chacha-x86_64-win.asm",
-      "gen/crypto/chacha20_poly1305_x86_64-win.asm"
+      "gen/crypto/chacha20_poly1305_x86_64-win.asm",
+      "gen/crypto/md5-586-win.asm",
+      "gen/crypto/md5-x86_64-win.asm"
     ]
   },
   "crypto_test": {
@@ -703,7 +703,6 @@
       "crypto/fipsmodule/ec/p256_test.cc",
       "crypto/fipsmodule/ecdsa/ecdsa_test.cc",
       "crypto/fipsmodule/hkdf/hkdf_test.cc",
-      "crypto/fipsmodule/md5/md5_test.cc",
       "crypto/fipsmodule/modes/gcm_test.cc",
       "crypto/fipsmodule/rand/ctrdrbg_test.cc",
       "crypto/fipsmodule/service_indicator/service_indicator_test.cc",
@@ -715,6 +714,7 @@
       "crypto/keccak/keccak_test.cc",
       "crypto/kyber/kyber_test.cc",
       "crypto/lhash/lhash_test.cc",
+      "crypto/md5/md5_test.cc",
       "crypto/mldsa/mldsa_test.cc",
       "crypto/mlkem/mlkem_test.cc",
       "crypto/obj/obj_test.cc",