diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1dbb643..64181f5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -65,6 +65,11 @@
   add_definitions(-DBORINGSSL_ALLOW_CXX_RUNTIME)
 endif()
 
+if(CMAKE_BUILD_TYPE MATCHES "^(Release|RelWithDebInfo|MinSizeRel)$")
+  # NASM flags don't get NDEBUG automatically; mirror every NDEBUG C config.
+  set(CMAKE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS} -DNDEBUG")
+endif()
+
 if(BORINGSSL_PREFIX AND BORINGSSL_PREFIX_SYMBOLS)
   add_definitions(-DBORINGSSL_PREFIX=${BORINGSSL_PREFIX})
   # CMake automatically connects include_directories to the NASM command-line,
diff --git a/crypto/CMakeLists.txt b/crypto/CMakeLists.txt
index 8565977..e940f7d 100644
--- a/crypto/CMakeLists.txt
+++ b/crypto/CMakeLists.txt
@@ -463,6 +463,7 @@
   hkdf/hkdf_test.cc
   hmac_extra/hmac_test.cc
   hrss/hrss_test.cc
+  impl_dispatch_test.cc
   lhash/lhash_test.cc
   obj/obj_test.cc
   pem/pem_test.cc
diff --git a/crypto/crypto.c b/crypto/crypto.c
index f7ac255..c86206a 100644
--- a/crypto/crypto.c
+++ b/crypto/crypto.c
@@ -36,8 +36,8 @@
 #define BORINGSSL_NO_STATIC_INITIALIZER
 #endif
 
-#endif  /* !OPENSSL_NO_ASM && (OPENSSL_X86 || OPENSSL_X86_64 ||
-                               OPENSSL_ARM || OPENSSL_AARCH64) */
+#endif  // !NO_ASM && !STATIC_ARMCAP &&
+        // (X86 || X86_64 || ARM || AARCH64 || PPC64LE)
 
 
 // Our assembly does not use the GOT to reference symbols, which means
@@ -60,8 +60,7 @@
 // that tests the capability values will still skip the constructor but, so
 // far, the init constructor function only sets the capability variables.
 
-#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
-
+#if !defined(NDEBUG) && !defined(BORINGSSL_FIPS)
 // This value must be explicitly initialised to zero in order to work around a
 // bug in libtool or the linker on OS X.
 //
@@ -69,6 +68,12 @@
 // archive, linking on OS X will fail to resolve common symbols. By
 // initialising it to zero, it becomes a "data symbol", which isn't so
 // affected.
+HIDDEN uint8_t BORINGSSL_function_hit[7] = {0};
+#endif
+
+#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
+
+// This value must be explicitly initialized to zero. See similar comment above.
 HIDDEN uint32_t OPENSSL_ia32cap_P[4] = {0};
 
 #elif defined(OPENSSL_PPC64LE)
diff --git a/crypto/fipsmodule/aes/asm/aesni-x86.pl b/crypto/fipsmodule/aes/asm/aesni-x86.pl
index aff2b40..fcb5b98 100644
--- a/crypto/fipsmodule/aes/asm/aesni-x86.pl
+++ b/crypto/fipsmodule/aes/asm/aesni-x86.pl
@@ -84,6 +84,9 @@
 &asm_init($ARGV[0]);
 
 &external_label("OPENSSL_ia32cap_P");
+&preprocessor_ifndef("NDEBUG");	# only reference the flag array when NDEBUG is unset
+&external_label("BORINGSSL_function_hit");
+&preprocessor_endif();
 &static_label("key_const");
 
 if ($PREFIX eq $AESNI_PREFIX)	{ $movekey=\&movups; }
@@ -193,6 +196,8 @@
 # void $PREFIX_encrypt (const void *inp,void *out,const AES_KEY *key);
 &aesni_generate1("enc") if (!$inline);
 &function_begin_B("${PREFIX}_encrypt");
+	&record_function_hit(1);
+
 	&mov	("eax",&wparam(0));
 	&mov	($key,&wparam(2));
 	&movups	($inout0,&QWP(0,"eax"));
@@ -875,6 +880,8 @@
 #	80	saved %esp
 
 &function_begin("${PREFIX}_ctr32_encrypt_blocks");
+	&record_function_hit(0);
+
 	&mov	($inp,&wparam(0));
 	&mov	($out,&wparam(1));
 	&mov	($len,&wparam(2));
@@ -2483,6 +2490,8 @@
 # int $PREFIX_set_encrypt_key (const unsigned char *userKey, int bits,
 #                              AES_KEY *key)
 &function_begin_B("${PREFIX}_set_encrypt_key");
+	&record_function_hit(3);
+
 	&mov	("eax",&wparam(0));
 	&mov	($rounds,&wparam(1));
 	&mov	($key,&wparam(2));
diff --git a/crypto/fipsmodule/aes/asm/aesni-x86_64.pl b/crypto/fipsmodule/aes/asm/aesni-x86_64.pl
index 14175ad..437fd3a 100644
--- a/crypto/fipsmodule/aes/asm/aesni-x86_64.pl
+++ b/crypto/fipsmodule/aes/asm/aesni-x86_64.pl
@@ -275,6 +275,12 @@
 .align	16
 ${PREFIX}_encrypt:
 .cfi_startproc
+#ifndef NDEBUG
+#ifndef BORINGSSL_FIPS
+.extern	BORINGSSL_function_hit
+	movb \$1,BORINGSSL_function_hit+1(%rip)
+#endif
+#endif
 	movups	($inp),$inout0		# load input
 	mov	240($key),$rounds	# key->rounds
 ___
@@ -1199,6 +1205,11 @@
 .align	16
 ${PREFIX}_ctr32_encrypt_blocks:
 .cfi_startproc
+#ifndef NDEBUG
+#ifndef BORINGSSL_FIPS
+	movb \$1,BORINGSSL_function_hit(%rip)
+#endif
+#endif
 	cmp	\$1,$len
 	jne	.Lctr32_bulk
 
@@ -4252,7 +4263,7 @@
 .cfi_endproc
 .size	${PREFIX}_cbc_encrypt,.-${PREFIX}_cbc_encrypt
 ___
-} 
+}
 # int ${PREFIX}_set_decrypt_key(const unsigned char *inp,
 #				int bits, AES_KEY *key)
 #
@@ -4343,6 +4354,11 @@
 ${PREFIX}_set_encrypt_key:
 __aesni_set_encrypt_key:
 .cfi_startproc
+#ifndef NDEBUG
+#ifndef BORINGSSL_FIPS
+	movb \$1,BORINGSSL_function_hit+3(%rip)
+#endif
+#endif
 	.byte	0x48,0x83,0xEC,0x08	# sub rsp,8
 .cfi_adjust_cfa_offset	8
 	mov	\$-1,%rax
diff --git a/crypto/fipsmodule/aes/asm/bsaes-x86_64.pl b/crypto/fipsmodule/aes/asm/bsaes-x86_64.pl
index f0031ef..81331bf 100644
--- a/crypto/fipsmodule/aes/asm/bsaes-x86_64.pl
+++ b/crypto/fipsmodule/aes/asm/bsaes-x86_64.pl
@@ -1920,6 +1920,12 @@
 .align	16
 bsaes_ctr32_encrypt_blocks:
 .cfi_startproc
+#ifndef NDEBUG
+#ifndef BORINGSSL_FIPS
+.extern	BORINGSSL_function_hit
+	movb \$1, BORINGSSL_function_hit+6(%rip)
+#endif
+#endif
 	mov	%rsp, %rax
 .Lctr_enc_prologue:
 	push	%rbp
diff --git a/crypto/fipsmodule/aes/asm/vpaes-x86.pl b/crypto/fipsmodule/aes/asm/vpaes-x86.pl
index 5f4b208..81e7e84 100644
--- a/crypto/fipsmodule/aes/asm/vpaes-x86.pl
+++ b/crypto/fipsmodule/aes/asm/vpaes-x86.pl
@@ -69,6 +69,9 @@
 my  ($round, $base, $magic, $key, $const, $inp, $out)=
     ("eax",  "ebx", "ecx",  "edx","ebp",  "esi","edi");
 
+&preprocessor_ifndef("NDEBUG");	# only reference the flag array when NDEBUG is unset
+&external_label("BORINGSSL_function_hit");
+&preprocessor_endif();
 &static_label("_vpaes_consts");
 &static_label("_vpaes_schedule_low_round");
 
@@ -758,6 +761,8 @@
 # Interface to OpenSSL
 #
 &function_begin("${PREFIX}_set_encrypt_key");
+	&record_function_hit(5);	# flag 5: vpaes_set_encrypt_key
+
 	&mov	($inp,&wparam(0));		# inp
 	&lea	($base,&DWP(-56,"esp"));
 	&mov	($round,&wparam(1));		# bits
@@ -812,6 +817,8 @@
 &function_end("${PREFIX}_set_decrypt_key");
 
 &function_begin("${PREFIX}_encrypt");
+	&record_function_hit(4);	# flag 4: vpaes_encrypt
+
 	&lea	($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point")));
 	&call	("_vpaes_preheat");
 &set_label("pic_point");
diff --git a/crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl b/crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl
index 3d0600f..b9edb79 100644
--- a/crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl
+++ b/crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl
@@ -664,6 +664,12 @@
 .align	32
 aesni_gcm_encrypt:
 .cfi_startproc
+#ifndef NDEBUG
+#ifndef BORINGSSL_FIPS
+.extern	BORINGSSL_function_hit
+	movb \$1,BORINGSSL_function_hit+2(%rip)
+#endif
+#endif
 	xor	$ret,$ret
 
 	# We call |_aesni_ctr32_6x| twice, each call consuming 96 bytes of
diff --git a/crypto/impl_dispatch_test.cc b/crypto/impl_dispatch_test.cc
new file mode 100644
index 0000000..efe12b4
--- /dev/null
+++ b/crypto/impl_dispatch_test.cc
@@ -0,0 +1,153 @@
+/* Copyright (c) 2018, Google Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
+
+#include <openssl/base.h>
+
+#if !defined(NDEBUG) && !defined(BORINGSSL_FIPS) && \
+    !defined(BORINGSSL_SHARED_LIBRARY)
+
+#include <functional>
+#include <utility>
+#include <vector>
+
+#include <openssl/aead.h>
+#include <openssl/aes.h>
+#include <openssl/cpu.h>
+#include <openssl/mem.h>
+
+#include <gtest/gtest.h>
+
+#include "internal.h"
+
+
+class ImplDispatchTest : public ::testing::Test {  // fixture for the dispatch tests
+ public:
+  void SetUp() override {  // caches the capability bits the tests branch on
+#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
+    aesni_ = OPENSSL_ia32cap_P[1] & (1 << (57 - 32));  // bit 25 of word 1
+    avx_movbe_ = ((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41;  // bits 22 and 28 of word 1, both required
+    ssse3_ = OPENSSL_ia32cap_P[1] & (1 << (41 - 32));  // bit 9 of word 1
+    is_x86_64_ =
+#if defined(OPENSSL_X86_64)
+        true;
+#else
+        false;
+#endif
+#endif  // X86 || X86_64
+  }
+
+ protected:
+  // AssertFunctionsHit takes a list of pairs (flag index, boolean), and a
+  // function to test. It runs the given function and asserts, for each flag
+  // index, that the boolean reflects whether that flag index was written or
+  // not, and that no other flagged functions were triggered.
+  void AssertFunctionsHit(std::vector<std::pair<size_t, bool>> flags,
+                          std::function<void()> f) {
+    OPENSSL_memset(BORINGSSL_function_hit, 0, sizeof(BORINGSSL_function_hit));  // start from a clean slate
+
+    f();
+
+    for (const auto flag : flags) {
+      SCOPED_TRACE(flag.first);  // tag any failure below with the flag index
+
+      ASSERT_LT(flag.first, sizeof(BORINGSSL_function_hit));
+      EXPECT_EQ(flag.second, BORINGSSL_function_hit[flag.first] == 1);
+      BORINGSSL_function_hit[flag.first] = 0;  // clear so the sweep below only sees unlisted flags
+    }
+
+    for (size_t i = 0; i < sizeof(BORINGSSL_function_hit); i++) {  // anything still set was not listed by the caller
+      EXPECT_EQ(0u, BORINGSSL_function_hit[i])
+          << "Flag " << i << " unexpectedly hit";
+    }
+  }
+
+#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
+  bool aesni_ = false;      // assigned in SetUp()
+  bool avx_movbe_ = false;  // assigned in SetUp()
+  bool ssse3_ = false;      // assigned in SetUp()
+  bool is_x86_64_ = false;  // true only when built with OPENSSL_X86_64
+#endif
+};
+
+#if !defined(OPENSSL_NO_ASM) && \
+    (defined(OPENSSL_X86) || defined(OPENSSL_X86_64))
+
+constexpr size_t kFlag_aes_hw_ctr32_encrypt_blocks = 0;
+constexpr size_t kFlag_aes_hw_encrypt = 1;
+constexpr size_t kFlag_aesni_gcm_encrypt = 2;
+constexpr size_t kFlag_aes_hw_set_encrypt_key = 3;
+constexpr size_t kFlag_vpaes_encrypt = 4;
+constexpr size_t kFlag_vpaes_set_encrypt_key = 5;
+constexpr size_t kFlag_bsaes_ctr32_encrypt_blocks = 6;
+
+TEST_F(ImplDispatchTest, AEAD_AES_GCM) {  // seals once with AES-128-GCM
+  AssertFunctionsHit(
+      {
+          {kFlag_aes_hw_ctr32_encrypt_blocks, aesni_},
+          {kFlag_aes_hw_encrypt, aesni_},
+          {kFlag_aes_hw_set_encrypt_key, aesni_},
+          {kFlag_aesni_gcm_encrypt, is_x86_64_ && aesni_ && avx_movbe_},
+          {kFlag_vpaes_encrypt, !is_x86_64_ && ssse3_ && !aesni_},
+          {kFlag_vpaes_set_encrypt_key, !is_x86_64_ && ssse3_ && !aesni_},
+          {kFlag_bsaes_ctr32_encrypt_blocks, is_x86_64_ && ssse3_ && !aesni_},
+      },
+      [] {
+        const uint8_t kZeros[16] = {0};  // used as both key and nonce buffer
+        const uint8_t kPlaintext[40] = {1, 2, 3, 4, 0};
+        uint8_t ciphertext[sizeof(kPlaintext) + 16];
+        size_t ciphertext_len;
+        bssl::ScopedEVP_AEAD_CTX ctx;  // cleaned up when the lambda returns
+        ASSERT_TRUE(EVP_AEAD_CTX_init(ctx.get(), EVP_aead_aes_128_gcm(), kZeros,
+                                      sizeof(kZeros),
+                                      EVP_AEAD_DEFAULT_TAG_LENGTH, nullptr));
+        ASSERT_TRUE(EVP_AEAD_CTX_seal(
+            ctx.get(), ciphertext, &ciphertext_len, sizeof(ciphertext), kZeros,
+            EVP_AEAD_nonce_length(EVP_aead_aes_128_gcm()), kPlaintext,
+            sizeof(kPlaintext), nullptr, 0));
+      });
+}
+
+TEST_F(ImplDispatchTest, AES_set_encrypt_key) {  // key setup via the |AES_*| API
+  AssertFunctionsHit(
+      {
+          {kFlag_aes_hw_set_encrypt_key, aesni_},
+          // VPAES / BSAES will not be used for the |AES_*| functions.
+      },
+      [] {
+        AES_KEY key;
+        static const uint8_t kZeros[16] = {0};
+        ASSERT_EQ(0, AES_set_encrypt_key(kZeros, sizeof(kZeros) * 8, &key));
+      });
+}
+
+TEST_F(ImplDispatchTest, AES_single_block) {
+  AES_KEY key;
+  static const uint8_t kZeros[16] = {0};
+  ASSERT_EQ(0, AES_set_encrypt_key(kZeros, sizeof(kZeros) * 8, &key));
+  // Key setup runs before the flags are reset, so only the encrypt call counts.
+  AssertFunctionsHit(
+      {
+          {kFlag_aes_hw_encrypt, aesni_},
+          // VPAES / BSAES will not be used for the |AES_*| functions.
+      },
+      [&key] {
+        uint8_t in[AES_BLOCK_SIZE] = {0};
+        uint8_t out[AES_BLOCK_SIZE];
+        AES_encrypt(in, out, &key);
+      });
+}
+
+#endif  // X86 || X86_64
+
+#endif  // !NDEBUG && !FIPS && !SHARED_LIBRARY
diff --git a/crypto/perlasm/x86_64-xlate.pl b/crypto/perlasm/x86_64-xlate.pl
index 213093d..2e65d1d 100755
--- a/crypto/perlasm/x86_64-xlate.pl
+++ b/crypto/perlasm/x86_64-xlate.pl
@@ -1181,7 +1181,17 @@
 
     $line =~ s|\R$||;           # Better chomp
 
-    $line =~ s|[#!].*$||;	# get rid of asm-style comments...
+    if ($nasm) {
+	$line =~ s|^#ifdef |%ifdef |;
+	$line =~ s|^#ifndef |%ifndef |;
+	$line =~ s|^#endif|%endif|;
+	$line =~ s|[#!].*$||;	# get rid of asm-style comments...
+    } else {
+	# Get rid of asm-style comments but not preprocessor directives. The
+	# latter are identified by not having a space after the '#'.
+	$line =~ s|[#!] .*$||;
+    }
+
     $line =~ s|/\*.*\*/||;	# ... and C-style comments...
     $line =~ s|^\s+||;		# ... and skip white spaces in beginning
     $line =~ s|\s+$||;		# ... and at the end
diff --git a/crypto/perlasm/x86asm.pl b/crypto/perlasm/x86asm.pl
index 3d2943b..3278188 100644
--- a/crypto/perlasm/x86asm.pl
+++ b/crypto/perlasm/x86asm.pl
@@ -33,6 +33,26 @@
     &generic($opcode,@_) or die "undefined subroutine \&$AUTOLOAD";
 }
 
+# record_function_hit(int) writes a byte with value one to the given offset of
+# |BORINGSSL_function_hit|, but only if NDEBUG is not defined. This is used in
+# impl_dispatch_test.cc to test whether the expected assembly functions are
+# triggered by high-level API calls.
+sub ::record_function_hit
+{ my($index)=@_;
+    &preprocessor_ifndef("NDEBUG");
+    &push("ebx");			# save ebx and edx; both are restored below
+    &push("edx");
+    &call(&label("pic"));		# call the next instruction to push its address
+    &set_label("pic");
+    &blindpop("ebx");			# ebx = address of the "pic" label
+    &lea("ebx",&DWP("BORINGSSL_function_hit+$index"."-".&label("pic"),"ebx"));	# ebx = &BORINGSSL_function_hit[$index]
+    &mov("edx", 1);
+    &movb(&BP(0, "ebx"), "dl");		# store the byte 1 into the flag
+    &pop("edx");
+    &pop("ebx");
+    &preprocessor_endif();
+}
+
 sub ::emit
 { my $opcode=shift;
 
diff --git a/crypto/perlasm/x86gas.pl b/crypto/perlasm/x86gas.pl
index 0c989a1..4e19a89 100644
--- a/crypto/perlasm/x86gas.pl
+++ b/crypto/perlasm/x86gas.pl
@@ -265,6 +265,14 @@
 sub ::dataseg
 {   push(@out,".data\n");   }
 
+sub ::preprocessor_ifndef	# append a cpp-style "#ifndef <macro>" line to the output
+{ my $macro=shift;
+    push(@out,"#ifndef ${macro}\n");
+}
+
+sub ::preprocessor_endif	# append the matching cpp-style "#endif" line
+{ push(@out,"#endif\n"); }
+
 *::hidden = sub { push(@out,".hidden\t$nmdecor$_[0]\n"); } if ($::elf);
 
 1;
diff --git a/crypto/perlasm/x86masm.pl b/crypto/perlasm/x86masm.pl
index dffee76..01bfbf3 100644
--- a/crypto/perlasm/x86masm.pl
+++ b/crypto/perlasm/x86masm.pl
@@ -203,4 +203,12 @@
     push(@out,"ENDIF\n");
 }
 
+sub ::preprocessor_ifndef	# MASM conditional assembly: "IFNDEF <symbol>"
+{ my($define)=@_;
+    push(@out,"IFNDEF ${define}\n");
+}
+
+sub ::preprocessor_endif	# closes a conditional block with MASM's "ENDIF"
+{ push(@out,"ENDIF\n");    }
+
 1;
diff --git a/crypto/perlasm/x86nasm.pl b/crypto/perlasm/x86nasm.pl
index d3773b6..a3e0ab7 100644
--- a/crypto/perlasm/x86nasm.pl
+++ b/crypto/perlasm/x86nasm.pl
@@ -191,4 +191,12 @@
     push(@out,"%endif\n");
 }
 
+sub ::preprocessor_ifndef	# append a NASM "%ifndef <macro>" line to the output
+{ my $macro=shift;
+    push(@out,"%ifndef ${macro}\n");
+}
+
+sub ::preprocessor_endif	# append the matching NASM "%endif" line
+{ push(@out,"%endif\n"); }
+
 1;
diff --git a/include/openssl/cpu.h b/include/openssl/cpu.h
index edac98e..ad5fc94 100644
--- a/include/openssl/cpu.h
+++ b/include/openssl/cpu.h
@@ -190,6 +190,21 @@
 
 #endif  // OPENSSL_PPC64LE
 
+#if !defined(NDEBUG) && !defined(BORINGSSL_FIPS)
+// Runtime CPU dispatch testing support
+
+// BORINGSSL_function_hit is an array of flags. The following functions will
+// set these flags in non-FIPS builds if NDEBUG is not defined.
+//   0: aes_hw_ctr32_encrypt_blocks
+//   1: aes_hw_encrypt
+//   2: aesni_gcm_encrypt
+//   3: aes_hw_set_encrypt_key
+//   4: vpaes_encrypt
+//   5: vpaes_set_encrypt_key
+//   6: bsaes_ctr32_encrypt_blocks
+extern uint8_t BORINGSSL_function_hit[7];
+#endif  // !NDEBUG && !FIPS
+
 
 #if defined(__cplusplus)
 }  // extern C
