Add test of assembly code dispatch.

The first attempt involved using Linux's support for hardware
breakpoints to detect when assembly code was run. However, this doesn't
work with SDE, which is a problem.

This version has the assembly code update a global flags variable when
it's run, but only in non-FIPS builds where NDEBUG is not defined.

Update-Note: Assembly files now pay attention to the NDEBUG preprocessor
symbol. Ensure the build passes the symbol in. (If release builds fail
to link due to missing BORINGSSL_function_hit, this is the cause.)

Change-Id: I6b7ced442b7a77d0b4ae148b00c351f68af89a6e
Reviewed-on: https://boringssl-review.googlesource.com/c/33384
Commit-Queue: Adam Langley <agl@google.com>
Reviewed-by: David Benjamin <davidben@google.com>
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1dbb643..64181f5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -65,6 +65,11 @@
   add_definitions(-DBORINGSSL_ALLOW_CXX_RUNTIME)
 endif()
 
+if(CMAKE_BUILD_TYPE MATCHES "^(Release|RelWithDebInfo|MinSizeRel)$")
+  # NASM flags don't get NDEBUG automatically; add it wherever C flags do.
+  set(CMAKE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS} -DNDEBUG")
+endif()
+
 if(BORINGSSL_PREFIX AND BORINGSSL_PREFIX_SYMBOLS)
   add_definitions(-DBORINGSSL_PREFIX=${BORINGSSL_PREFIX})
   # CMake automatically connects include_directories to the NASM command-line,
diff --git a/crypto/CMakeLists.txt b/crypto/CMakeLists.txt
index 8565977..e940f7d 100644
--- a/crypto/CMakeLists.txt
+++ b/crypto/CMakeLists.txt
@@ -463,6 +463,7 @@
   hkdf/hkdf_test.cc
   hmac_extra/hmac_test.cc
   hrss/hrss_test.cc
+  impl_dispatch_test.cc
   lhash/lhash_test.cc
   obj/obj_test.cc
   pem/pem_test.cc
diff --git a/crypto/crypto.c b/crypto/crypto.c
index f7ac255..c86206a 100644
--- a/crypto/crypto.c
+++ b/crypto/crypto.c
@@ -36,8 +36,8 @@
 #define BORINGSSL_NO_STATIC_INITIALIZER
 #endif
 
-#endif  /* !OPENSSL_NO_ASM && (OPENSSL_X86 || OPENSSL_X86_64 ||
-                               OPENSSL_ARM || OPENSSL_AARCH64) */
+#endif  // !NO_ASM && !STATIC_ARMCAP &&
+        // (X86 || X86_64 || ARM || AARCH64 || PPC64LE)
 
 
 // Our assembly does not use the GOT to reference symbols, which means
@@ -60,8 +60,7 @@
 // that tests the capability values will still skip the constructor but, so
 // far, the init constructor function only sets the capability variables.
 
-#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
-
+#if !defined(NDEBUG) && !defined(BORINGSSL_FIPS)
 // This value must be explicitly initialised to zero in order to work around a
 // bug in libtool or the linker on OS X.
 //
@@ -69,6 +68,12 @@
 // archive, linking on OS X will fail to resolve common symbols. By
 // initialising it to zero, it becomes a "data symbol", which isn't so
 // affected.
+HIDDEN uint8_t BORINGSSL_function_hit[7] = {0};
+#endif
+
+#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
+
+// This value must be explicitly initialized to zero. See similar comment above.
 HIDDEN uint32_t OPENSSL_ia32cap_P[4] = {0};
 
 #elif defined(OPENSSL_PPC64LE)
diff --git a/crypto/fipsmodule/aes/asm/aesni-x86.pl b/crypto/fipsmodule/aes/asm/aesni-x86.pl
index aff2b40..fcb5b98 100644
--- a/crypto/fipsmodule/aes/asm/aesni-x86.pl
+++ b/crypto/fipsmodule/aes/asm/aesni-x86.pl
@@ -84,6 +84,9 @@
 &asm_init($ARGV[0]);
 
 &external_label("OPENSSL_ia32cap_P");
+&preprocessor_ifndef("NDEBUG");  # terminate the statement explicitly
+&external_label("BORINGSSL_function_hit");  # used by record_function_hit
+&preprocessor_endif();
 &static_label("key_const");
 
 if ($PREFIX eq $AESNI_PREFIX)	{ $movekey=\&movups; }
@@ -193,6 +196,8 @@
 # void $PREFIX_encrypt (const void *inp,void *out,const AES_KEY *key);
 &aesni_generate1("enc") if (!$inline);
 &function_begin_B("${PREFIX}_encrypt");
+	&record_function_hit(1);
+
 	&mov	("eax",&wparam(0));
 	&mov	($key,&wparam(2));
 	&movups	($inout0,&QWP(0,"eax"));
@@ -875,6 +880,8 @@
 #	80	saved %esp
 
 &function_begin("${PREFIX}_ctr32_encrypt_blocks");
+	&record_function_hit(0);
+
 	&mov	($inp,&wparam(0));
 	&mov	($out,&wparam(1));
 	&mov	($len,&wparam(2));
@@ -2483,6 +2490,8 @@
 # int $PREFIX_set_encrypt_key (const unsigned char *userKey, int bits,
 #                              AES_KEY *key)
 &function_begin_B("${PREFIX}_set_encrypt_key");
+	&record_function_hit(3);
+
 	&mov	("eax",&wparam(0));
 	&mov	($rounds,&wparam(1));
 	&mov	($key,&wparam(2));
diff --git a/crypto/fipsmodule/aes/asm/aesni-x86_64.pl b/crypto/fipsmodule/aes/asm/aesni-x86_64.pl
index 14175ad..437fd3a 100644
--- a/crypto/fipsmodule/aes/asm/aesni-x86_64.pl
+++ b/crypto/fipsmodule/aes/asm/aesni-x86_64.pl
@@ -275,6 +275,12 @@
 .align	16
 ${PREFIX}_encrypt:
 .cfi_startproc
+#ifndef NDEBUG
+#ifndef BORINGSSL_FIPS
+.extern	BORINGSSL_function_hit
+	movb \$1,BORINGSSL_function_hit+1(%rip)
+#endif
+#endif
 	movups	($inp),$inout0		# load input
 	mov	240($key),$rounds	# key->rounds
 ___
@@ -1199,6 +1205,11 @@
 .align	16
 ${PREFIX}_ctr32_encrypt_blocks:
 .cfi_startproc
+#ifndef NDEBUG
+#ifndef BORINGSSL_FIPS
+	movb \$1,BORINGSSL_function_hit(%rip)
+#endif
+#endif
 	cmp	\$1,$len
 	jne	.Lctr32_bulk
 
@@ -4252,7 +4263,7 @@
 .cfi_endproc
 .size	${PREFIX}_cbc_encrypt,.-${PREFIX}_cbc_encrypt
 ___
-} 
+}
 # int ${PREFIX}_set_decrypt_key(const unsigned char *inp,
 #				int bits, AES_KEY *key)
 #
@@ -4343,6 +4354,11 @@
 ${PREFIX}_set_encrypt_key:
 __aesni_set_encrypt_key:
 .cfi_startproc
+#ifndef NDEBUG
+#ifndef BORINGSSL_FIPS
+	movb \$1,BORINGSSL_function_hit+3(%rip)
+#endif
+#endif
 	.byte	0x48,0x83,0xEC,0x08	# sub rsp,8
 .cfi_adjust_cfa_offset	8
 	mov	\$-1,%rax
diff --git a/crypto/fipsmodule/aes/asm/bsaes-x86_64.pl b/crypto/fipsmodule/aes/asm/bsaes-x86_64.pl
index f0031ef..81331bf 100644
--- a/crypto/fipsmodule/aes/asm/bsaes-x86_64.pl
+++ b/crypto/fipsmodule/aes/asm/bsaes-x86_64.pl
@@ -1920,6 +1920,12 @@
 .align	16
 bsaes_ctr32_encrypt_blocks:
 .cfi_startproc
+#ifndef NDEBUG
+#ifndef BORINGSSL_FIPS
+.extern	BORINGSSL_function_hit
+	movb \$1, BORINGSSL_function_hit+6(%rip)
+#endif
+#endif
 	mov	%rsp, %rax
 .Lctr_enc_prologue:
 	push	%rbp
diff --git a/crypto/fipsmodule/aes/asm/vpaes-x86.pl b/crypto/fipsmodule/aes/asm/vpaes-x86.pl
index 5f4b208..81e7e84 100644
--- a/crypto/fipsmodule/aes/asm/vpaes-x86.pl
+++ b/crypto/fipsmodule/aes/asm/vpaes-x86.pl
@@ -69,6 +69,9 @@
 my  ($round, $base, $magic, $key, $const, $inp, $out)=
     ("eax",  "ebx", "ecx",  "edx","ebp",  "esi","edi");
 
+&preprocessor_ifndef("NDEBUG");  # terminate the statement explicitly
+&external_label("BORINGSSL_function_hit");  # used by record_function_hit
+&preprocessor_endif();
 &static_label("_vpaes_consts");
 &static_label("_vpaes_schedule_low_round");
 
@@ -758,6 +761,8 @@
 # Interface to OpenSSL
 #
 &function_begin("${PREFIX}_set_encrypt_key");
+	&record_function_hit(5);
+
 	&mov	($inp,&wparam(0));		# inp
 	&lea	($base,&DWP(-56,"esp"));
 	&mov	($round,&wparam(1));		# bits
@@ -812,6 +817,8 @@
 &function_end("${PREFIX}_set_decrypt_key");
 
 &function_begin("${PREFIX}_encrypt");
+	&record_function_hit(4);
+
 	&lea	($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point")));
 	&call	("_vpaes_preheat");
 &set_label("pic_point");
diff --git a/crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl b/crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl
index 3d0600f..b9edb79 100644
--- a/crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl
+++ b/crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl
@@ -664,6 +664,12 @@
 .align	32
 aesni_gcm_encrypt:
 .cfi_startproc
+#ifndef NDEBUG
+#ifndef BORINGSSL_FIPS
+.extern	BORINGSSL_function_hit
+	movb \$1,BORINGSSL_function_hit+2(%rip)
+#endif
+#endif
 	xor	$ret,$ret
 
 	# We call |_aesni_ctr32_6x| twice, each call consuming 96 bytes of
diff --git a/crypto/impl_dispatch_test.cc b/crypto/impl_dispatch_test.cc
new file mode 100644
index 0000000..efe12b4
--- /dev/null
+++ b/crypto/impl_dispatch_test.cc
@@ -0,0 +1,153 @@
+/* Copyright (c) 2018, Google Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
+
+#include <openssl/base.h>
+
+#if !defined(NDEBUG) && !defined(BORINGSSL_FIPS) && \
+    !defined(BORINGSSL_SHARED_LIBRARY)
+
+#include <functional>
+#include <utility>
+#include <vector>
+
+#include <openssl/aead.h>
+#include <openssl/aes.h>
+#include <openssl/cpu.h>
+#include <openssl/mem.h>
+
+#include <gtest/gtest.h>
+
+#include "internal.h"
+
+
+class ImplDispatchTest : public ::testing::Test {
+ public:
+  void SetUp() override {
+#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
+    aesni_ = OPENSSL_ia32cap_P[1] & (1 << (57 - 32));
+    avx_movbe_ = ((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41;
+    ssse3_ = OPENSSL_ia32cap_P[1] & (1 << (41 - 32));
+    is_x86_64_ =
+#if defined(OPENSSL_X86_64)
+        true;
+#else
+        false;
+#endif
+#endif  // X86 || X86_64
+  }
+
+ protected:
+  // AssertFunctionsHit takes a list of pairs (flag index, boolean), and a
+  // function to test. It runs the given function and asserts, for each flag
+  // index, that the boolean reflects whether that flag index was written or
+  // not, and that no other flagged functions were triggered.
+  void AssertFunctionsHit(std::vector<std::pair<size_t, bool>> flags,
+                          std::function<void()> f) {
+    OPENSSL_memset(BORINGSSL_function_hit, 0, sizeof(BORINGSSL_function_hit));
+
+    f();
+
+    for (const auto flag : flags) {
+      SCOPED_TRACE(flag.first);
+
+      ASSERT_LT(flag.first, sizeof(BORINGSSL_function_hit));
+      EXPECT_EQ(flag.second, BORINGSSL_function_hit[flag.first] == 1);
+      BORINGSSL_function_hit[flag.first] = 0;
+    }
+
+    for (size_t i = 0; i < sizeof(BORINGSSL_function_hit); i++) {
+      EXPECT_EQ(0u, BORINGSSL_function_hit[i])
+          << "Flag " << i << " unexpectedly hit";
+    }
+  }
+
+#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
+  bool aesni_ = false;
+  bool avx_movbe_ = false;
+  bool ssse3_ = false;
+  bool is_x86_64_ = false;
+#endif
+};
+
+#if !defined(OPENSSL_NO_ASM) && \
+    (defined(OPENSSL_X86) || defined(OPENSSL_X86_64))
+
+constexpr size_t kFlag_aes_hw_ctr32_encrypt_blocks = 0;
+constexpr size_t kFlag_aes_hw_encrypt = 1;
+constexpr size_t kFlag_aesni_gcm_encrypt = 2;
+constexpr size_t kFlag_aes_hw_set_encrypt_key = 3;
+constexpr size_t kFlag_vpaes_encrypt = 4;
+constexpr size_t kFlag_vpaes_set_encrypt_key = 5;
+constexpr size_t kFlag_bsaes_ctr32_encrypt_blocks = 6;
+
+TEST_F(ImplDispatchTest, AEAD_AES_GCM) {  // One AES-128-GCM seal operation.
+  AssertFunctionsHit(
+      {
+          {kFlag_aes_hw_ctr32_encrypt_blocks, aesni_},
+          {kFlag_aes_hw_encrypt, aesni_},
+          {kFlag_aes_hw_set_encrypt_key, aesni_},
+          {kFlag_aesni_gcm_encrypt, is_x86_64_ && aesni_ && avx_movbe_},
+          {kFlag_vpaes_encrypt, !is_x86_64_ && ssse3_ && !aesni_},
+          {kFlag_vpaes_set_encrypt_key, !is_x86_64_ && ssse3_ && !aesni_},
+          {kFlag_bsaes_ctr32_encrypt_blocks, is_x86_64_ && ssse3_ && !aesni_},
+      },
+      [] {
+        const uint8_t kZeros[16] = {0};
+        const uint8_t kPlaintext[40] = {1, 2, 3, 4, 0};
+        uint8_t ciphertext[sizeof(kPlaintext) + 16];
+        size_t ciphertext_len;
+        bssl::ScopedEVP_AEAD_CTX ctx;  // Releases the context on every exit.
+        ASSERT_TRUE(EVP_AEAD_CTX_init(ctx.get(), EVP_aead_aes_128_gcm(), kZeros,
+                                      sizeof(kZeros),
+                                      EVP_AEAD_DEFAULT_TAG_LENGTH, nullptr));
+        ASSERT_TRUE(EVP_AEAD_CTX_seal(
+            ctx.get(), ciphertext, &ciphertext_len, sizeof(ciphertext), kZeros,
+            EVP_AEAD_nonce_length(EVP_aead_aes_128_gcm()), kPlaintext,
+            sizeof(kPlaintext), nullptr, 0));
+      });
+}
+
+TEST_F(ImplDispatchTest, AES_set_encrypt_key) {
+  AssertFunctionsHit(
+      {
+          {kFlag_aes_hw_set_encrypt_key, aesni_},
+          // VPAES / BSAES will not be used for the |AES_*| functions.
+      },
+      [] {
+        AES_KEY key;
+        static const uint8_t kZeros[16] = {0};
+        ASSERT_EQ(0, AES_set_encrypt_key(kZeros, sizeof(kZeros) * 8, &key));
+      });
+}
+
+TEST_F(ImplDispatchTest, AES_single_block) {
+  AES_KEY key;
+  static const uint8_t kZeros[16] = {0};
+  ASSERT_EQ(0, AES_set_encrypt_key(kZeros, sizeof(kZeros) * 8, &key));
+
+  AssertFunctionsHit(
+      {
+          {kFlag_aes_hw_encrypt, aesni_},
+          // VPAES / BSAES will not be used for the |AES_*| functions.
+      },
+      [&key] {
+        uint8_t in[AES_BLOCK_SIZE] = {0};
+        uint8_t out[AES_BLOCK_SIZE];
+        AES_encrypt(in, out, &key);
+      });
+}
+
+#endif  // X86 || X86_64
+
+#endif  // !NDEBUG && !FIPS && !SHARED_LIBRARY
diff --git a/crypto/perlasm/x86_64-xlate.pl b/crypto/perlasm/x86_64-xlate.pl
index 213093d..2e65d1d 100755
--- a/crypto/perlasm/x86_64-xlate.pl
+++ b/crypto/perlasm/x86_64-xlate.pl
@@ -1181,7 +1181,17 @@
 
     $line =~ s|\R$||;           # Better chomp
 
-    $line =~ s|[#!].*$||;	# get rid of asm-style comments...
+    if ($nasm) {
+	$line =~ s|^#ifdef |%ifdef |;
+	$line =~ s|^#ifndef |%ifndef |;
+	$line =~ s|^#endif|%endif|;
+	$line =~ s|[#!].*$||;	# get rid of asm-style comments...
+    } else {
+	# Get rid of asm-style comments but not preprocessor directives. The
+	# latter are identified by not having a space after the '#'.
+	$line =~ s|[#!] .*$||;
+    }
+
     $line =~ s|/\*.*\*/||;	# ... and C-style comments...
     $line =~ s|^\s+||;		# ... and skip white spaces in beginning
     $line =~ s|\s+$||;		# ... and at the end
diff --git a/crypto/perlasm/x86asm.pl b/crypto/perlasm/x86asm.pl
index 3d2943b..3278188 100644
--- a/crypto/perlasm/x86asm.pl
+++ b/crypto/perlasm/x86asm.pl
@@ -33,6 +33,26 @@
     &generic($opcode,@_) or die "undefined subroutine \&$AUTOLOAD";
 }
 
+# record_function_hit(int) emits code that writes a byte with value one to the
+# given offset of |BORINGSSL_function_hit|, wrapped in an NDEBUG-not-defined
+# guard. ebx and edx are saved and restored around the store. This is used in
+# impl_dispatch_test.cc to check which assembly functions API calls trigger.
+sub ::record_function_hit
+{ my($index)=@_;
+    &preprocessor_ifndef("NDEBUG");
+    &push("ebx");  # save the two scratch registers used below
+    &push("edx");
+    &call(&label("pic"));  # call + pop: presumably leaves the address of "pic" in ebx
+    &set_label("pic");
+    &blindpop("ebx");
+    &lea("ebx",&DWP("BORINGSSL_function_hit+$index"."-".&label("pic"),"ebx"));
+    &mov("edx", 1);
+    &movb(&BP(0, "ebx"), "dl");  # store the byte 1 at the address computed above
+    &pop("edx");  # restore in reverse order of the pushes
+    &pop("ebx");
+    &preprocessor_endif();
+}
+
 sub ::emit
 { my $opcode=shift;
 
diff --git a/crypto/perlasm/x86gas.pl b/crypto/perlasm/x86gas.pl
index 0c989a1..4e19a89 100644
--- a/crypto/perlasm/x86gas.pl
+++ b/crypto/perlasm/x86gas.pl
@@ -265,6 +265,14 @@
 sub ::dataseg
 {   push(@out,".data\n");   }
 
+sub ::preprocessor_ifndef
+{ my($define)=@_;
+    push(@out,"#ifndef ${define}\n");
+}
+
+sub ::preprocessor_endif
+{ push(@out,"#endif\n");    }
+
 *::hidden = sub { push(@out,".hidden\t$nmdecor$_[0]\n"); } if ($::elf);
 
 1;
diff --git a/crypto/perlasm/x86masm.pl b/crypto/perlasm/x86masm.pl
index dffee76..01bfbf3 100644
--- a/crypto/perlasm/x86masm.pl
+++ b/crypto/perlasm/x86masm.pl
@@ -203,4 +203,12 @@
     push(@out,"ENDIF\n");
 }
 
+sub ::preprocessor_ifndef  # opens a MASM conditional-assembly block
+{ my($define)=@_;          # $define: symbol that must be undefined
+    push(@out,"IFNDEF ${define}\n");  # MASM spelling; %ifndef is NASM-only
+}
+
+sub ::preprocessor_endif   # closes the block opened by preprocessor_ifndef
+{ push(@out,"ENDIF\n");    }
+
 1;
diff --git a/crypto/perlasm/x86nasm.pl b/crypto/perlasm/x86nasm.pl
index d3773b6..a3e0ab7 100644
--- a/crypto/perlasm/x86nasm.pl
+++ b/crypto/perlasm/x86nasm.pl
@@ -191,4 +191,12 @@
     push(@out,"%endif\n");
 }
 
+sub ::preprocessor_ifndef
+{ my($define)=@_;
+    push(@out,"%ifndef ${define}\n");
+}
+
+sub ::preprocessor_endif
+{ push(@out,"%endif\n");    }
+
 1;
diff --git a/include/openssl/cpu.h b/include/openssl/cpu.h
index edac98e..ad5fc94 100644
--- a/include/openssl/cpu.h
+++ b/include/openssl/cpu.h
@@ -190,6 +190,21 @@
 
 #endif  // OPENSSL_PPC64LE
 
+#if !defined(NDEBUG) && !defined(BORINGSSL_FIPS)
+// Runtime CPU dispatch testing support
+
+// BORINGSSL_function_hit is an array of flags. The following functions will
+// set these flags in non-FIPS builds if NDEBUG is not defined.
+//   0: aes_hw_ctr32_encrypt_blocks
+//   1: aes_hw_encrypt
+//   2: aesni_gcm_encrypt
+//   3: aes_hw_set_encrypt_key
+//   4: vpaes_encrypt
+//   5: vpaes_set_encrypt_key
+//   6: bsaes_ctr32_encrypt_blocks
+extern uint8_t BORINGSSL_function_hit[7];
+#endif  // !NDEBUG && !FIPS
+
 
 #if defined(__cplusplus)
 }  // extern C