Be less clever with CHECK_ABI.

Unwind testing will make CHECK_ABI much slower. The original
ptrace-based design is some 10,000x slower. I've found an alternate
design that's a mere 1,000x slower, but this probably warrants being
more straightforward. It also removes the weirdness where NDEBUG
controlled which tests were run.

While it does mean we need to write some extra tests for p256-x86_64.pl,
we otherwise do not directly unit test our assembly anyway. Usually we
test the public crypto APIs themselves. So, for most files, this isn't
actually extra work.

Bug: 181
Change-Id: I7cbb7f930c2ea6ae32a201da503dcd36844704f0
Reviewed-on: https://boringssl-review.googlesource.com/c/33965
Commit-Queue: David Benjamin <davidben@google.com>
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/fipsmodule/ec/p256-x86_64_test.cc b/crypto/fipsmodule/ec/p256-x86_64_test.cc
index 202ea7e..f699fc8 100644
--- a/crypto/fipsmodule/ec/p256-x86_64_test.cc
+++ b/crypto/fipsmodule/ec/p256-x86_64_test.cc
@@ -50,7 +50,7 @@
 
   for (int i = 0; i <= 16; i++) {
     P256_POINT val;
-    CHECK_ABI(ecp_nistz256_select_w5, &val, table, i);
+    ecp_nistz256_select_w5(&val, table, i);
 
     P256_POINT expected;
     if (i == 0) {
@@ -62,6 +62,11 @@
     EXPECT_EQ(Bytes(reinterpret_cast<const char *>(&expected), sizeof(expected)),
               Bytes(reinterpret_cast<const char *>(&val), sizeof(val)));
   }
+
+  // This is a constant-time function, so it is only necessary to instrument one
+  // index for ABI checking.
+  P256_POINT val;
+  CHECK_ABI(ecp_nistz256_select_w5, &val, table, 7);
 }
 
 TEST(P256_X86_64Test, SelectW7) {
@@ -74,7 +79,7 @@
 
   for (int i = 0; i <= 64; i++) {
     P256_POINT_AFFINE val;
-    CHECK_ABI(ecp_nistz256_select_w7, &val, table, i);
+    ecp_nistz256_select_w7(&val, table, i);
 
     P256_POINT_AFFINE expected;
     if (i == 0) {
@@ -86,6 +91,11 @@
     EXPECT_EQ(Bytes(reinterpret_cast<const char *>(&expected), sizeof(expected)),
               Bytes(reinterpret_cast<const char *>(&val), sizeof(val)));
   }
+
+  // This is a constant-time function, so it is only necessary to instrument one
+  // index for ABI checking.
+  P256_POINT_AFFINE val;
+  CHECK_ABI(ecp_nistz256_select_w7, &val, table, 42);
 }
 
 TEST(P256_X86_64Test, BEEU) {
@@ -107,12 +117,17 @@
   OPENSSL_memset(in, 0, sizeof(in));
 
   // Trying to find the inverse of zero should fail.
+  ASSERT_FALSE(beeu_mod_inverse_vartime(out, in, order_words));
+  // This is not a constant-time function, so instrument both zero and a few
+  // inputs below.
   ASSERT_FALSE(CHECK_ABI(beeu_mod_inverse_vartime, out, in, order_words));
 
   // kOneMont is 1, in Montgomery form.
   static const BN_ULONG kOneMont[P256_LIMBS] = {
-      TOBN(0xc46353d, 0x039cdaaf), TOBN(0x43190552, 0x58e8617b),
-      0, 0xffffffff,
+      TOBN(0xc46353d, 0x039cdaaf),
+      TOBN(0x43190552, 0x58e8617b),
+      0,
+      0xffffffff,
   };
 
   for (BN_ULONG i = 1; i < 2000; i++) {
@@ -128,7 +143,7 @@
     }
 
     EXPECT_TRUE(bn_less_than_words(in, order_words, P256_LIMBS));
-    ASSERT_TRUE(CHECK_ABI(beeu_mod_inverse_vartime, out, in, order_words));
+    ASSERT_TRUE(beeu_mod_inverse_vartime(out, in, order_words));
     EXPECT_TRUE(bn_less_than_words(out, order_words, P256_LIMBS));
 
     // Calculate out*in and confirm that it equals one, modulo the order.
@@ -141,8 +156,12 @@
     EXPECT_EQ(0, OPENSSL_memcmp(kOneMont, &result, sizeof(kOneMont)));
 
     // Invert the result and expect to get back to the original value.
-    ASSERT_TRUE(CHECK_ABI(beeu_mod_inverse_vartime, out, out, order_words));
+    ASSERT_TRUE(beeu_mod_inverse_vartime(out, out, order_words));
     EXPECT_EQ(0, OPENSSL_memcmp(in, out, sizeof(in)));
+
+    if (i < 5) {
+      EXPECT_TRUE(CHECK_ABI(beeu_mod_inverse_vartime, out, in, order_words));
+    }
   }
 }
 
@@ -286,19 +305,19 @@
 
   // Test that -A = B.
   BN_ULONG ret[P256_LIMBS];
-  CHECK_ABI(ecp_nistz256_neg, ret, a);
+  ecp_nistz256_neg(ret, a);
   EXPECT_FIELD_ELEMENTS_EQUAL(b, ret);
 
   OPENSSL_memcpy(ret, a, sizeof(ret));
-  CHECK_ABI(ecp_nistz256_neg, ret, ret /* a */);
+  ecp_nistz256_neg(ret, ret /* a */);
   EXPECT_FIELD_ELEMENTS_EQUAL(b, ret);
 
   // Test that -B = A.
-  CHECK_ABI(ecp_nistz256_neg, ret, b);
+  ecp_nistz256_neg(ret, b);
   EXPECT_FIELD_ELEMENTS_EQUAL(a, ret);
 
   OPENSSL_memcpy(ret, b, sizeof(ret));
-  CHECK_ABI(ecp_nistz256_neg, ret, ret /* b */);
+  ecp_nistz256_neg(ret, ret /* b */);
   EXPECT_FIELD_ELEMENTS_EQUAL(a, ret);
 }
 
@@ -309,34 +328,34 @@
   ASSERT_TRUE(GetFieldElement(t, result, "Result"));
 
   BN_ULONG ret[P256_LIMBS];
-  CHECK_ABI(ecp_nistz256_mul_mont, ret, a, b);
+  ecp_nistz256_mul_mont(ret, a, b);
   EXPECT_FIELD_ELEMENTS_EQUAL(result, ret);
 
-  CHECK_ABI(ecp_nistz256_mul_mont, ret, b, a);
+  ecp_nistz256_mul_mont(ret, b, a);
   EXPECT_FIELD_ELEMENTS_EQUAL(result, ret);
 
   OPENSSL_memcpy(ret, a, sizeof(ret));
-  CHECK_ABI(ecp_nistz256_mul_mont, ret, ret /* a */, b);
+  ecp_nistz256_mul_mont(ret, ret /* a */, b);
   EXPECT_FIELD_ELEMENTS_EQUAL(result, ret);
 
   OPENSSL_memcpy(ret, a, sizeof(ret));
-  CHECK_ABI(ecp_nistz256_mul_mont, ret, b, ret);
+  ecp_nistz256_mul_mont(ret, b, ret);
   EXPECT_FIELD_ELEMENTS_EQUAL(result, ret);
 
   OPENSSL_memcpy(ret, b, sizeof(ret));
-  CHECK_ABI(ecp_nistz256_mul_mont, ret, a, ret /* b */);
+  ecp_nistz256_mul_mont(ret, a, ret /* b */);
   EXPECT_FIELD_ELEMENTS_EQUAL(result, ret);
 
   OPENSSL_memcpy(ret, b, sizeof(ret));
-  CHECK_ABI(ecp_nistz256_mul_mont, ret, ret /* b */, a);
+  ecp_nistz256_mul_mont(ret, ret /* b */, a);
   EXPECT_FIELD_ELEMENTS_EQUAL(result, ret);
 
   if (OPENSSL_memcmp(a, b, sizeof(a)) == 0) {
-    CHECK_ABI(ecp_nistz256_sqr_mont, ret, a);
+    ecp_nistz256_sqr_mont(ret, a);
     EXPECT_FIELD_ELEMENTS_EQUAL(result, ret);
 
     OPENSSL_memcpy(ret, a, sizeof(ret));
-    CHECK_ABI(ecp_nistz256_sqr_mont, ret, ret /* a */);
+    ecp_nistz256_sqr_mont(ret, ret /* a */);
     EXPECT_FIELD_ELEMENTS_EQUAL(result, ret);
   }
 }
@@ -347,11 +366,11 @@
   ASSERT_TRUE(GetFieldElement(t, result, "Result"));
 
   BN_ULONG ret[P256_LIMBS];
-  CHECK_ABI(ecp_nistz256_from_mont, ret, a);
+  ecp_nistz256_from_mont(ret, a);
   EXPECT_FIELD_ELEMENTS_EQUAL(result, ret);
 
   OPENSSL_memcpy(ret, a, sizeof(ret));
-  CHECK_ABI(ecp_nistz256_from_mont, ret, ret /* a */);
+  ecp_nistz256_from_mont(ret, ret /* a */);
   EXPECT_FIELD_ELEMENTS_EQUAL(result, ret);
 }
 
@@ -368,26 +387,26 @@
   ASSERT_TRUE(GetFieldElement(t, result.Y, "Result.Y"));
 
   P256_POINT ret;
-  CHECK_ABI(ecp_nistz256_point_add, &ret, &a, &b);
+  ecp_nistz256_point_add(&ret, &a, &b);
   EXPECT_POINTS_EQUAL(&result, &ret);
 
-  CHECK_ABI(ecp_nistz256_point_add, &ret, &b, &a);
+  ecp_nistz256_point_add(&ret, &b, &a);
   EXPECT_POINTS_EQUAL(&result, &ret);
 
   OPENSSL_memcpy(&ret, &a, sizeof(ret));
-  CHECK_ABI(ecp_nistz256_point_add, &ret, &ret /* a */, &b);
+  ecp_nistz256_point_add(&ret, &ret /* a */, &b);
   EXPECT_POINTS_EQUAL(&result, &ret);
 
   OPENSSL_memcpy(&ret, &a, sizeof(ret));
-  CHECK_ABI(ecp_nistz256_point_add, &ret, &b, &ret /* a */);
+  ecp_nistz256_point_add(&ret, &b, &ret /* a */);
   EXPECT_POINTS_EQUAL(&result, &ret);
 
   OPENSSL_memcpy(&ret, &b, sizeof(ret));
-  CHECK_ABI(ecp_nistz256_point_add, &ret, &a, &ret /* b */);
+  ecp_nistz256_point_add(&ret, &a, &ret /* b */);
   EXPECT_POINTS_EQUAL(&result, &ret);
 
   OPENSSL_memcpy(&ret, &b, sizeof(ret));
-  CHECK_ABI(ecp_nistz256_point_add, &ret, &ret /* b */, &a);
+  ecp_nistz256_point_add(&ret, &ret /* b */, &a);
   EXPECT_POINTS_EQUAL(&result, &ret);
 
   P256_POINT_AFFINE a_affine, b_affine, infinity;
@@ -399,27 +418,27 @@
   // point at infinity.
   if (OPENSSL_memcmp(&a_affine, &b_affine, sizeof(a_affine)) != 0 ||
       OPENSSL_memcmp(&a_affine, &infinity, sizeof(a_affine)) == 0) {
-    CHECK_ABI(ecp_nistz256_point_add_affine, &ret, &a, &b_affine);
+    ecp_nistz256_point_add_affine(&ret, &a, &b_affine);
     EXPECT_POINTS_EQUAL(&result, &ret);
 
     OPENSSL_memcpy(&ret, &a, sizeof(ret));
-    CHECK_ABI(ecp_nistz256_point_add_affine, &ret, &ret /* a */, &b_affine);
+    ecp_nistz256_point_add_affine(&ret, &ret /* a */, &b_affine);
     EXPECT_POINTS_EQUAL(&result, &ret);
 
-    CHECK_ABI(ecp_nistz256_point_add_affine, &ret, &b, &a_affine);
+    ecp_nistz256_point_add_affine(&ret, &b, &a_affine);
     EXPECT_POINTS_EQUAL(&result, &ret);
 
     OPENSSL_memcpy(&ret, &b, sizeof(ret));
-    CHECK_ABI(ecp_nistz256_point_add_affine, &ret, &ret /* b */, &a_affine);
+    ecp_nistz256_point_add_affine(&ret, &ret /* b */, &a_affine);
     EXPECT_POINTS_EQUAL(&result, &ret);
   }
 
   if (OPENSSL_memcmp(&a, &b, sizeof(a)) == 0) {
-    CHECK_ABI(ecp_nistz256_point_double, &ret, &a);
+    ecp_nistz256_point_double(&ret, &a);
     EXPECT_POINTS_EQUAL(&result, &ret);
 
     ret = a;
-    CHECK_ABI(ecp_nistz256_point_double, &ret, &ret /* a */);
+    ecp_nistz256_point_double(&ret, &ret /* a */);
     EXPECT_POINTS_EQUAL(&result, &ret);
   }
 }
@@ -433,34 +452,34 @@
   ASSERT_TRUE(GetFieldElement(t, result, "Result"));
 
   BN_ULONG ret[P256_LIMBS];
-  CHECK_ABI(ecp_nistz256_ord_mul_mont, ret, a, b);
+  ecp_nistz256_ord_mul_mont(ret, a, b);
   EXPECT_FIELD_ELEMENTS_EQUAL(result, ret);
 
-  CHECK_ABI(ecp_nistz256_ord_mul_mont, ret, b, a);
+  ecp_nistz256_ord_mul_mont(ret, b, a);
   EXPECT_FIELD_ELEMENTS_EQUAL(result, ret);
 
   OPENSSL_memcpy(ret, a, sizeof(ret));
-  CHECK_ABI(ecp_nistz256_ord_mul_mont, ret, ret /* a */, b);
+  ecp_nistz256_ord_mul_mont(ret, ret /* a */, b);
   EXPECT_FIELD_ELEMENTS_EQUAL(result, ret);
 
   OPENSSL_memcpy(ret, a, sizeof(ret));
-  CHECK_ABI(ecp_nistz256_ord_mul_mont, ret, b, ret);
+  ecp_nistz256_ord_mul_mont(ret, b, ret);
   EXPECT_FIELD_ELEMENTS_EQUAL(result, ret);
 
   OPENSSL_memcpy(ret, b, sizeof(ret));
-  CHECK_ABI(ecp_nistz256_ord_mul_mont, ret, a, ret /* b */);
+  ecp_nistz256_ord_mul_mont(ret, a, ret /* b */);
   EXPECT_FIELD_ELEMENTS_EQUAL(result, ret);
 
   OPENSSL_memcpy(ret, b, sizeof(ret));
-  CHECK_ABI(ecp_nistz256_ord_mul_mont, ret, ret /* b */, a);
+  ecp_nistz256_ord_mul_mont(ret, ret /* b */, a);
   EXPECT_FIELD_ELEMENTS_EQUAL(result, ret);
 
   if (OPENSSL_memcmp(a, b, sizeof(a)) == 0) {
-    CHECK_ABI(ecp_nistz256_ord_sqr_mont, ret, a, 1);
+    ecp_nistz256_ord_sqr_mont(ret, a, 1);
     EXPECT_FIELD_ELEMENTS_EQUAL(result, ret);
 
     OPENSSL_memcpy(ret, a, sizeof(ret));
-    CHECK_ABI(ecp_nistz256_ord_sqr_mont, ret, ret /* a */, 1);
+    ecp_nistz256_ord_sqr_mont(ret, ret /* a */, 1);
     EXPECT_FIELD_ELEMENTS_EQUAL(result, ret);
   }
 }
@@ -484,4 +503,75 @@
   });
 }
 
+// Instrument the functions covered in TestVectors for ABI checking.
+TEST(P256_X86_64Test, ABI) {
+  BN_ULONG a[P256_LIMBS], b[P256_LIMBS], c[P256_LIMBS];
+  OPENSSL_memset(a, 0x01, sizeof(a));
+  // These functions are all constant-time, so it is only necessary to
+  // instrument one call each for ABI checking.
+  CHECK_ABI(ecp_nistz256_neg, b, a);
+  CHECK_ABI(ecp_nistz256_mul_mont, c, a, b);
+  CHECK_ABI(ecp_nistz256_sqr_mont, c, a);
+  CHECK_ABI(ecp_nistz256_from_mont, c, a);
+  CHECK_ABI(ecp_nistz256_ord_mul_mont, c, a, b);
+
+  // Check a few different loop counts.
+  CHECK_ABI(ecp_nistz256_ord_sqr_mont, b, a, 1);
+  CHECK_ABI(ecp_nistz256_ord_sqr_mont, b, a, 3);
+
+  // Point addition has some special cases around infinity and doubling. Test a
+  // few different scenarios.
+  static const P256_POINT kA = {
+      {TOBN(0x60559ac7, 0xc8d0d89d), TOBN(0x6cda3400, 0x545f7e2c),
+       TOBN(0x9b5159e0, 0x323e6048), TOBN(0xcb8dea33, 0x27057fe6)},
+      {TOBN(0x81a2d3bc, 0xc93a2d53), TOBN(0x81f40762, 0xa4f33ccf),
+       TOBN(0xc3c3300a, 0xa8ad50ea), TOBN(0x553de89b, 0x31719830)},
+      {TOBN(0x3fd9470f, 0xb277d181), TOBN(0xc191b8d5, 0x6376f206),
+       TOBN(0xb2572c1f, 0x45eda26f), TOBN(0x4589e40d, 0xf2efc546)},
+  };
+  static const P256_POINT kB = {
+      {TOBN(0x3cf0b0aa, 0x92054341), TOBN(0xb949bb80, 0xdab57807),
+       TOBN(0x99de6814, 0xefd21b3e), TOBN(0x32ad5649, 0x7c6c6e83)},
+      {TOBN(0x06afaa02, 0x688399e0), TOBN(0x75f2d096, 0x2a3ce65c),
+       TOBN(0xf6a31eb7, 0xca0244b3), TOBN(0x57b33b7a, 0xcfeee75e)},
+      {TOBN(0x7617d2e0, 0xb4f1d35f), TOBN(0xa922cb10, 0x7f592b65),
+       TOBN(0x12fd6c7a, 0x51a2f474), TOBN(0x337d5e1e, 0xc2fc711b)},
+  };
+  // This file represents Jacobian infinity as (*, *, 0).
+  static const P256_POINT kInfinity = {
+      {TOBN(0, 0), TOBN(0, 0), TOBN(0, 0), TOBN(0, 0)},
+      {TOBN(0, 0), TOBN(0, 0), TOBN(0, 0), TOBN(0, 0)},
+      {TOBN(0, 0), TOBN(0, 0), TOBN(0, 0), TOBN(0, 0)},
+  };
+
+  P256_POINT p;
+  CHECK_ABI(ecp_nistz256_point_add, &p, &kA, &kB);
+  CHECK_ABI(ecp_nistz256_point_add, &p, &kA, &kA);
+  OPENSSL_memcpy(&p, &kA, sizeof(P256_POINT));
+  ecp_nistz256_neg(p.Y, p.Y);
+  CHECK_ABI(ecp_nistz256_point_add, &p, &kA, &p);  // A + -A
+  CHECK_ABI(ecp_nistz256_point_add, &p, &kA, &kInfinity);
+  CHECK_ABI(ecp_nistz256_point_add, &p, &kInfinity, &kA);
+  CHECK_ABI(ecp_nistz256_point_add, &p, &kInfinity, &kInfinity);
+  CHECK_ABI(ecp_nistz256_point_double, &p, &kA);
+  CHECK_ABI(ecp_nistz256_point_double, &p, &kInfinity);
+
+  static const P256_POINT_AFFINE kC = {
+      {TOBN(0x7e3ad339, 0xfb3fa5f0), TOBN(0x559d669d, 0xe3a047b2),
+       TOBN(0x8883b298, 0x7042e595), TOBN(0xfabada65, 0x7e477f08)},
+      {TOBN(0xd9cfceb8, 0xda1c3e85), TOBN(0x80863761, 0x0ce6d6bc),
+       TOBN(0xa8409d84, 0x66034f02), TOBN(0x05519925, 0x31a68d55)},
+  };
+  // This file represents affine infinity as (0, 0).
+  static const P256_POINT_AFFINE kInfinityAffine = {
+    {TOBN(0, 0), TOBN(0, 0), TOBN(0, 0), TOBN(0, 0)},
+    {TOBN(0, 0), TOBN(0, 0), TOBN(0, 0), TOBN(0, 0)},
+  };
+
+  CHECK_ABI(ecp_nistz256_point_add_affine, &p, &kA, &kC);
+  CHECK_ABI(ecp_nistz256_point_add_affine, &p, &kA, &kInfinityAffine);
+  CHECK_ABI(ecp_nistz256_point_add_affine, &p, &kInfinity, &kInfinityAffine);
+  CHECK_ABI(ecp_nistz256_point_add_affine, &p, &kInfinity, &kC);
+}
+
 #endif
diff --git a/crypto/test/abi_test.h b/crypto/test/abi_test.h
index ab9a729..c1ef8f1 100644
--- a/crypto/test/abi_test.h
+++ b/crypto/test/abi_test.h
@@ -63,7 +63,7 @@
 // See https://docs.microsoft.com/en-us/cpp/build/x64-software-conventions?view=vs-2017#register-usage
 #define LOOP_CALLER_STATE_REGISTERS()  \
   CALLER_STATE_REGISTER(uint64_t, rbx) \
-  CALLER_STATE_REGISTER(uint64_t, rdp) \
+  CALLER_STATE_REGISTER(uint64_t, rbp) \
   CALLER_STATE_REGISTER(uint64_t, rdi) \
   CALLER_STATE_REGISTER(uint64_t, rsi) \
   CALLER_STATE_REGISTER(uint64_t, r12) \
@@ -100,13 +100,8 @@
 //
 // - This is not a shared library build. Assembly functions are not reachable
 //   from tests in shared library builds.
-//
-// - This is a debug build. We can instrument release builds as well, but this
-//   ensures we have coverage for both instrumented and uninstrumented code.
-//   See the comment in |CHECK_ABI|. Note ABI testing is only meaningful for
-//   assembly, which is not affected by compiler optimizations.
 #if defined(LOOP_CALLER_STATE_REGISTERS) && !defined(OPENSSL_NO_ASM) && \
-    !defined(BORINGSSL_SHARED_LIBRARY) && !defined(NDEBUG)
+    !defined(BORINGSSL_SHARED_LIBRARY)
 #define SUPPORTS_ABI_TEST
 
 // CallerState contains all caller state that the callee is expected to
@@ -210,9 +205,13 @@
 // non-fatal GTest failure if the call did not satisfy ABI requirements.
 //
 // |CHECK_ABI| does return the value and thus may replace any function call,
-// provided it takes only simple parameters. It is recommended to integrate it
-// into functional tests of assembly. To ensure coverage of both instrumented
-// and uninstrumented calls, ABI testing is disabled in release-mode tests.
+// provided it takes only simple parameters. However, it is recommended to test
+// ABI separately from functional tests of assembly. A future unwind testing
+// extension will single-step the function, which is inefficient.
+//
+// Functional testing requires coverage of input values, while ABI testing only
+// requires branch coverage. Most of our assembly is constant-time, so usually
+// only a few instrumented calls are necessray.
 #define CHECK_ABI(...) \
   abi_test::internal::CheckGTest(#__VA_ARGS__, __FILE__, __LINE__, __VA_ARGS__)