Add ABI testing for 32-bit x86.

This is much less interesting (stack-based parameters, Windows and SysV
match, no SEH concerns as far as I can tell) than x86_64, but it was
easy to do and I'm more familiar with x86 than ARM, so it made a better
second architecture to make sure all the architecture ifdefs worked out.

Also fix a bug in the x86_64 direction flag code. It was shifting in the
wrong direction, making it give 0 or 1<<20 rather than 0 or 1.

(Happily, x86_64 appears to be unique in having vastly different calling
conventions between OSs. x86 is the same between SysV and Windows, and
ARM had the good sense to specify a (mostly) common set of rules.)

Since a lot of the assembly functions use the same names and the tests
were written generically, merely dropping in a trampoline and
CallerState implementation gives us a bunch of ABI tests for free.

Change-Id: I15408c18d43e88cfa1c5c0634a8b268a150ed961
Reviewed-on: https://boringssl-review.googlesource.com/c/34624
Reviewed-by: Adam Langley <agl@google.com>
Commit-Queue: David Benjamin <davidben@google.com>
diff --git a/crypto/CMakeLists.txt b/crypto/CMakeLists.txt
index e940f7d..a3cdc08 100644
--- a/crypto/CMakeLists.txt
+++ b/crypto/CMakeLists.txt
@@ -121,6 +121,7 @@
     CRYPTO_ARCH_SOURCES
 
     chacha/chacha-x86.${ASM_EXT}
+    test/trampoline-x86.${ASM_EXT}
   )
 endif()
 
@@ -142,6 +143,7 @@
 perlasm(chacha/chacha-x86_64.${ASM_EXT} chacha/asm/chacha-x86_64.pl)
 perlasm(cipher_extra/aes128gcmsiv-x86_64.${ASM_EXT} cipher_extra/asm/aes128gcmsiv-x86_64.pl)
 perlasm(cipher_extra/chacha20_poly1305_x86_64.${ASM_EXT} cipher_extra/asm/chacha20_poly1305_x86_64.pl)
+perlasm(test/trampoline-x86.${ASM_EXT} test/asm/trampoline-x86.pl)
 perlasm(test/trampoline-x86_64.${ASM_EXT} test/asm/trampoline-x86_64.pl)
 
 add_custom_command(
diff --git a/crypto/abi_self_test.cc b/crypto/abi_self_test.cc
index 025a32d..5352b80 100644
--- a/crypto/abi_self_test.cc
+++ b/crypto/abi_self_test.cc
@@ -48,6 +48,7 @@
   CHECK_ABI(abi_test_trampoline, reinterpret_cast<crypto_word_t>(TestFunction),
             &state, argv, 10, 0 /* no breakpoint */);
 
+#if defined(OPENSSL_X86_64)
   if (abi_test::UnwindTestsEnabled()) {
     EXPECT_NONFATAL_FAILURE(CHECK_ABI(abi_test_bad_unwind_wrong_register),
                             "was not recovered unwinding");
@@ -57,6 +58,7 @@
     CHECK_ABI_NO_UNWIND(abi_test_bad_unwind_wrong_register);
     CHECK_ABI_NO_UNWIND(abi_test_bad_unwind_temporary);
   }
+#endif  // OPENSSL_X86_64
 #endif  // SUPPORTS_ABI_TEST
 }
 
@@ -206,3 +208,59 @@
 #endif  // OPENSSL_WINDOWS
 
 #endif   // OPENSSL_X86_64 && SUPPORTS_ABI_TEST
+
+#if defined(OPENSSL_X86) && defined(SUPPORTS_ABI_TEST)
+extern "C" {
+void abi_test_clobber_eax(void);
+void abi_test_clobber_ebx(void);
+void abi_test_clobber_ecx(void);
+void abi_test_clobber_edx(void);
+void abi_test_clobber_esi(void);
+void abi_test_clobber_edi(void);
+void abi_test_clobber_ebp(void);
+void abi_test_clobber_xmm0(void);
+void abi_test_clobber_xmm1(void);
+void abi_test_clobber_xmm2(void);
+void abi_test_clobber_xmm3(void);
+void abi_test_clobber_xmm4(void);
+void abi_test_clobber_xmm5(void);
+void abi_test_clobber_xmm6(void);
+void abi_test_clobber_xmm7(void);
+}  // extern "C"
+
+TEST(ABITest, X86) {
+  // abi_test_trampoline hides unsaved registers from the caller, so we can
+  // safely call the abi_test_clobber_* functions below.
+  abi_test::internal::CallerState state;
+  RAND_bytes(reinterpret_cast<uint8_t *>(&state), sizeof(state));
+  CHECK_ABI_NO_UNWIND(abi_test_trampoline,
+                      reinterpret_cast<crypto_word_t>(abi_test_clobber_ebx),
+                      &state, nullptr, 0, 0 /* no breakpoint */);
+
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_eax);
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_ebx),
+                          "ebx was not restored after return");
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_ecx);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_edx);
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_edi),
+                          "edi was not restored after return");
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_esi),
+                          "esi was not restored after return");
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_ebp),
+                          "ebp was not restored after return");
+
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_xmm0);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_xmm1);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_xmm2);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_xmm3);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_xmm4);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_xmm5);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_xmm6);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_xmm7);
+
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_set_direction_flag),
+                          "Direction flag set after return");
+  EXPECT_EQ(0, abi_test_get_and_clear_direction_flag())
+      << "CHECK_ABI did not insulate the caller from direction flag errors";
+}
+#endif   // OPENSSL_X86 && SUPPORTS_ABI_TEST
diff --git a/crypto/fipsmodule/modes/gcm_test.cc b/crypto/fipsmodule/modes/gcm_test.cc
index 54827ca..c1b2c9f 100644
--- a/crypto/fipsmodule/modes/gcm_test.cc
+++ b/crypto/fipsmodule/modes/gcm_test.cc
@@ -124,7 +124,7 @@
             CRYPTO_bswap8(UINT64_C(0x0102030405060708)));
 }
 
-#if defined(GHASH_ASM_X86_64) && defined(SUPPORTS_ABI_TEST)
+#if defined(SUPPORTS_ABI_TEST) && defined(GHASH_ASM)
 TEST(GCMTest, ABI) {
   static const uint64_t kH[2] = {
       UINT64_C(0x66e94bd4ef8a2c3b),
@@ -141,11 +141,19 @@
 
   alignas(16) u128 Htable[16];
   CHECK_ABI(gcm_init_4bit, Htable, kH);
+#if defined(GHASH_ASM_X86)
+  CHECK_ABI(gcm_gmult_4bit_mmx, X, Htable);
+  for (size_t blocks : kBlockCounts) {
+    CHECK_ABI(gcm_ghash_4bit_mmx, X, Htable, buf, 16 * blocks);
+  }
+#else
   CHECK_ABI(gcm_gmult_4bit, X, Htable);
   for (size_t blocks : kBlockCounts) {
     CHECK_ABI(gcm_ghash_4bit, X, Htable, buf, 16 * blocks);
   }
+#endif  // GHASH_ASM_X86
 
+#if defined(GHASH_ASM_X86_64)
   if (gcm_ssse3_capable()) {
     CHECK_ABI(gcm_init_ssse3, Htable, kH);
     CHECK_ABI(gcm_gmult_ssse3, X, Htable);
@@ -153,7 +161,9 @@
       CHECK_ABI(gcm_ghash_ssse3, X, Htable, buf, 16 * blocks);
     }
   }
+#endif  // GHASH_ASM_X86_64
 
+#if defined(GHASH_ASM_X86) || defined(GHASH_ASM_X86_64)
   if (crypto_gcm_clmul_enabled()) {
     CHECK_ABI(gcm_init_clmul, Htable, kH);
     CHECK_ABI(gcm_gmult_clmul, X, Htable);
@@ -161,6 +171,7 @@
       CHECK_ABI(gcm_ghash_clmul, X, Htable, buf, 16 * blocks);
     }
 
+#if defined(GHASH_ASM_X86_64)
     if (((OPENSSL_ia32cap_get()[1] >> 22) & 0x41) == 0x41) {  // AVX+MOVBE
       CHECK_ABI(gcm_init_avx, Htable, kH);
       CHECK_ABI(gcm_gmult_avx, X, Htable);
@@ -168,10 +179,12 @@
         CHECK_ABI(gcm_ghash_avx, X, Htable, buf, 16 * blocks);
       }
     }
+#endif  // GHASH_ASM_X86_64
   }
+#endif  // GHASH_ASM_X86 || GHASH_ASM_X86_64
 }
 
-#if defined(OPENSSL_WINDOWS)
+#if defined(OPENSSL_WINDOWS) && defined(GHASH_ASM_X86_64)
 // Sanity-check the SEH unwind codes in ghash-ssse3-x86_64.pl.
 // TODO(davidben): Implement unwind testing for SEH and remove this.
 static void GCMSSSE3ExceptionTest() {
@@ -203,5 +216,5 @@
 TEST(GCMTest, SEH) {
   CHECK_ABI_NO_UNWIND(GCMSSSE3ExceptionTest);
 }
-#endif  // OPENSSL_WINDOWS
-#endif  // GHASH_ASM_X86_64 && SUPPORTS_ABI_TEST
+#endif  // OPENSSL_WINDOWS && GHASH_ASM_X86_64
+#endif  // SUPPORTS_ABI_TEST && GHASH_ASM
diff --git a/crypto/test/abi_test.h b/crypto/test/abi_test.h
index 370d455..4ff42d1 100644
--- a/crypto/test/abi_test.h
+++ b/crypto/test/abi_test.h
@@ -90,7 +90,15 @@
   CALLER_STATE_REGISTER(uint64_t, r14) \
   CALLER_STATE_REGISTER(uint64_t, r15)
 #endif  // OPENSSL_WINDOWS
-#endif  // X86_64 && SUPPORTS_ABI_TEST
+#elif defined(OPENSSL_X86)
+// See https://uclibc.org/docs/psABI-i386.pdf and
+// https://docs.microsoft.com/en-us/cpp/cpp/argument-passing-and-naming-conventions?view=vs-2017
+#define LOOP_CALLER_STATE_REGISTERS()  \
+  CALLER_STATE_REGISTER(uint32_t, esi) \
+  CALLER_STATE_REGISTER(uint32_t, edi) \
+  CALLER_STATE_REGISTER(uint32_t, ebx) \
+  CALLER_STATE_REGISTER(uint32_t, ebp)
+#endif  // X86_64 || X86
 
 // Enable ABI testing if all of the following are true.
 //
@@ -257,6 +265,7 @@
                                   const crypto_word_t *argv, size_t argc,
                                   crypto_word_t unwind);
 
+#if defined(OPENSSL_X86_64)
 // abi_test_unwind_start points at the instruction that starts unwind testing in
 // |abi_test_trampoline|. This is the value of the instruction pointer at the
 // first |SIGTRAP| during unwind testing.
@@ -285,6 +294,7 @@
 // abi_test_bad_unwind_temporary preserves the ABI, but temporarily corrupts the
 // storage space for a saved register, breaking unwind.
 void abi_test_bad_unwind_temporary(void);
+#endif  // OPENSSL_X86_64
 
 #if defined(OPENSSL_X86_64) || defined(OPENSSL_X86)
 // abi_test_get_and_clear_direction_flag clears the direction flag. If the flag
diff --git a/crypto/test/asm/trampoline-x86.pl b/crypto/test/asm/trampoline-x86.pl
new file mode 100755
index 0000000..569a3dd
--- /dev/null
+++ b/crypto/test/asm/trampoline-x86.pl
@@ -0,0 +1,123 @@
+#!/usr/bin/env perl
+# Copyright (c) 2018, Google Inc.
+#
+# Permission to use, copy, modify, and/or distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+# This file defines helper functions for crypto/test/abi_test.h on x86. See
+# that header for details on how to use this.
+#
+# For convenience, this file is linked into libcrypto, where consuming builds
+# already support architecture-specific sources. The static linker should drop
+# this code in non-test binaries. This includes a shared library build of
+# libcrypto, provided --gc-sections (ELF), -dead_strip (Mac), or equivalent is
+# used.
+#
+# References:
+#
+# SysV ABI: https://uclibc.org/docs/psABI-i386.pdf
+# Win32 ABI: https://docs.microsoft.com/en-us/cpp/cpp/argument-passing-and-naming-conventions?view=vs-2017
+
+use strict;
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/;
+my $dir = $1;
+push(@INC, "${dir}", "${dir}../../perlasm");
+require "x86asm.pl";
+
+my $output = pop;
+open STDOUT, ">$output";
+
+&asm_init($ARGV[0]);
+
+# abi_test_trampoline loads callee-saved registers from |state|, calls |func|
+# with |argv|, then saves the callee-saved registers into |state|. It returns
+# the result of |func|. |unwind| is ignored.
+# uint32_t abi_test_trampoline(void (*func)(...), CallerState *state,
+#                              const uint32_t *argv, size_t argc,
+#                              int unwind);
+&function_begin("abi_test_trampoline");
+	# Load registers from |state|. Note |function_begin| (as opposed to
+	# |function_begin_B|) automatically saves all callee-saved registers, so we
+	# may freely clobber them.
+	&mov("ecx", &wparam(1));
+	&mov("esi", &DWP(4*0, "ecx"));
+	&mov("edi", &DWP(4*1, "ecx"));
+	&mov("ebx", &DWP(4*2, "ecx"));
+	&mov("ebp", &DWP(4*3, "ecx"));
+
+	# Use a fixed stack allocation so |wparam| continues to work. abi_test.h
+	# supports at most 10 arguments. The SysV ABI requires a 16-byte-aligned
+	# stack on process entry, so round up to 3 (mod 4).
+	&stack_push(11);
+
+	# Copy parameters to stack.
+	&mov("eax", &wparam(2));
+	&xor("ecx", "ecx");
+&set_label("loop");
+	&cmp("ecx", &wparam(3));
+	&jae(&label("loop_done"));
+	&mov("edx", &DWP(0, "eax", "ecx", 4));
+	&mov(&DWP(0, "esp", "ecx", 4), "edx");
+	&add("ecx", 1);
+	&jmp(&label("loop"));
+
+&set_label("loop_done");
+	&call_ptr(&wparam(0));
+
+	&stack_pop(11);
+
+	# Save registers back into |state|.
+	&mov("ecx", &wparam(1));
+	&mov(&DWP(4*0, "ecx"), "esi");
+	&mov(&DWP(4*1, "ecx"), "edi");
+	&mov(&DWP(4*2, "ecx"), "ebx");
+	&mov(&DWP(4*3, "ecx"), "ebp");
+&function_end("abi_test_trampoline");
+
+# abi_test_get_and_clear_direction_flag clears the direction flag. If the flag
+# was previously set, it returns one. Otherwise, it returns zero.
+# int abi_test_get_and_clear_direction_flag(void);
+&function_begin_B("abi_test_get_and_clear_direction_flag");
+	&pushf();
+	&pop("eax");
+	&and("eax", 0x400);
+	&shr("eax", 10);
+	&cld();
+	&ret();
+&function_end_B("abi_test_get_and_clear_direction_flag");
+
+# abi_test_set_direction_flag sets the direction flag.
+# void abi_test_set_direction_flag(void);
+&function_begin_B("abi_test_set_direction_flag");
+	&std();
+	&ret();
+&function_end_B("abi_test_set_direction_flag");
+
+# abi_test_clobber_* zeros the corresponding register. These are used to test
+# the ABI-testing framework.
+foreach ("eax", "ebx", "ecx", "edx", "edi", "esi", "ebp") {
+&function_begin_B("abi_test_clobber_$_");
+	&xor($_, $_);
+	&ret();
+&function_end_B("abi_test_clobber_$_");
+}
+foreach (0..7) {
+&function_begin_B("abi_test_clobber_xmm$_");
+	&pxor("xmm$_", "xmm$_");
+	&ret();
+&function_end_B("abi_test_clobber_xmm$_");
+}
+
+&asm_finish();
+
+close STDOUT;
diff --git a/crypto/test/asm/trampoline-x86_64.pl b/crypto/test/asm/trampoline-x86_64.pl
index 4af9b2a..57d70b2 100755
--- a/crypto/test/asm/trampoline-x86_64.pl
+++ b/crypto/test/asm/trampoline-x86_64.pl
@@ -376,7 +376,7 @@
 	pushfq
 	popq	%rax
 	andq	\$0x400, %rax
-	shlq	\$10, %rax
+	shrq	\$10, %rax
 	cld
 	ret
 .size abi_test_get_and_clear_direction_flag,.-abi_test_get_and_clear_direction_flag