Implement ABI testing for aarch64.
This caught a bug in bn_mul_mont. Tested manually on iOS and Android.
Change-Id: I1819fcd9ad34dbe3ba92bba952507d86dd12185a
Reviewed-on: https://boringssl-review.googlesource.com/c/34805
Commit-Queue: David Benjamin <davidben@google.com>
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/CMakeLists.txt b/crypto/CMakeLists.txt
index 2b9479b..5cdfa40 100644
--- a/crypto/CMakeLists.txt
+++ b/crypto/CMakeLists.txt
@@ -114,6 +114,7 @@
CRYPTO_ARCH_SOURCES
chacha/chacha-armv8.${ASM_EXT}
+ test/trampoline-armv8.${ASM_EXT}
)
endif()
@@ -145,6 +146,7 @@
perlasm(cipher_extra/aes128gcmsiv-x86_64.${ASM_EXT} cipher_extra/asm/aes128gcmsiv-x86_64.pl)
perlasm(cipher_extra/chacha20_poly1305_x86_64.${ASM_EXT} cipher_extra/asm/chacha20_poly1305_x86_64.pl)
perlasm(test/trampoline-armv4.${ASM_EXT} test/asm/trampoline-armv4.pl)
+perlasm(test/trampoline-armv8.${ASM_EXT} test/asm/trampoline-armv8.pl)
perlasm(test/trampoline-x86.${ASM_EXT} test/asm/trampoline-x86.pl)
perlasm(test/trampoline-x86_64.${ASM_EXT} test/asm/trampoline-x86_64.pl)
diff --git a/crypto/abi_self_test.cc b/crypto/abi_self_test.cc
index d47f37c..0ea7b32 100644
--- a/crypto/abi_self_test.cc
+++ b/crypto/abi_self_test.cc
@@ -22,9 +22,9 @@
static bool test_function_ok;
static int TestFunction(int a1, int a2, int a3, int a4, int a5, int a6, int a7,
- int a8, int a9, int a10) {
+ int a8) {
test_function_ok = a1 == 1 || a2 == 2 || a3 == 3 || a4 == 4 || a5 == 5 ||
- a6 == 6 || a7 == 7 || a8 == 8 || a9 == 9 || a10 == 10;
+ a6 == 6 || a7 == 7 || a8 == 8;
return 42;
}
@@ -32,17 +32,17 @@
EXPECT_NE(0, CHECK_ABI_NO_UNWIND(strcmp, "hello", "world"));
test_function_ok = false;
- EXPECT_EQ(42, CHECK_ABI_SEH(TestFunction, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10));
+ EXPECT_EQ(42, CHECK_ABI_SEH(TestFunction, 1, 2, 3, 4, 5, 6, 7, 8));
EXPECT_TRUE(test_function_ok);
#if defined(SUPPORTS_ABI_TEST)
abi_test::internal::CallerState state;
RAND_bytes(reinterpret_cast<uint8_t *>(&state), sizeof(state));
crypto_word_t argv[] = {
- 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+ 1, 2, 3, 4, 5, 6, 7, 8,
};
CHECK_ABI_SEH(abi_test_trampoline,
- reinterpret_cast<crypto_word_t>(TestFunction), &state, argv, 10,
+ reinterpret_cast<crypto_word_t>(TestFunction), &state, argv, 8,
0 /* no breakpoint */);
#if defined(OPENSSL_X86_64)
@@ -340,3 +340,184 @@
"d15 was not restored after return");
}
#endif // OPENSSL_ARM && SUPPORTS_ABI_TEST
+
+
+#if defined(OPENSSL_AARCH64) && defined(SUPPORTS_ABI_TEST)
+extern "C" {
+void abi_test_clobber_x0(void);
+void abi_test_clobber_x1(void);
+void abi_test_clobber_x2(void);
+void abi_test_clobber_x3(void);
+void abi_test_clobber_x4(void);
+void abi_test_clobber_x5(void);
+void abi_test_clobber_x6(void);
+void abi_test_clobber_x7(void);
+void abi_test_clobber_x8(void);
+void abi_test_clobber_x9(void);
+void abi_test_clobber_x10(void);
+void abi_test_clobber_x11(void);
+void abi_test_clobber_x12(void);
+void abi_test_clobber_x13(void);
+void abi_test_clobber_x14(void);
+void abi_test_clobber_x15(void);
+void abi_test_clobber_x16(void);
+void abi_test_clobber_x17(void);
+// x18 is the platform register and off limits.
+void abi_test_clobber_x19(void);
+void abi_test_clobber_x20(void);
+void abi_test_clobber_x21(void);
+void abi_test_clobber_x22(void);
+void abi_test_clobber_x23(void);
+void abi_test_clobber_x24(void);
+void abi_test_clobber_x25(void);
+void abi_test_clobber_x26(void);
+void abi_test_clobber_x27(void);
+void abi_test_clobber_x28(void);
+void abi_test_clobber_x29(void);
+
+void abi_test_clobber_d0(void);
+void abi_test_clobber_d1(void);
+void abi_test_clobber_d2(void);
+void abi_test_clobber_d3(void);
+void abi_test_clobber_d4(void);
+void abi_test_clobber_d5(void);
+void abi_test_clobber_d6(void);
+void abi_test_clobber_d7(void);
+void abi_test_clobber_d8(void);
+void abi_test_clobber_d9(void);
+void abi_test_clobber_d10(void);
+void abi_test_clobber_d11(void);
+void abi_test_clobber_d12(void);
+void abi_test_clobber_d13(void);
+void abi_test_clobber_d14(void);
+void abi_test_clobber_d15(void);
+void abi_test_clobber_d16(void);
+void abi_test_clobber_d17(void);
+void abi_test_clobber_d18(void);
+void abi_test_clobber_d19(void);
+void abi_test_clobber_d20(void);
+void abi_test_clobber_d21(void);
+void abi_test_clobber_d22(void);
+void abi_test_clobber_d23(void);
+void abi_test_clobber_d24(void);
+void abi_test_clobber_d25(void);
+void abi_test_clobber_d26(void);
+void abi_test_clobber_d27(void);
+void abi_test_clobber_d28(void);
+void abi_test_clobber_d29(void);
+void abi_test_clobber_d30(void);
+void abi_test_clobber_d31(void);
+
+void abi_test_clobber_v8_upper(void);
+void abi_test_clobber_v9_upper(void);
+void abi_test_clobber_v10_upper(void);
+void abi_test_clobber_v11_upper(void);
+void abi_test_clobber_v12_upper(void);
+void abi_test_clobber_v13_upper(void);
+void abi_test_clobber_v14_upper(void);
+void abi_test_clobber_v15_upper(void);
+} // extern "C"
+
+// Exercises the ABI-testing framework itself on aarch64: clobbering a register
+// listed in LOOP_CALLER_STATE_REGISTERS must be reported by
+// CHECK_ABI_NO_UNWIND, while clobbering any other register must not.
+TEST(ABITest, AArch64) {
+ // abi_test_trampoline hides unsaved registers from the caller, so we can
+ // safely call the abi_test_clobber_* functions below.
+ abi_test::internal::CallerState state;
+ RAND_bytes(reinterpret_cast<uint8_t *>(&state), sizeof(state));
+ CHECK_ABI_NO_UNWIND(abi_test_trampoline,
+ reinterpret_cast<crypto_word_t>(abi_test_clobber_x19),
+ &state, nullptr, 0, 0 /* no breakpoint */);
+
+ // x0-x17 are not callee-saved, so clobbering them must pass the check.
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_x0);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_x1);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_x2);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_x3);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_x4);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_x5);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_x6);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_x7);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_x8);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_x9);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_x10);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_x11);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_x12);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_x13);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_x14);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_x15);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_x16);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_x17);
+
+ // x19-x29 are callee-saved; each clobber must be reported.
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_x19),
+ "x19 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_x20),
+ "x20 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_x21),
+ "x21 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_x22),
+ "x22 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_x23),
+ "x23 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_x24),
+ "x24 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_x25),
+ "x25 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_x26),
+ "x26 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_x27),
+ "x27 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_x28),
+ "x28 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_x29),
+ "x29 was not restored after return");
+
+ // d0-d7 and d16-d31 may be clobbered freely; d8-d15 must be preserved.
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_d0);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_d1);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_d2);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_d3);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_d4);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_d5);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_d6);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_d7);
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_d8),
+ "d8 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_d9),
+ "d9 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_d10),
+ "d10 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_d11),
+ "d11 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_d12),
+ "d12 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_d13),
+ "d13 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_d14),
+ "d14 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_d15),
+ "d15 was not restored after return");
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_d16);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_d17);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_d18);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_d19);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_d20);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_d21);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_d22);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_d23);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_d24);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_d25);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_d26);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_d27);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_d28);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_d29);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_d30);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_d31);
+
+ // The lower halves of v8-v15 (accessed as d8-d15) must be preserved, but not
+ // the upper halves.
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_v8_upper);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_v9_upper);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_v10_upper);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_v11_upper);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_v12_upper);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_v13_upper);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_v14_upper);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_v15_upper);
+}
+#endif // OPENSSL_AARCH64 && SUPPORTS_ABI_TEST
diff --git a/crypto/test/abi_test.h b/crypto/test/abi_test.h
index e04b26c..0d685ed 100644
--- a/crypto/test/abi_test.h
+++ b/crypto/test/abi_test.h
@@ -55,12 +55,15 @@
};
// LOOP_CALLER_STATE_REGISTERS is a macro that iterates over all registers the
-// callee is expected to save for the caller.
-//
-// TODO(davidben): Add support for other architectures.
+// callee is expected to save for the caller, with the exception of the stack
+// pointer. The stack pointer is tested implicitly by the function successfully
+// returning at all.
#if defined(OPENSSL_X86_64)
+
+// References:
+// SysV64: https://github.com/hjl-tools/x86-psABI/wiki/x86-64-psABI-1.0.pdf
+// Win64: https://docs.microsoft.com/en-us/cpp/build/x64-software-conventions?view=vs-2017#register-usage
#if defined(OPENSSL_WINDOWS)
-// See https://docs.microsoft.com/en-us/cpp/build/x64-software-conventions?view=vs-2017#register-usage
#define LOOP_CALLER_STATE_REGISTERS() \
CALLER_STATE_REGISTER(uint64_t, rbx) \
CALLER_STATE_REGISTER(uint64_t, rbp) \
@@ -81,7 +84,6 @@
CALLER_STATE_REGISTER(Reg128, xmm14) \
CALLER_STATE_REGISTER(Reg128, xmm15)
#else
-// See https://github.com/hjl-tools/x86-psABI/wiki/x86-64-psABI-1.0.pdf
#define LOOP_CALLER_STATE_REGISTERS() \
CALLER_STATE_REGISTER(uint64_t, rbx) \
CALLER_STATE_REGISTER(uint64_t, rbp) \
@@ -90,19 +92,31 @@
CALLER_STATE_REGISTER(uint64_t, r14) \
CALLER_STATE_REGISTER(uint64_t, r15)
#endif // OPENSSL_WINDOWS
+
#elif defined(OPENSSL_X86)
-// See https://uclibc.org/docs/psABI-i386.pdf and
-// https://docs.microsoft.com/en-us/cpp/cpp/argument-passing-and-naming-conventions?view=vs-2017
+
+// References:
+// SysV32: https://uclibc.org/docs/psABI-i386.pdf
+// Win32: https://docs.microsoft.com/en-us/cpp/cpp/argument-passing-and-naming-conventions?view=vs-2017
#define LOOP_CALLER_STATE_REGISTERS() \
CALLER_STATE_REGISTER(uint32_t, esi) \
CALLER_STATE_REGISTER(uint32_t, edi) \
CALLER_STATE_REGISTER(uint32_t, ebx) \
CALLER_STATE_REGISTER(uint32_t, ebp)
+
#elif defined(OPENSSL_ARM)
-// Unlike x86, ARM has a common ABI across all platforms, described in
-// http://infocenter.arm.com/help/topic/com.arm.doc.ihi0042f/IHI0042F_aapcs.pdf
-// It almost specifies the callee-saved registers, except r9 is left to the
-// platform. Android and iOS differ in handling of r9.
+
+// References:
+// AAPCS: http://infocenter.arm.com/help/topic/com.arm.doc.ihi0042f/IHI0042F_aapcs.pdf
+// iOS32: https://developer.apple.com/library/archive/documentation/Xcode/Conceptual/iPhoneOSABIReference/Articles/ARMv6FunctionCallingConventions.html
+//
+// ARM specifies a common calling convention, except r9 is left to the
+// platform. Linux and iOS differ in handling of r9. iOS's behavior is defined
+// below. We found no clear reference for Linux but observed behavior from
+// LLVM. iOS 3+ treats r9 as caller-saved, while Linux treats it as
+// callee-saved. Most of our assembly treats it as callee-saved to be uniform,
+// but we match the platform to avoid false positives when testing
+// compiler-generated output.
#define LOOP_CALLER_STATE_REGISTERS_PRE_R9() \
CALLER_STATE_REGISTER(uint64_t, d8) \
CALLER_STATE_REGISTER(uint64_t, d9) \
@@ -121,24 +135,53 @@
CALLER_STATE_REGISTER(uint32_t, r10) \
CALLER_STATE_REGISTER(uint32_t, r11)
#if defined(OPENSSL_APPLE)
-// Starting iOS 3, r9 is treated as a caller-saved register. Before that, it
-// could not be used at all. Most of our assembly treats it as callee-saved
-// anyway to be uniform, but we match the platform to avoid false positives when
-// testing compiler-generated output.
-//
-// https://developer.apple.com/library/archive/documentation/Xcode/Conceptual/iPhoneOSABIReference/Articles/ARMv6FunctionCallingConventions.html
-#define LOOP_CALLER_STATE_REGISTERS() \
+#define LOOP_CALLER_STATE_REGISTERS() \
LOOP_CALLER_STATE_REGISTERS_PRE_R9() \
LOOP_CALLER_STATE_REGISTERS_POST_R9()
-#else
-// We found no clear reference which defines Linux's use of r9, but LLVM treats
-// r9 as callee-saved on non-Apple ARM platforms.
-#define LOOP_CALLER_STATE_REGISTERS() \
+#else // !OPENSSL_APPLE
+#define LOOP_CALLER_STATE_REGISTERS() \
LOOP_CALLER_STATE_REGISTERS_PRE_R9() \
- CALLER_STATE_REGISTER(uint32_t, r9) \
+ CALLER_STATE_REGISTER(uint32_t, r9) \
LOOP_CALLER_STATE_REGISTERS_POST_R9()
#endif // OPENSSL_APPLE
-#endif // X86_64 || X86 || ARM
+
+#elif defined(OPENSSL_AARCH64)
+
+// References:
+// AAPCS64: http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055b/IHI0055B_aapcs64.pdf
+// iOS64: https://developer.apple.com/library/archive/documentation/Xcode/Conceptual/iPhoneOSABIReference/Articles/ARM64FunctionCallingConventions.html
+//
+// In aarch64, r18 (x18 in a 64-bit context) is the platform register. iOS says
+// user code may not touch it. We found no clear reference for Linux. The iOS
+// behavior implies portable assembly cannot use it, and aarch64 has many
+// registers. Thus this framework ignores the register's existence. We can test
+// r18 violations with grep.
+#define LOOP_CALLER_STATE_REGISTERS() \
+ /* Per AAPCS64, section 5.1.2, only the bottom 64 bits of v8-v15 */ \
+ /* are preserved. These are accessed as dN. */ \
+ CALLER_STATE_REGISTER(uint64_t, d8) \
+ CALLER_STATE_REGISTER(uint64_t, d9) \
+ CALLER_STATE_REGISTER(uint64_t, d10) \
+ CALLER_STATE_REGISTER(uint64_t, d11) \
+ CALLER_STATE_REGISTER(uint64_t, d12) \
+ CALLER_STATE_REGISTER(uint64_t, d13) \
+ CALLER_STATE_REGISTER(uint64_t, d14) \
+ CALLER_STATE_REGISTER(uint64_t, d15) \
+ /* For consistency with dN, use the 64-bit name xN, rather than */ \
+ /* the generic rN. */ \
+ CALLER_STATE_REGISTER(uint64_t, x19) \
+ CALLER_STATE_REGISTER(uint64_t, x20) \
+ CALLER_STATE_REGISTER(uint64_t, x21) \
+ CALLER_STATE_REGISTER(uint64_t, x22) \
+ CALLER_STATE_REGISTER(uint64_t, x23) \
+ CALLER_STATE_REGISTER(uint64_t, x24) \
+ CALLER_STATE_REGISTER(uint64_t, x25) \
+ CALLER_STATE_REGISTER(uint64_t, x26) \
+ CALLER_STATE_REGISTER(uint64_t, x27) \
+ CALLER_STATE_REGISTER(uint64_t, x28) \
+ CALLER_STATE_REGISTER(uint64_t, x29)
+
+#endif // X86_64 || X86 || ARM || AARCH64
// Enable ABI testing if all of the following are true.
//
@@ -169,16 +212,28 @@
template <typename T>
inline crypto_word_t ToWord(T t) {
+#if !defined(OPENSSL_X86) && !defined(OPENSSL_X86_64) && \
+ !defined(OPENSSL_ARM) && !defined(OPENSSL_AARCH64)
+#error "Unknown architecture"
+#endif
static_assert(sizeof(T) <= sizeof(crypto_word_t),
"T is larger than crypto_word_t");
- // Functions declared to take arguments smaller than native words cannot
- // assume anything about the unused bits.
+ static_assert(sizeof(T) >= 4, "types under four bytes are complicated");
+
+ // ABIs are complex around arguments that are smaller than native words. For
+ // 32-bit architectures, the rules above imply we only have word-sized
+ // arguments. For 64-bit architectures, we still have assembly functions which
+ // take |int|.
//
- // TODO(davidben): Find authoritative citations for all supported assembly
- // architectures. This is based on observed behavior in Clang, GCC, and MSVC
- // for x86_64. The results are complex.
+ // For aarch64, AAPCS64, section 5.4.2, clauses C.7 and C.14 say any
+ // remaining bits are unspecified. iOS64 contradicts this and says the callee
+ // extends arguments up to 32 bits, and only the upper 32 bits are
+ // unspecified. Rejecting parameters smaller than 32 bits avoids the
+ // divergence.
//
- // ABI rules here may be inferred from two kinds of experiments:
+ // TODO(davidben): Find authoritative citations for x86_64. For x86_64, I
+ // observed the behavior of Clang, GCC, and MSVC. ABI rules here may be
+ // inferred from two kinds of experiments:
//
// 1. When passing a value to a small-argument-taking function, does the
// compiler ensure unused bits are cleared, sign-extended, etc.? Tests for
@@ -188,28 +243,21 @@
// 2. When compiling a small-argument-taking function, does the compiler make
// assumptions about unused bits of arguments?
//
- // Stack parameters are straightforward. As both caller and callee, all
- // compilers consistently use the minimally-sized read and write. Both SysV
- // and Windows ABIs tolerate and produce arbitrary values for unused stack
- // parameter bits.
+ // MSVC for x86_64 is straightforward. It appears to tolerate and produce
+ // arbitrary values for unused bits, like AAPCS64.
//
- // MSVC also appears to tolerate and produce arbitrary values for unused
- // register parameter bits. The SysV ABI is messier. GCC and Clang tolerate
- // and produce arbitrary values for the upper 32 bits of each register, but
- // types smaller than |int| are promoted before passing to a register. (Zero
- // or sign extension depends on signedness of the type.) When compiling a
- // callee, Clang takes advantage of this conversion, but I was unable to make
- // GCC do so.
+ // GCC and Clang for x86_64 are more complex. They match MSVC for stack
+ // parameters. However, for register parameters, they behave like iOS64 and,
+ // as callers, extend up to 32 bits, leaving the remainder arbitrary. When
+ // compiling a callee, Clang takes advantage of this conversion, but I was
+ // unable to make GCC do so.
//
// Note that, although the Win64 rules are sufficient to require our assembly
// be conservative, we wish for |CHECK_ABI| to support C-compiled functions,
// so it must enforce the correct rules for each platform.
//
- // This is all a mess so, for now, do not support parameter types smaller than
- // |int| in |CHECK_ABI|. In practice, assembly functions only use 4- and
- // 8-byte values. (And, given this behavior, we should avoid parameters
- // smaller than native words in all new code.)
- static_assert(sizeof(T) >= 4, "types under four bytes are complicated");
+ // Fortunately, the |static_assert|s above cause all supported architectures
+ // to behave the same.
crypto_word_t ret;
// Filling extra bits with 0xaa will be vastly out of bounds for code
// expecting either sign- or zero-extension. (0xaa is 0b10101010.)
@@ -230,7 +278,12 @@
template <typename R, typename... Args>
inline crypto_word_t CheckImpl(Result *out, bool unwind, R (*func)(Args...),
typename DeductionGuard<Args>::Type... args) {
- static_assert(sizeof...(args) <= 10,
+ // We only support up to 8 arguments. This ensures all arguments on aarch64
+ // are passed in registers and avoids the iOS discrepancy around packing small
+ // arguments on the stack.
+ //
+ // https://developer.apple.com/library/archive/documentation/Xcode/Conceptual/iPhoneOSABIReference/Articles/ARM64FunctionCallingConventions.html
+ static_assert(sizeof...(args) <= 8,
"too many arguments for abi_test_trampoline");
// Allocate one extra entry so MSVC does not complain about zero-size arrays.
diff --git a/crypto/test/asm/trampoline-armv8.pl b/crypto/test/asm/trampoline-armv8.pl
new file mode 100755
index 0000000..aab5250
--- /dev/null
+++ b/crypto/test/asm/trampoline-armv8.pl
@@ -0,0 +1,209 @@
+#!/usr/bin/env perl
+# Copyright (c) 2019, Google Inc.
+#
+# Permission to use, copy, modify, and/or distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+# This file defines helper functions for crypto/test/abi_test.h on aarch64. See
+# that header for details on how to use this.
+#
+# For convenience, this file is linked into libcrypto, where consuming builds
+# already support architecture-specific sources. The static linker should drop
+# this code in non-test binaries. This includes a shared library build of
+# libcrypto, provided --gc-sections (ELF), -dead_strip (iOS), or equivalent is
+# used.
+#
+# References:
+#
+# AAPCS64: http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055b/IHI0055B_aapcs64.pdf
+# iOS ARM64: https://developer.apple.com/library/archive/documentation/Xcode/Conceptual/iPhoneOSABIReference/Articles/ARM64FunctionCallingConventions.html
+
+use strict;
+
+my $flavour = shift;
+my $output = shift;
+if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/;
+my $dir = $1;
+my $xlate;
+( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
+die "can't locate arm-xlate.pl";
+
+# Pipe everything printed to STDOUT through arm-xlate.pl, which writes the
+# translated assembly to $output. Fail loudly if the translator cannot be
+# started rather than silently producing no output.
+open OUT, "| \"$^X\" \"$xlate\" $flavour \"$output\""
+ or die "can't call $xlate: $!";
+*STDOUT = *OUT;
+
+my ($func, $state, $argv, $argc) = ("x0", "x1", "x2", "x3");
+my $code = <<____;
+.text
+
+// abi_test_trampoline loads callee-saved registers from |state|, calls |func|
+// with |argv|, then saves the callee-saved registers into |state|. It returns
+// the result of |func|. The |unwind| argument is unused.
+// uint64_t abi_test_trampoline(void (*func)(...), CallerState *state,
+// const uint64_t *argv, size_t argc,
+// uint64_t unwind);
+.type abi_test_trampoline, %function
+.globl abi_test_trampoline
+.align 4
+abi_test_trampoline:
+.Labi_test_trampoline_begin:
+ // Stack layout (low to high addresses)
+ // x29,x30 (16 bytes)
+ // d8-d15 (64 bytes)
+ // x19-x28 (80 bytes)
+ // $state (8 bytes)
+ // padding (8 bytes)
+ stp x29, x30, [sp, #-176]!
+ mov x29, sp
+
+ // Saved callee-saved registers and |state|.
+ stp d8, d9, [sp, #16]
+ stp d10, d11, [sp, #32]
+ stp d12, d13, [sp, #48]
+ stp d14, d15, [sp, #64]
+ stp x19, x20, [sp, #80]
+ stp x21, x22, [sp, #96]
+ stp x23, x24, [sp, #112]
+ stp x25, x26, [sp, #128]
+ stp x27, x28, [sp, #144]
+ str $state, [sp, #160]
+
+ // Load registers from |state|, with the exception of x29. x29 is the
+ // frame pointer and also callee-saved, but AAPCS64 allows platforms to
+ // mandate that x29 always point to a frame. iOS64 does so, which means
+ // we cannot fill x29 with entropy without violating ABI rules
+ // ourselves. x29 is tested separately below.
+ ldp d8, d9, [$state], #16
+ ldp d10, d11, [$state], #16
+ ldp d12, d13, [$state], #16
+ ldp d14, d15, [$state], #16
+ ldp x19, x20, [$state], #16
+ ldp x21, x22, [$state], #16
+ ldp x23, x24, [$state], #16
+ ldp x25, x26, [$state], #16
+ ldp x27, x28, [$state], #16
+
+ // Move parameters into temporary registers.
+ mov x9, $func
+ mov x10, $argv
+ mov x11, $argc
+
+ // Load parameters into registers.
+ cbz x11, .Largs_done
+ ldr x0, [x10], #8
+ subs x11, x11, #1
+ b.eq .Largs_done
+ ldr x1, [x10], #8
+ subs x11, x11, #1
+ b.eq .Largs_done
+ ldr x2, [x10], #8
+ subs x11, x11, #1
+ b.eq .Largs_done
+ ldr x3, [x10], #8
+ subs x11, x11, #1
+ b.eq .Largs_done
+ ldr x4, [x10], #8
+ subs x11, x11, #1
+ b.eq .Largs_done
+ ldr x5, [x10], #8
+ subs x11, x11, #1
+ b.eq .Largs_done
+ ldr x6, [x10], #8
+ subs x11, x11, #1
+ b.eq .Largs_done
+ ldr x7, [x10], #8
+
+.Largs_done:
+ blr x9
+
+ // Reload |state| and store registers.
+ ldr $state, [sp, #160]
+ stp d8, d9, [$state], #16
+ stp d10, d11, [$state], #16
+ stp d12, d13, [$state], #16
+ stp d14, d15, [$state], #16
+ stp x19, x20, [$state], #16
+ stp x21, x22, [$state], #16
+ stp x23, x24, [$state], #16
+ stp x25, x26, [$state], #16
+ stp x27, x28, [$state], #16
+
+ // |func| is required to preserve x29, the frame pointer. We cannot load
+ // random values into x29 (see comment above), so compare it against the
+ // expected value and zero the field of |state| if corrupted.
+ mov x9, sp
+ cmp x29, x9
+ b.eq .Lx29_ok
+ str xzr, [$state]
+
+.Lx29_ok:
+ // Restore callee-saved registers.
+ ldp d8, d9, [sp, #16]
+ ldp d10, d11, [sp, #32]
+ ldp d12, d13, [sp, #48]
+ ldp d14, d15, [sp, #64]
+ ldp x19, x20, [sp, #80]
+ ldp x21, x22, [sp, #96]
+ ldp x23, x24, [sp, #112]
+ ldp x25, x26, [sp, #128]
+ ldp x27, x28, [sp, #144]
+
+ ldp x29, x30, [sp], #176
+ ret
+.size abi_test_trampoline,.-abi_test_trampoline
+____
+
+# abi_test_clobber_* zeros the corresponding register. These are used to test
+# the ABI-testing framework.
+foreach (0..29) {
+ # x18 is the platform register and off limits.
+ next if ($_ == 18);
+ $code .= <<____;
+.type abi_test_clobber_x$_, %function
+.globl abi_test_clobber_x$_
+.align 4
+abi_test_clobber_x$_:
+ mov x$_, xzr
+ ret
+.size abi_test_clobber_x$_,.-abi_test_clobber_x$_
+____
+}
+foreach (0..31) {
+ $code .= <<____;
+.type abi_test_clobber_d$_, %function
+.globl abi_test_clobber_d$_
+.align 4
+abi_test_clobber_d$_:
+ fmov d$_, xzr
+ ret
+.size abi_test_clobber_d$_,.-abi_test_clobber_d$_
+____
+}
+
+# abi_test_clobber_v*_upper clobbers only the upper half of v*. AAPCS64 only
+# requires the lower half (d*) be preserved.
+foreach (8..15) {
+ $code .= <<____;
+.type abi_test_clobber_v${_}_upper, %function
+.globl abi_test_clobber_v${_}_upper
+.align 4
+abi_test_clobber_v${_}_upper:
+ fmov v${_}.d[1], xzr
+ ret
+.size abi_test_clobber_v${_}_upper,.-abi_test_clobber_v${_}_upper
+____
+}
+
+print $code;
+# STDOUT is the pipe into arm-xlate.pl. Closing it waits for the translator, so
+# a failed close means the generated assembly is incomplete or invalid.
+close STDOUT or die "error closing STDOUT: $!";