Add a ppc64le ABI tester. Now we have ABI tests for every architecture where we have assembly. Change-Id: I59bc2d0f72f2325e9f294b1fc08287bb93fc9cd2 Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/39008 Commit-Queue: David Benjamin <davidben@google.com> Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/CMakeLists.txt b/crypto/CMakeLists.txt index 427bc64..c6073d2 100644 --- a/crypto/CMakeLists.txt +++ b/crypto/CMakeLists.txt
@@ -118,6 +118,14 @@ ) endif() +if(${ARCH} STREQUAL "ppc64le") + set( + CRYPTO_ARCH_SOURCES + + test/trampoline-ppc.${ASM_EXT} + ) +endif() + if(${ARCH} STREQUAL "x86") set( CRYPTO_ARCH_SOURCES @@ -147,6 +155,7 @@ perlasm(cipher_extra/chacha20_poly1305_x86_64.${ASM_EXT} cipher_extra/asm/chacha20_poly1305_x86_64.pl) perlasm(test/trampoline-armv4.${ASM_EXT} test/asm/trampoline-armv4.pl) perlasm(test/trampoline-armv8.${ASM_EXT} test/asm/trampoline-armv8.pl) +perlasm(test/trampoline-ppc.${ASM_EXT} test/asm/trampoline-ppc.pl) perlasm(test/trampoline-x86.${ASM_EXT} test/asm/trampoline-x86.pl) perlasm(test/trampoline-x86_64.${ASM_EXT} test/asm/trampoline-x86_64.pl)
diff --git a/crypto/abi_self_test.cc b/crypto/abi_self_test.cc index 1fcf6bc..c48818b 100644 --- a/crypto/abi_self_test.cc +++ b/crypto/abi_self_test.cc
@@ -341,7 +341,6 @@ } #endif // OPENSSL_ARM && SUPPORTS_ABI_TEST - #if defined(OPENSSL_AARCH64) && defined(SUPPORTS_ABI_TEST) extern "C" { void abi_test_clobber_x0(void); @@ -521,3 +520,289 @@ CHECK_ABI_NO_UNWIND(abi_test_clobber_v15_upper); } #endif // OPENSSL_AARCH64 && SUPPORTS_ABI_TEST + +#if defined(OPENSSL_PPC64LE) && defined(SUPPORTS_ABI_TEST) +extern "C" { +void abi_test_clobber_r0(void); +// r1 is the stack pointer. +void abi_test_clobber_r2(void); +void abi_test_clobber_r3(void); +void abi_test_clobber_r4(void); +void abi_test_clobber_r5(void); +void abi_test_clobber_r6(void); +void abi_test_clobber_r7(void); +void abi_test_clobber_r8(void); +void abi_test_clobber_r9(void); +void abi_test_clobber_r10(void); +void abi_test_clobber_r11(void); +void abi_test_clobber_r12(void); +// r13 is the thread pointer. +void abi_test_clobber_r14(void); +void abi_test_clobber_r15(void); +void abi_test_clobber_r16(void); +void abi_test_clobber_r17(void); +void abi_test_clobber_r18(void); +void abi_test_clobber_r19(void); +void abi_test_clobber_r20(void); +void abi_test_clobber_r21(void); +void abi_test_clobber_r22(void); +void abi_test_clobber_r23(void); +void abi_test_clobber_r24(void); +void abi_test_clobber_r25(void); +void abi_test_clobber_r26(void); +void abi_test_clobber_r27(void); +void abi_test_clobber_r28(void); +void abi_test_clobber_r29(void); +void abi_test_clobber_r30(void); +void abi_test_clobber_r31(void); + +void abi_test_clobber_f0(void); +void abi_test_clobber_f1(void); +void abi_test_clobber_f2(void); +void abi_test_clobber_f3(void); +void abi_test_clobber_f4(void); +void abi_test_clobber_f5(void); +void abi_test_clobber_f6(void); +void abi_test_clobber_f7(void); +void abi_test_clobber_f8(void); +void abi_test_clobber_f9(void); +void abi_test_clobber_f10(void); +void abi_test_clobber_f11(void); +void abi_test_clobber_f12(void); +void abi_test_clobber_f13(void); +void abi_test_clobber_f14(void); +void abi_test_clobber_f15(void); +void 
abi_test_clobber_f16(void); +void abi_test_clobber_f17(void); +void abi_test_clobber_f18(void); +void abi_test_clobber_f19(void); +void abi_test_clobber_f20(void); +void abi_test_clobber_f21(void); +void abi_test_clobber_f22(void); +void abi_test_clobber_f23(void); +void abi_test_clobber_f24(void); +void abi_test_clobber_f25(void); +void abi_test_clobber_f26(void); +void abi_test_clobber_f27(void); +void abi_test_clobber_f28(void); +void abi_test_clobber_f29(void); +void abi_test_clobber_f30(void); +void abi_test_clobber_f31(void); + +void abi_test_clobber_v0(void); +void abi_test_clobber_v1(void); +void abi_test_clobber_v2(void); +void abi_test_clobber_v3(void); +void abi_test_clobber_v4(void); +void abi_test_clobber_v5(void); +void abi_test_clobber_v6(void); +void abi_test_clobber_v7(void); +void abi_test_clobber_v8(void); +void abi_test_clobber_v9(void); +void abi_test_clobber_v10(void); +void abi_test_clobber_v11(void); +void abi_test_clobber_v12(void); +void abi_test_clobber_v13(void); +void abi_test_clobber_v14(void); +void abi_test_clobber_v15(void); +void abi_test_clobber_v16(void); +void abi_test_clobber_v17(void); +void abi_test_clobber_v18(void); +void abi_test_clobber_v19(void); +void abi_test_clobber_v20(void); +void abi_test_clobber_v21(void); +void abi_test_clobber_v22(void); +void abi_test_clobber_v23(void); +void abi_test_clobber_v24(void); +void abi_test_clobber_v25(void); +void abi_test_clobber_v26(void); +void abi_test_clobber_v27(void); +void abi_test_clobber_v28(void); +void abi_test_clobber_v29(void); +void abi_test_clobber_v30(void); +void abi_test_clobber_v31(void); + +void abi_test_clobber_cr0(void); +void abi_test_clobber_cr1(void); +void abi_test_clobber_cr2(void); +void abi_test_clobber_cr3(void); +void abi_test_clobber_cr4(void); +void abi_test_clobber_cr5(void); +void abi_test_clobber_cr6(void); +void abi_test_clobber_cr7(void); + +void abi_test_clobber_ctr(void); +void abi_test_clobber_lr(void); + +} // extern "C" + +TEST(ABITest, 
PPC64LE) { + // abi_test_trampoline hides unsaved registers from the caller, so we can + // safely call the abi_test_clobber_* functions below. + abi_test::internal::CallerState state; + RAND_bytes(reinterpret_cast<uint8_t *>(&state), sizeof(state)); + CHECK_ABI_NO_UNWIND(abi_test_trampoline, + reinterpret_cast<crypto_word_t>(abi_test_clobber_r14), + &state, nullptr, 0, 0 /* no breakpoint */); + + CHECK_ABI_NO_UNWIND(abi_test_clobber_r0); + CHECK_ABI_NO_UNWIND(abi_test_clobber_r2); + CHECK_ABI_NO_UNWIND(abi_test_clobber_r3); + CHECK_ABI_NO_UNWIND(abi_test_clobber_r4); + CHECK_ABI_NO_UNWIND(abi_test_clobber_r5); + CHECK_ABI_NO_UNWIND(abi_test_clobber_r6); + CHECK_ABI_NO_UNWIND(abi_test_clobber_r7); + CHECK_ABI_NO_UNWIND(abi_test_clobber_r8); + CHECK_ABI_NO_UNWIND(abi_test_clobber_r9); + CHECK_ABI_NO_UNWIND(abi_test_clobber_r10); + CHECK_ABI_NO_UNWIND(abi_test_clobber_r11); + CHECK_ABI_NO_UNWIND(abi_test_clobber_r12); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r14), + "r14 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r15), + "r15 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r16), + "r16 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r17), + "r17 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r18), + "r18 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r19), + "r19 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r20), + "r20 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r21), + "r21 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r22), + "r22 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r23), + "r23 was not 
restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r24), + "r24 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r25), + "r25 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r26), + "r26 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r27), + "r27 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r28), + "r28 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r29), + "r29 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r30), + "r30 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r31), + "r31 was not restored after return"); + + CHECK_ABI_NO_UNWIND(abi_test_clobber_f0); + CHECK_ABI_NO_UNWIND(abi_test_clobber_f1); + CHECK_ABI_NO_UNWIND(abi_test_clobber_f2); + CHECK_ABI_NO_UNWIND(abi_test_clobber_f3); + CHECK_ABI_NO_UNWIND(abi_test_clobber_f4); + CHECK_ABI_NO_UNWIND(abi_test_clobber_f5); + CHECK_ABI_NO_UNWIND(abi_test_clobber_f6); + CHECK_ABI_NO_UNWIND(abi_test_clobber_f7); + CHECK_ABI_NO_UNWIND(abi_test_clobber_f8); + CHECK_ABI_NO_UNWIND(abi_test_clobber_f9); + CHECK_ABI_NO_UNWIND(abi_test_clobber_f10); + CHECK_ABI_NO_UNWIND(abi_test_clobber_f11); + CHECK_ABI_NO_UNWIND(abi_test_clobber_f12); + CHECK_ABI_NO_UNWIND(abi_test_clobber_f13); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f14), + "f14 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f15), + "f15 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f16), + "f16 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f17), + "f17 was not restored after return"); + 
EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f18), + "f18 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f19), + "f19 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f20), + "f20 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f21), + "f21 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f22), + "f22 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f23), + "f23 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f24), + "f24 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f25), + "f25 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f26), + "f26 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f27), + "f27 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f28), + "f28 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f29), + "f29 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f30), + "f30 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f31), + "f31 was not restored after return"); + + CHECK_ABI_NO_UNWIND(abi_test_clobber_v0); + CHECK_ABI_NO_UNWIND(abi_test_clobber_v1); + CHECK_ABI_NO_UNWIND(abi_test_clobber_v2); + CHECK_ABI_NO_UNWIND(abi_test_clobber_v3); + CHECK_ABI_NO_UNWIND(abi_test_clobber_v4); + CHECK_ABI_NO_UNWIND(abi_test_clobber_v5); + CHECK_ABI_NO_UNWIND(abi_test_clobber_v6); + CHECK_ABI_NO_UNWIND(abi_test_clobber_v7); + CHECK_ABI_NO_UNWIND(abi_test_clobber_v8); + CHECK_ABI_NO_UNWIND(abi_test_clobber_v9); + 
CHECK_ABI_NO_UNWIND(abi_test_clobber_v10); + CHECK_ABI_NO_UNWIND(abi_test_clobber_v11); + CHECK_ABI_NO_UNWIND(abi_test_clobber_v12); + CHECK_ABI_NO_UNWIND(abi_test_clobber_v13); + CHECK_ABI_NO_UNWIND(abi_test_clobber_v14); + CHECK_ABI_NO_UNWIND(abi_test_clobber_v15); + CHECK_ABI_NO_UNWIND(abi_test_clobber_v16); + CHECK_ABI_NO_UNWIND(abi_test_clobber_v17); + CHECK_ABI_NO_UNWIND(abi_test_clobber_v18); + CHECK_ABI_NO_UNWIND(abi_test_clobber_v19); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v20), + "v20 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v21), + "v21 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v22), + "v22 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v23), + "v23 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v24), + "v24 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v25), + "v25 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v26), + "v26 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v27), + "v27 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v28), + "v28 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v29), + "v29 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v30), + "v30 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v31), + "v31 was not restored after return"); + + CHECK_ABI_NO_UNWIND(abi_test_clobber_cr0); + CHECK_ABI_NO_UNWIND(abi_test_clobber_cr1); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_cr2), + "cr was not restored after return"); + 
EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_cr3), + "cr was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_cr4), + "cr was not restored after return"); + CHECK_ABI_NO_UNWIND(abi_test_clobber_cr5); + CHECK_ABI_NO_UNWIND(abi_test_clobber_cr6); + CHECK_ABI_NO_UNWIND(abi_test_clobber_cr7); + + CHECK_ABI_NO_UNWIND(abi_test_clobber_ctr); + CHECK_ABI_NO_UNWIND(abi_test_clobber_lr); +} +#endif // OPENSSL_PPC64LE && SUPPORTS_ABI_TEST
diff --git a/crypto/fipsmodule/modes/gcm_test.cc b/crypto/fipsmodule/modes/gcm_test.cc index ca3d08a..031b06c 100644 --- a/crypto/fipsmodule/modes/gcm_test.cc +++ b/crypto/fipsmodule/modes/gcm_test.cc
@@ -208,5 +208,15 @@ } } #endif // GHASH_ASM_ARM + +#if defined(GHASH_ASM_PPC64LE) + if (CRYPTO_is_PPC64LE_vcrypto_capable()) { + CHECK_ABI(gcm_init_p8, Htable, kH); + CHECK_ABI(gcm_gmult_p8, X, Htable); + for (size_t blocks : kBlockCounts) { + CHECK_ABI(gcm_ghash_p8, X, Htable, buf, 16 * blocks); + } + } +#endif // GHASH_ASM_PPC64LE } #endif // SUPPORTS_ABI_TEST && !OPENSSL_NO_ASM
diff --git a/crypto/test/abi_test.h b/crypto/test/abi_test.h index d65c7f8..ffe4479 100644 --- a/crypto/test/abi_test.h +++ b/crypto/test/abi_test.h
@@ -179,7 +179,78 @@ CALLER_STATE_REGISTER(uint64_t, x28) \ CALLER_STATE_REGISTER(uint64_t, x29) -#endif // X86_64 || X86 || ARM || AARCH64 +#elif defined(OPENSSL_PPC64LE) + +// CRReg only compares the CR2-CR4 bits of a CR register. +struct CRReg { + uint32_t masked() const { return value & 0x00fff000; } + bool operator==(CRReg r) const { return masked() == r.masked(); } + bool operator!=(CRReg r) const { return masked() != r.masked(); } + uint32_t value; +}; + +// References: +// ELFv2: http://openpowerfoundation.org/wp-content/uploads/resources/leabi/leabi-20170510.pdf +// +// Note vector and floating-point registers on POWER have two different names. +// Originally, there were 32 floating-point registers and 32 vector registers, +// labelled f0-f31 and v0-v31 respectively. Later, VSX (Vector Scalar Extension) +// unified them into 64 registers vs0-vs63. f0-f31 map to the lower halves of +// vs0-vs31. v0-v31 map to vs32-vs63. The ABI was defined in terms of pre-VSX +// names, so we use those names here. In particular, f14-f31 are +// callee-saved, but the upper halves of vs14-vs31 are not. 
+#define LOOP_CALLER_STATE_REGISTERS() \ + CALLER_STATE_REGISTER(Reg128, v20) \ + CALLER_STATE_REGISTER(Reg128, v21) \ + CALLER_STATE_REGISTER(Reg128, v22) \ + CALLER_STATE_REGISTER(Reg128, v23) \ + CALLER_STATE_REGISTER(Reg128, v24) \ + CALLER_STATE_REGISTER(Reg128, v25) \ + CALLER_STATE_REGISTER(Reg128, v26) \ + CALLER_STATE_REGISTER(Reg128, v27) \ + CALLER_STATE_REGISTER(Reg128, v28) \ + CALLER_STATE_REGISTER(Reg128, v29) \ + CALLER_STATE_REGISTER(Reg128, v30) \ + CALLER_STATE_REGISTER(Reg128, v31) \ + CALLER_STATE_REGISTER(uint64_t, r14) \ + CALLER_STATE_REGISTER(uint64_t, r15) \ + CALLER_STATE_REGISTER(uint64_t, r16) \ + CALLER_STATE_REGISTER(uint64_t, r17) \ + CALLER_STATE_REGISTER(uint64_t, r18) \ + CALLER_STATE_REGISTER(uint64_t, r19) \ + CALLER_STATE_REGISTER(uint64_t, r20) \ + CALLER_STATE_REGISTER(uint64_t, r21) \ + CALLER_STATE_REGISTER(uint64_t, r22) \ + CALLER_STATE_REGISTER(uint64_t, r23) \ + CALLER_STATE_REGISTER(uint64_t, r24) \ + CALLER_STATE_REGISTER(uint64_t, r25) \ + CALLER_STATE_REGISTER(uint64_t, r26) \ + CALLER_STATE_REGISTER(uint64_t, r27) \ + CALLER_STATE_REGISTER(uint64_t, r28) \ + CALLER_STATE_REGISTER(uint64_t, r29) \ + CALLER_STATE_REGISTER(uint64_t, r30) \ + CALLER_STATE_REGISTER(uint64_t, r31) \ + CALLER_STATE_REGISTER(uint64_t, f14) \ + CALLER_STATE_REGISTER(uint64_t, f15) \ + CALLER_STATE_REGISTER(uint64_t, f16) \ + CALLER_STATE_REGISTER(uint64_t, f17) \ + CALLER_STATE_REGISTER(uint64_t, f18) \ + CALLER_STATE_REGISTER(uint64_t, f19) \ + CALLER_STATE_REGISTER(uint64_t, f20) \ + CALLER_STATE_REGISTER(uint64_t, f21) \ + CALLER_STATE_REGISTER(uint64_t, f22) \ + CALLER_STATE_REGISTER(uint64_t, f23) \ + CALLER_STATE_REGISTER(uint64_t, f24) \ + CALLER_STATE_REGISTER(uint64_t, f25) \ + CALLER_STATE_REGISTER(uint64_t, f26) \ + CALLER_STATE_REGISTER(uint64_t, f27) \ + CALLER_STATE_REGISTER(uint64_t, f28) \ + CALLER_STATE_REGISTER(uint64_t, f29) \ + CALLER_STATE_REGISTER(uint64_t, f30) \ + CALLER_STATE_REGISTER(uint64_t, f31) \ + 
CALLER_STATE_REGISTER(CRReg, cr) + +#endif // X86_64 || X86 || ARM || AARCH64 || PPC64LE // Enable ABI testing if all of the following are true. // @@ -210,24 +281,44 @@ template <typename T> inline crypto_word_t ToWord(T t) { -#if !defined(OPENSSL_X86) && !defined(OPENSSL_X86_64) && \ - !defined(OPENSSL_ARM) && !defined(OPENSSL_AARCH64) -#error "Unknown architecture" -#endif + // ABIs typically pass floats and structs differently from integers and + // pointers. We only need to support the latter. + static_assert(std::is_integral<T>::value || std::is_pointer<T>::value, + "parameter types must be integral or pointer types"); + // We only support types which fit in registers. static_assert(sizeof(T) <= sizeof(crypto_word_t), - "T is larger than crypto_word_t"); - static_assert(sizeof(T) >= 4, "types under four bytes are complicated"); + "parameter types must be at most word-sized"); - // ABIs are complex around arguments that are smaller than native words. For - // 32-bit architectures, the rules above imply we only have word-sized - // arguments. For 64-bit architectures, we still have assembly functions which - // take |int|. + // ABIs are complex around arguments that are smaller than native words. + // Parameters passed in memory are sometimes packed and sometimes padded to a + // word. When parameters are padded in memory or passed in a larger register, + // the unused bits may be undefined or sign- or zero-extended. // - // For aarch64, AAPCS64, section 5.4.2, clauses C.7 and C.14 says any - // remaining bits are unspecified. iOS64 contradicts this and says the callee - // extends arguments up to 32 bits, and only the upper 32 bits are - // unspecified. Rejecting parameters smaller than 32 bits avoids the - // divergence. + // We could simply cast to |crypto_word_t| everywhere but, on platforms where + // padding is undefined, we perturb the bits to test the function accounts for + // this.
+#if defined(OPENSSL_32_BIT) + // We never pass parameters smaller than int, so require word-sized parameters + // on 32-bit architectures for simplicity. + static_assert(sizeof(T) == 4, "parameter types must be word-sized"); + return (crypto_word_t)t; +#elif defined(OPENSSL_PPC64LE) + // ELFv2, section 2.2.2.3 says the parameter save area sign- or zero-extends + // parameters passed in memory. Section 2.2.3 is unclear on how to handle + // register parameters, but section 2.2.2.3 additionally says that the memory + // copy of a parameter is identical to the register one. + return (crypto_word_t)t; +#elif defined(OPENSSL_X86_64) || defined(OPENSSL_AARCH64) + // AAPCS64, section 5.4.2, clauses C.7 and C.14 say any remaining bits on + // aarch64 are unspecified. iOS64 contradicts this and says the callee extends + // arguments up to 32 bits, and only the upper 32 bits are unspecified. + // + // On x86_64, Win64 leaves all unused bits unspecified. SysV also leaves + // unused bits in stack parameters unspecified, but it behaves like iOS64 for + // register parameters. This was determined via experimentation. + // + // We limit to 32-bit and 64-bit parameters, the subset where the above all + // align, and then test that functions tolerate arbitrary unused bits. // // TODO(davidben): Find authoritative citations for x86_64. For x86_64, I // observed the behavior of Clang, GCC, and MSVC. ABI rules here may be @@ -241,27 +332,22 @@ // 2. When compiling a small-argument-taking function, does the compiler make // assumptions about unused bits of arguments? // - // MSVC for x86_64 is straightforward. It appears to tolerate and produce - // arbitrary values for unused bits, like AAPCS64. - // - // GCC and Clang for x86_64 are more complex. They match MSVC for stack - // parameters. However, for register parameters, they behave like iOS64 and, - // as callers, extend up to 32 bits, leaving the remainder arbitrary.
When - // compiling a callee, Clang takes advantage of this conversion, but I was - // unable to make GCC do so. - // - // Note that, although the Win64 rules are sufficient to require our assembly - // be conservative, we wish for |CHECK_ABI| to support C-compiled functions, - // so it must enforce the correct rules for each platform. - // - // Fortunately, the |static_assert|s above cause all supported architectures - // to behave the same. + // MSVC was observed to tolerate and produce arbitrary values for unused bits, + // which is conclusive. GCC and Clang, targeting Linux, were similarly + // conclusive on stack parameters. Clang was also conclusive for register + // parameters. Callers only extended parameters up to 32 bits, and callees + // took advantage of the 32-bit extension. GCC only exhibited the callee + // behavior. + static_assert(sizeof(T) >= 4, "parameters must be at least 32 bits wide"); crypto_word_t ret; // Filling extra bits with 0xaa will be vastly out of bounds for code // expecting either sign- or zero-extension. (0xaa is 0b10101010.) OPENSSL_memset(&ret, 0xaa, sizeof(ret)); OPENSSL_memcpy(&ret, &t, sizeof(t)); return ret; +#else +#error "unknown architecture" +#endif } // CheckImpl runs |func| on |args|, recording ABI errors in |out|. If |unwind| @@ -276,11 +362,9 @@ template <typename R, typename... Args> inline crypto_word_t CheckImpl(Result *out, bool unwind, R (*func)(Args...), typename DeductionGuard<Args>::Type... args) { - // We only support up to 8 arguments. This ensures all arguments on aarch64 - // are passed in registers and avoids the iOS descrepancy around packing small - // arguments on the stack. - // - // https://developer.apple.com/library/archive/documentation/Xcode/Conceptual/iPhoneOSABIReference/Articles/ARM64FunctionCallingConventions.html + // We only support up to 8 arguments, so all arguments on aarch64 and ppc64le + // are passed in registers. 
This is simpler and avoids the iOS discrepancy + // around packing small arguments on the stack. (See the iOS64 reference.) static_assert(sizeof...(args) <= 8, "too many arguments for abi_test_trampoline");
diff --git a/crypto/test/asm/trampoline-ppc.pl b/crypto/test/asm/trampoline-ppc.pl new file mode 100755 index 0000000..a8d7c3f --- /dev/null +++ b/crypto/test/asm/trampoline-ppc.pl
@@ -0,0 +1,262 @@ +#!/usr/bin/env perl +# Copyright (c) 2019, Google Inc. +# +# Permission to use, copy, modify, and/or distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION +# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +# This file defines helper functions for crypto/test/abi_test.h on ppc64le. See +# that header for details on how to use this. +# +# For convenience, this file is linked into libcrypto, where consuming builds +# already support architecture-specific sources. The static linker should drop +# this code in non-test binaries. This includes a shared library build of +# libcrypto, provided --gc-sections or equivalent is used. +# +# References: +# +# ELFv2: http://openpowerfoundation.org/wp-content/uploads/resources/leabi/leabi-20170510.pdf + +use strict; + +my $flavour = shift; +my $output = shift; +if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; +my $dir = $1; +my $xlate; +( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or +die "can't locate ppc-xlate.pl"; + +open OUT, "| \"$^X\" \"$xlate\" $flavour \"$output\""; +*STDOUT = *OUT; + +unless ($flavour =~ /linux.*64le/) { + die "This file only supports the ELFv2 ABI, used by ppc64le"; +} + +my $code = ""; + +sub load_or_store_regs { + # $op is "l" or "st". + my ($op, $base_reg, $base_offset) = @_; + # Vector registers. 
+ foreach (20..31) { + my $offset = $base_offset + ($_ - 20) * 16; + # Vector registers only support indexed register addressing. + $code .= "\tli\tr11, $offset\n"; + $code .= "\t${op}vx\tv$_, r11, $base_reg\n"; + } + # General registers. + foreach (14..31) { + my $offset = $base_offset + 192 + ($_ - 14) * 8; + $code .= "\t${op}d\tr$_, $offset($base_reg)\n"; + } + # Floating point registers. + foreach (14..31) { + my $offset = $base_offset + 336 + ($_ - 14) * 8; + $code .= "\t${op}fd\tf$_, $offset($base_reg)\n"; + } +} + +sub load_regs { + my ($base_reg, $base_offset) = @_; + load_or_store_regs("l", $base_reg, $base_offset); +} + +sub store_regs { + my ($base_reg, $base_offset) = @_; + load_or_store_regs("st", $base_reg, $base_offset); +} + +my ($func, $state, $argv, $argc) = ("r3", "r4", "r5", "r6"); +$code .= <<____; +.machine "any" +.text + +# abi_test_trampoline loads callee-saved registers from |state|, calls |func| +# with |argv|, then saves the callee-saved registers into |state|. It returns +# the result of |func|. The |unwind| argument is unused. +# uint64_t abi_test_trampoline(void (*func)(...), CallerState *state, +# const uint64_t *argv, size_t argc, +# uint64_t unwind); +.globl abi_test_trampoline +.align 5 +abi_test_trampoline: + # LR is saved into the caller's stack frame. + mflr r0 + std r0, 16(r1) + + # Allocate 66*8 = 528 bytes of stack frame. From the top of the stack + # to the bottom, the stack frame is: + # + # 0(r1) - Back chain pointer + # 8(r1) - CR save area + # 16(r1) - LR save area (for |func|) + # 24(r1) - TOC pointer save area + # 32(r1) - Saved copy of |state| + # 40(r1) - Padding + # 48(r1) - Vector register save area (v20-v31, 12 registers) + # 240(r1) - General register save area (r14-r31, 18 registers) + # 384(r1) - Floating point register save area (f14-f31, 18 registers) + # + # Note the layouts of the register save areas and CallerState match.
+ # + # In the ELFv2 ABI, the parameter save area is optional if the function + # is non-variadic and all parameters fit in registers. We only support + # such functions, so we omit it to test that |func| does not rely on it. + stdu r1, -528(r1) + + mfcr r0 + std r0, 8(r1) # Save CR + std r2, 24(r1) # Save TOC + std $state, 32(r1) # Save |state| +____ +# Save registers to the stack. +store_regs("r1", 48); +# Load registers from the caller. +load_regs($state, 0); +$code .= <<____; + # Load CR from |state|. + ld r0, 480($state) + mtcr r0 + + # Move parameters into temporary registers so they are not clobbered. + addi r11, $argv, -8 # Adjust for ldu below + mr r12, $func + + # Load parameters into registers. + cmpdi $argc, 0 + beq .Largs_done + mtctr $argc + ldu r3, 8(r11) + bdz .Largs_done + ldu r4, 8(r11) + bdz .Largs_done + ldu r5, 8(r11) + bdz .Largs_done + ldu r6, 8(r11) + bdz .Largs_done + ldu r7, 8(r11) + bdz .Largs_done + ldu r8, 8(r11) + bdz .Largs_done + ldu r9, 8(r11) + bdz .Largs_done + ldu r10, 8(r11) + +.Largs_done: + li r2, 0 # Clear TOC to test |func|'s global entry point + mtctr r12 + bctrl + ld r2, 24(r1) # Restore TOC + + ld $state, 32(r1) # Reload |state| +____ +# Output resulting registers to the caller. +store_regs($state, 0); +# Restore registers from the stack. +load_regs("r1", 48); +$code .= <<____; + mfcr r0 + std r0, 480($state) # Output CR to caller + ld r0, 8(r1) + mtcrf 0b00111000, r0 # Restore CR2-CR4 + addi r1, r1, 528 + ld r0, 16(r1) # Restore LR + mtlr r0 + blr +.size abi_test_trampoline,.-abi_test_trampoline +____ + +# abi_test_clobber_* clobbers the corresponding register. These are used to test +# the ABI-testing framework. +foreach (0..31) { + # r1 is the stack pointer. r13 is the thread pointer. 
+ next if ($_ == 1 || $_ == 13); + $code .= <<____; +.globl abi_test_clobber_r$_ +.align 5 +abi_test_clobber_r$_: + li r$_, 0 + blr +.size abi_test_clobber_r$_,.-abi_test_clobber_r$_ +____ +} + +foreach (0..31) { + $code .= <<____; +.globl abi_test_clobber_f$_ +.align 4 +abi_test_clobber_f$_: + li r0, 0 + # Use the red zone. + std r0, -8(r1) + lfd f$_, -8(r1) + blr +.size abi_test_clobber_f$_,.-abi_test_clobber_f$_ +____ +} + +foreach (0..31) { + $code .= <<____; +.globl abi_test_clobber_v$_ +.align 4 +abi_test_clobber_v$_: + vxor v$_, v$_, v$_ + blr +.size abi_test_clobber_v$_,.-abi_test_clobber_v$_ +____ +} + +foreach (0..7) { + # PPC orders CR fields in big-endian, so the mask is reversed from what one + # would expect. + my $mask = 1 << (7 - $_); + $code .= <<____; +.globl abi_test_clobber_cr$_ +.align 4 +abi_test_clobber_cr$_: + # Flip the bits on cr$_ rather than setting to zero. With a four-bit + # register, zeroing it will do nothing 1 in 16 times. + mfcr r0 + not r0, r0 + mtcrf $mask, r0 + blr +.size abi_test_clobber_cr$_,.-abi_test_clobber_cr$_ +____ +} + +$code .= <<____; +.globl abi_test_clobber_ctr +.align 4 +abi_test_clobber_ctr: + li r0, 0 + mtctr r0 + blr +.size abi_test_clobber_ctr,.-abi_test_clobber_ctr + +.globl abi_test_clobber_lr +.align 4 +abi_test_clobber_lr: + mflr r0 + mtctr r0 + li r0, 0 + mtlr r0 + bctr +.size abi_test_clobber_lr,.-abi_test_clobber_lr + +____ + +print $code; +close STDOUT or die "error closing STDOUT";