Remove ppc64le assembly. We no longer have a need to support ppc64le, nor do we have any testing story for the assembly we previously had. Remove all ppc64le-specific assembly. This CL stops short of removing it from base.h. That'll be done in a follow-up CL, just to separate which removals are for the assembly and which removals remove all support. Update-Note: After this change, ppc64le builds drop assembly optimizations and will fallback to a generic C-based AES implementation. Change-Id: Ic8075638085761d66cebc276eb16c4770ce03920 Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/56388 Commit-Queue: David Benjamin <davidben@google.com> Reviewed-by: Adam Langley <agl@google.com>
diff --git a/cmake/perlasm.cmake b/cmake/perlasm.cmake index b9530cf..9828799 100644 --- a/cmake/perlasm.cmake +++ b/cmake/perlasm.cmake
@@ -17,7 +17,6 @@ DEPENDS ${src} ${PROJECT_SOURCE_DIR}/crypto/perlasm/arm-xlate.pl - ${PROJECT_SOURCE_DIR}/crypto/perlasm/ppc-xlate.pl ${PROJECT_SOURCE_DIR}/crypto/perlasm/x86_64-xlate.pl ${PROJECT_SOURCE_DIR}/crypto/perlasm/x86asm.pl ${PROJECT_SOURCE_DIR}/crypto/perlasm/x86gas.pl @@ -41,9 +40,6 @@ add_perlasm_target("${dest}-apple.S" ${src} ios32) add_perlasm_target("${dest}-linux.S" ${src} linux32) append_to_parent_scope("${var}_ASM" "${dest}-apple.S" "${dest}-linux.S") - elseif(arch STREQUAL "ppc64le") - add_perlasm_target("${dest}-linux.S" ${src} linux64le) - append_to_parent_scope("${var}_ASM" "${dest}-linux.S") elseif(arch STREQUAL "x86") add_perlasm_target("${dest}-apple.S" ${src} macosx -fPIC -DOPENSSL_IA32_SSE2) add_perlasm_target("${dest}-linux.S" ${src} elf -fPIC -DOPENSSL_IA32_SSE2)
diff --git a/crypto/CMakeLists.txt b/crypto/CMakeLists.txt index e1634a4..ec6d2ee 100644 --- a/crypto/CMakeLists.txt +++ b/crypto/CMakeLists.txt
@@ -24,7 +24,6 @@ perlasm(CRYPTO_SOURCES aarch64 test/trampoline-armv8 test/asm/trampoline-armv8.pl) perlasm(CRYPTO_SOURCES arm chacha/chacha-armv4 chacha/asm/chacha-armv4.pl) perlasm(CRYPTO_SOURCES arm test/trampoline-armv4 test/asm/trampoline-armv4.pl) -perlasm(CRYPTO_SOURCES ppc64le test/trampoline-ppc test/asm/trampoline-ppc.pl) perlasm(CRYPTO_SOURCES x86 chacha/chacha-x86 chacha/asm/chacha-x86.pl) perlasm(CRYPTO_SOURCES x86 test/trampoline-x86 test/asm/trampoline-x86.pl) perlasm(CRYPTO_SOURCES x86_64 chacha/chacha-x86_64 chacha/asm/chacha-x86_64.pl) @@ -135,7 +134,6 @@ cpu_arm_linux.c cpu_arm.c cpu_intel.c - cpu_ppc64le.c crypto.c curve25519/curve25519.c curve25519/spake25519.c
diff --git a/crypto/abi_self_test.cc b/crypto/abi_self_test.cc index 9681498..a42bd1d 100644 --- a/crypto/abi_self_test.cc +++ b/crypto/abi_self_test.cc
@@ -521,289 +521,3 @@ CHECK_ABI_NO_UNWIND(abi_test_clobber_v15_upper); } #endif // OPENSSL_AARCH64 && SUPPORTS_ABI_TEST - -#if defined(OPENSSL_PPC64LE) && defined(SUPPORTS_ABI_TEST) -extern "C" { -void abi_test_clobber_r0(void); -// r1 is the stack pointer. -void abi_test_clobber_r2(void); -void abi_test_clobber_r3(void); -void abi_test_clobber_r4(void); -void abi_test_clobber_r5(void); -void abi_test_clobber_r6(void); -void abi_test_clobber_r7(void); -void abi_test_clobber_r8(void); -void abi_test_clobber_r9(void); -void abi_test_clobber_r10(void); -void abi_test_clobber_r11(void); -void abi_test_clobber_r12(void); -// r13 is the thread pointer. -void abi_test_clobber_r14(void); -void abi_test_clobber_r15(void); -void abi_test_clobber_r16(void); -void abi_test_clobber_r17(void); -void abi_test_clobber_r18(void); -void abi_test_clobber_r19(void); -void abi_test_clobber_r20(void); -void abi_test_clobber_r21(void); -void abi_test_clobber_r22(void); -void abi_test_clobber_r23(void); -void abi_test_clobber_r24(void); -void abi_test_clobber_r25(void); -void abi_test_clobber_r26(void); -void abi_test_clobber_r27(void); -void abi_test_clobber_r28(void); -void abi_test_clobber_r29(void); -void abi_test_clobber_r30(void); -void abi_test_clobber_r31(void); - -void abi_test_clobber_f0(void); -void abi_test_clobber_f1(void); -void abi_test_clobber_f2(void); -void abi_test_clobber_f3(void); -void abi_test_clobber_f4(void); -void abi_test_clobber_f5(void); -void abi_test_clobber_f6(void); -void abi_test_clobber_f7(void); -void abi_test_clobber_f8(void); -void abi_test_clobber_f9(void); -void abi_test_clobber_f10(void); -void abi_test_clobber_f11(void); -void abi_test_clobber_f12(void); -void abi_test_clobber_f13(void); -void abi_test_clobber_f14(void); -void abi_test_clobber_f15(void); -void abi_test_clobber_f16(void); -void abi_test_clobber_f17(void); -void abi_test_clobber_f18(void); -void abi_test_clobber_f19(void); -void abi_test_clobber_f20(void); -void abi_test_clobber_f21(void); -void abi_test_clobber_f22(void); -void abi_test_clobber_f23(void); -void abi_test_clobber_f24(void); -void abi_test_clobber_f25(void); -void abi_test_clobber_f26(void); -void abi_test_clobber_f27(void); -void abi_test_clobber_f28(void); -void abi_test_clobber_f29(void); -void abi_test_clobber_f30(void); -void abi_test_clobber_f31(void); - -void abi_test_clobber_v0(void); -void abi_test_clobber_v1(void); -void abi_test_clobber_v2(void); -void abi_test_clobber_v3(void); -void abi_test_clobber_v4(void); -void abi_test_clobber_v5(void); -void abi_test_clobber_v6(void); -void abi_test_clobber_v7(void); -void abi_test_clobber_v8(void); -void abi_test_clobber_v9(void); -void abi_test_clobber_v10(void); -void abi_test_clobber_v11(void); -void abi_test_clobber_v12(void); -void abi_test_clobber_v13(void); -void abi_test_clobber_v14(void); -void abi_test_clobber_v15(void); -void abi_test_clobber_v16(void); -void abi_test_clobber_v17(void); -void abi_test_clobber_v18(void); -void abi_test_clobber_v19(void); -void abi_test_clobber_v20(void); -void abi_test_clobber_v21(void); -void abi_test_clobber_v22(void); -void abi_test_clobber_v23(void); -void abi_test_clobber_v24(void); -void abi_test_clobber_v25(void); -void abi_test_clobber_v26(void); -void abi_test_clobber_v27(void); -void abi_test_clobber_v28(void); -void abi_test_clobber_v29(void); -void abi_test_clobber_v30(void); -void abi_test_clobber_v31(void); - -void abi_test_clobber_cr0(void); -void abi_test_clobber_cr1(void); -void abi_test_clobber_cr2(void); -void abi_test_clobber_cr3(void); -void abi_test_clobber_cr4(void); -void abi_test_clobber_cr5(void); -void abi_test_clobber_cr6(void); -void abi_test_clobber_cr7(void); - -void abi_test_clobber_ctr(void); -void abi_test_clobber_lr(void); - -} // extern "C" - -TEST(ABITest, PPC64LE) { - // abi_test_trampoline hides unsaved registers from the caller, so we can - // safely call the abi_test_clobber_* functions below. - abi_test::internal::CallerState state; - RAND_bytes(reinterpret_cast<uint8_t *>(&state), sizeof(state)); - CHECK_ABI_NO_UNWIND(abi_test_trampoline, - reinterpret_cast<crypto_word_t>(abi_test_clobber_r14), - &state, nullptr, 0, 0 /* no breakpoint */); - - CHECK_ABI_NO_UNWIND(abi_test_clobber_r0); - CHECK_ABI_NO_UNWIND(abi_test_clobber_r2); - CHECK_ABI_NO_UNWIND(abi_test_clobber_r3); - CHECK_ABI_NO_UNWIND(abi_test_clobber_r4); - CHECK_ABI_NO_UNWIND(abi_test_clobber_r5); - CHECK_ABI_NO_UNWIND(abi_test_clobber_r6); - CHECK_ABI_NO_UNWIND(abi_test_clobber_r7); - CHECK_ABI_NO_UNWIND(abi_test_clobber_r8); - CHECK_ABI_NO_UNWIND(abi_test_clobber_r9); - CHECK_ABI_NO_UNWIND(abi_test_clobber_r10); - CHECK_ABI_NO_UNWIND(abi_test_clobber_r11); - CHECK_ABI_NO_UNWIND(abi_test_clobber_r12); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r14), - "r14 was not restored after return"); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r15), - "r15 was not restored after return"); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r16), - "r16 was not restored after return"); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r17), - "r17 was not restored after return"); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r18), - "r18 was not restored after return"); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r19), - "r19 was not restored after return"); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r20), - "r20 was not restored after return"); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r21), - "r21 was not restored after return"); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r22), - "r22 was not restored after return"); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r23), - "r23 was not restored after return"); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r24), - "r24 was not restored after return"); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r25), - "r25 was not restored after return"); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r26), - "r26 was not restored after return"); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r27), - "r27 was not restored after return"); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r28), - "r28 was not restored after return"); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r29), - "r29 was not restored after return"); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r30), - "r30 was not restored after return"); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r31), - "r31 was not restored after return"); - - CHECK_ABI_NO_UNWIND(abi_test_clobber_f0); - CHECK_ABI_NO_UNWIND(abi_test_clobber_f1); - CHECK_ABI_NO_UNWIND(abi_test_clobber_f2); - CHECK_ABI_NO_UNWIND(abi_test_clobber_f3); - CHECK_ABI_NO_UNWIND(abi_test_clobber_f4); - CHECK_ABI_NO_UNWIND(abi_test_clobber_f5); - CHECK_ABI_NO_UNWIND(abi_test_clobber_f6); - CHECK_ABI_NO_UNWIND(abi_test_clobber_f7); - CHECK_ABI_NO_UNWIND(abi_test_clobber_f8); - CHECK_ABI_NO_UNWIND(abi_test_clobber_f9); - CHECK_ABI_NO_UNWIND(abi_test_clobber_f10); - CHECK_ABI_NO_UNWIND(abi_test_clobber_f11); - CHECK_ABI_NO_UNWIND(abi_test_clobber_f12); - CHECK_ABI_NO_UNWIND(abi_test_clobber_f13); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f14), - "f14 was not restored after return"); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f15), - "f15 was not restored after return"); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f16), - "f16 was not restored after return"); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f17), - "f17 was not restored after return"); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f18), - "f18 was not restored after return"); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f19), - "f19 was not restored after return"); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f20), - "f20 was not restored after return"); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f21), - "f21 was not restored after return"); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f22), - "f22 was not restored after return"); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f23), - "f23 was not restored after return"); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f24), - "f24 was not restored after return"); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f25), - "f25 was not restored after return"); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f26), - "f26 was not restored after return"); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f27), - "f27 was not restored after return"); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f28), - "f28 was not restored after return"); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f29), - "f29 was not restored after return"); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f30), - "f30 was not restored after return"); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f31), - "f31 was not restored after return"); - - CHECK_ABI_NO_UNWIND(abi_test_clobber_v0); - CHECK_ABI_NO_UNWIND(abi_test_clobber_v1); - CHECK_ABI_NO_UNWIND(abi_test_clobber_v2); - CHECK_ABI_NO_UNWIND(abi_test_clobber_v3); - CHECK_ABI_NO_UNWIND(abi_test_clobber_v4); - CHECK_ABI_NO_UNWIND(abi_test_clobber_v5); - CHECK_ABI_NO_UNWIND(abi_test_clobber_v6); - CHECK_ABI_NO_UNWIND(abi_test_clobber_v7); - CHECK_ABI_NO_UNWIND(abi_test_clobber_v8); - CHECK_ABI_NO_UNWIND(abi_test_clobber_v9); - CHECK_ABI_NO_UNWIND(abi_test_clobber_v10); - CHECK_ABI_NO_UNWIND(abi_test_clobber_v11); - CHECK_ABI_NO_UNWIND(abi_test_clobber_v12); - CHECK_ABI_NO_UNWIND(abi_test_clobber_v13); - CHECK_ABI_NO_UNWIND(abi_test_clobber_v14); - CHECK_ABI_NO_UNWIND(abi_test_clobber_v15); - CHECK_ABI_NO_UNWIND(abi_test_clobber_v16); - CHECK_ABI_NO_UNWIND(abi_test_clobber_v17); - CHECK_ABI_NO_UNWIND(abi_test_clobber_v18); - CHECK_ABI_NO_UNWIND(abi_test_clobber_v19); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v20), - "v20 was not restored after return"); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v21), - "v21 was not restored after return"); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v22), - "v22 was not restored after return"); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v23), - "v23 was not restored after return"); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v24), - "v24 was not restored after return"); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v25), - "v25 was not restored after return"); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v26), - "v26 was not restored after return"); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v27), - "v27 was not restored after return"); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v28), - "v28 was not restored after return"); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v29), - "v29 was not restored after return"); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v30), - "v30 was not restored after return"); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v31), - "v31 was not restored after return"); - - CHECK_ABI_NO_UNWIND(abi_test_clobber_cr0); - CHECK_ABI_NO_UNWIND(abi_test_clobber_cr1); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_cr2), - "cr was not restored after return"); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_cr3), - "cr was not restored after return"); - EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_cr4), - "cr was not restored after return"); - CHECK_ABI_NO_UNWIND(abi_test_clobber_cr5); - CHECK_ABI_NO_UNWIND(abi_test_clobber_cr6); - CHECK_ABI_NO_UNWIND(abi_test_clobber_cr7); - - CHECK_ABI_NO_UNWIND(abi_test_clobber_ctr); - CHECK_ABI_NO_UNWIND(abi_test_clobber_lr); -} -#endif // OPENSSL_PPC64LE && SUPPORTS_ABI_TEST
diff --git a/crypto/cpu_ppc64le.c b/crypto/cpu_ppc64le.c deleted file mode 100644 index a802e37..0000000 --- a/crypto/cpu_ppc64le.c +++ /dev/null
@@ -1,38 +0,0 @@ -/* Copyright (c) 2016, Google Inc. - * - * Permission to use, copy, modify, and/or distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY - * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION - * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ - -#include <openssl/base.h> - -#if defined(OPENSSL_PPC64LE) - -#include <sys/auxv.h> - -#include "internal.h" - - -#if !defined(PPC_FEATURE2_HAS_VCRYPTO) -// PPC_FEATURE2_HAS_VCRYPTO was taken from section 4.1.2.3 of the “OpenPOWER -// ABI for Linux Supplement”. -#define PPC_FEATURE2_HAS_VCRYPTO 0x02000000 -#endif - -void OPENSSL_cpuid_setup(void) { - OPENSSL_ppc64le_hwcap2 = getauxval(AT_HWCAP2); -} - -int CRYPTO_is_PPC64LE_vcrypto_capable(void) { - return (OPENSSL_ppc64le_hwcap2 & PPC_FEATURE2_HAS_VCRYPTO) != 0; -} - -#endif // OPENSSL_PPC64LE
diff --git a/crypto/crypto.c b/crypto/crypto.c index 12cbb888..beaae0f 100644 --- a/crypto/crypto.c +++ b/crypto/crypto.c
@@ -25,12 +25,10 @@ "ossl_ssize_t should be the same size as size_t"); #if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_STATIC_ARMCAP) && \ - (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || \ - defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64) || \ - defined(OPENSSL_PPC64LE)) -// x86, x86_64, the ARMs and ppc64le need to record the result of a -// cpuid/getauxval call for the asm to work correctly, unless compiled without -// asm code. + (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || \ + defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)) +// x86, x86_64, and the ARMs need to record the result of a cpuid/getauxval call +// for the asm to work correctly, unless compiled without asm code. #define NEED_CPUID #else @@ -41,8 +39,7 @@ #define BORINGSSL_NO_STATIC_INITIALIZER #endif -#endif // !NO_ASM && !STATIC_ARMCAP && - // (X86 || X86_64 || ARM || AARCH64 || PPC64LE) +#endif // !NO_ASM && !STATIC_ARMCAP && (X86 || X86_64 || ARM || AARCH64) // Our assembly does not use the GOT to reference symbols, which means @@ -81,10 +78,6 @@ // This value must be explicitly initialized to zero. See similar comment above. HIDDEN uint32_t OPENSSL_ia32cap_P[4] = {0}; -#elif defined(OPENSSL_PPC64LE) - -HIDDEN unsigned long OPENSSL_ppc64le_hwcap2 = 0; - #elif defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64) #include <openssl/arm_arch.h>
diff --git a/crypto/fipsmodule/CMakeLists.txt b/crypto/fipsmodule/CMakeLists.txt index 82505b7..2bfadab 100644 --- a/crypto/fipsmodule/CMakeLists.txt +++ b/crypto/fipsmodule/CMakeLists.txt
@@ -20,8 +20,6 @@ perlasm(BCM_SOURCES arm sha256-armv4 sha/asm/sha256-armv4.pl) perlasm(BCM_SOURCES arm sha512-armv4 sha/asm/sha512-armv4.pl) perlasm(BCM_SOURCES arm vpaes-armv7 aes/asm/vpaes-armv7.pl) -perlasm(BCM_SOURCES ppc64le aesp8-ppc aes/asm/aesp8-ppc.pl) -perlasm(BCM_SOURCES ppc64le ghashp8-ppc modes/asm/ghashp8-ppc.pl) perlasm(BCM_SOURCES x86 aesni-x86 aes/asm/aesni-x86.pl) perlasm(BCM_SOURCES x86 bn-586 bn/asm/bn-586.pl) perlasm(BCM_SOURCES x86 co-586 bn/asm/co-586.pl)
diff --git a/crypto/fipsmodule/aes/asm/aesp8-ppc.pl b/crypto/fipsmodule/aes/asm/aesp8-ppc.pl deleted file mode 100644 index 061f6b7..0000000 --- a/crypto/fipsmodule/aes/asm/aesp8-ppc.pl +++ /dev/null
@@ -1,3809 +0,0 @@ -#! /usr/bin/env perl -# Copyright 2014-2018 The OpenSSL Project Authors. All Rights Reserved. -# -# Licensed under the OpenSSL license (the "License"). You may not use -# this file except in compliance with the License. You can obtain a copy -# in the file LICENSE in the source distribution or at -# https://www.openssl.org/source/license.html - -# -# ==================================================================== -# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== -# -# This module implements support for AES instructions as per PowerISA -# specification version 2.07, first implemented by POWER8 processor. -# The module is endian-agnostic in sense that it supports both big- -# and little-endian cases. Data alignment in parallelizable modes is -# handled with VSX loads and stores, which implies MSR.VSX flag being -# set. It should also be noted that ISA specification doesn't prohibit -# alignment exceptions for these instructions on page boundaries. -# Initially alignment was handled in pure AltiVec/VMX way [when data -# is aligned programmatically, which in turn guarantees exception- -# free execution], but it turned to hamper performance when vcipher -# instructions are interleaved. It's reckoned that eventual -# misalignment penalties at page boundaries are in average lower -# than additional overhead in pure AltiVec approach. -# -# May 2016 -# -# Add XTS subroutine, 9x on little- and 12x improvement on big-endian -# systems were measured. -# -###################################################################### -# Current large-block performance in cycles per byte processed with -# 128-bit key (less is better). -# -# CBC en-/decrypt CTR XTS -# POWER8[le] 3.96/0.72 0.74 1.1 -# POWER8[be] 3.75/0.65 0.66 1.0 -# POWER9[le] 4.02/0.86 0.84 1.05 -# POWER9[be] 3.99/0.78 0.79 0.97 - -$flavour = shift; -$output = shift; - -if ($flavour =~ /64/) { - $SIZE_T =8; - $LRSAVE =2*$SIZE_T; - $STU ="stdu"; - $POP ="ld"; - $PUSH ="std"; - $UCMP ="cmpld"; - $SHL ="sldi"; -} elsif ($flavour =~ /32/) { - $SIZE_T =4; - $LRSAVE =$SIZE_T; - $STU ="stwu"; - $POP ="lwz"; - $PUSH ="stw"; - $UCMP ="cmplw"; - $SHL ="slwi"; -} else { die "nonsense $flavour"; } - -$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0; - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or -( $xlate="${dir}../../../perlasm/ppc-xlate.pl" and -f $xlate) or -die "can't locate ppc-xlate.pl"; - -open OUT,"| $^X \"$xlate\" $flavour \"$output\"" || die "can't call $xlate: $!"; -*STDOUT=*OUT; - -$FRAME=8*$SIZE_T; -$prefix="aes_hw"; - -$sp="r1"; -$vrsave="r12"; - -######################################################################### -{{{ # Key setup procedures # -my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8)); -my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6)); -my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11)); - -$code.=<<___; -.machine "any" - -.text - -.align 7 -Lrcon: -.long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev -.long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev -.long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev -.long 0,0,0,0 ?asis -Lconsts: - mflr r0 - bcl 20,31,\$+4 - mflr $ptr #vvvvv "distance between . and rcon - addi $ptr,$ptr,-0x48 - mtlr r0 - blr - .long 0 - .byte 0,12,0x14,0,0,0,0,0 -.asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>" - -.globl .${prefix}_set_encrypt_key -.align 5 -.${prefix}_set_encrypt_key: -Lset_encrypt_key: - mflr r11 - $PUSH r11,$LRSAVE($sp) - - li $ptr,-1 - ${UCMP}i $inp,0 - beq- Lenc_key_abort # if ($inp==0) return -1; - ${UCMP}i $out,0 - beq- Lenc_key_abort # if ($out==0) return -1; - li $ptr,-2 - cmpwi $bits,128 - blt- Lenc_key_abort - cmpwi $bits,256 - bgt- Lenc_key_abort - andi. r0,$bits,0x3f - bne- Lenc_key_abort - - lis r0,0xfff0 - mfspr $vrsave,256 - mtspr 256,r0 - - bl Lconsts - mtlr r11 - - neg r9,$inp - lvx $in0,0,$inp - addi $inp,$inp,15 # 15 is not typo - lvsr $key,0,r9 # borrow $key - li r8,0x20 - cmpwi $bits,192 - lvx $in1,0,$inp - le?vspltisb $mask,0x0f # borrow $mask - lvx $rcon,0,$ptr - le?vxor $key,$key,$mask # adjust for byte swap - lvx $mask,r8,$ptr - addi $ptr,$ptr,0x10 - vperm $in0,$in0,$in1,$key # align [and byte swap in LE] - li $cnt,8 - vxor $zero,$zero,$zero - mtctr $cnt - - ?lvsr $outperm,0,$out - vspltisb $outmask,-1 - lvx $outhead,0,$out - ?vperm $outmask,$zero,$outmask,$outperm - - blt Loop128 - addi $inp,$inp,8 - beq L192 - addi $inp,$inp,8 - b L256 - -.align 4 -Loop128: - vperm $key,$in0,$in0,$mask # rotate-n-splat - vsldoi $tmp,$zero,$in0,12 # >>32 - vperm $outtail,$in0,$in0,$outperm # rotate - vsel $stage,$outhead,$outtail,$outmask - vmr $outhead,$outtail - vcipherlast $key,$key,$rcon - stvx $stage,0,$out - addi $out,$out,16 - - vxor $in0,$in0,$tmp - vsldoi $tmp,$zero,$tmp,12 # >>32 - vxor $in0,$in0,$tmp - vsldoi $tmp,$zero,$tmp,12 # >>32 - vxor $in0,$in0,$tmp - vadduwm $rcon,$rcon,$rcon - vxor $in0,$in0,$key - bdnz Loop128 - - lvx $rcon,0,$ptr # last two round keys - - vperm $key,$in0,$in0,$mask # rotate-n-splat - vsldoi $tmp,$zero,$in0,12 # >>32 - vperm $outtail,$in0,$in0,$outperm # rotate - vsel $stage,$outhead,$outtail,$outmask - vmr $outhead,$outtail - vcipherlast $key,$key,$rcon - stvx $stage,0,$out - addi $out,$out,16 - - vxor $in0,$in0,$tmp - vsldoi $tmp,$zero,$tmp,12 # >>32 - vxor $in0,$in0,$tmp - vsldoi $tmp,$zero,$tmp,12 # >>32 - vxor $in0,$in0,$tmp - vadduwm $rcon,$rcon,$rcon - vxor $in0,$in0,$key - - vperm $key,$in0,$in0,$mask # rotate-n-splat - vsldoi $tmp,$zero,$in0,12 # >>32 - vperm $outtail,$in0,$in0,$outperm # rotate - vsel $stage,$outhead,$outtail,$outmask - vmr $outhead,$outtail - vcipherlast $key,$key,$rcon - stvx $stage,0,$out - addi $out,$out,16 - - vxor $in0,$in0,$tmp - vsldoi $tmp,$zero,$tmp,12 # >>32 - vxor $in0,$in0,$tmp - vsldoi $tmp,$zero,$tmp,12 # >>32 - vxor $in0,$in0,$tmp - vxor $in0,$in0,$key - vperm $outtail,$in0,$in0,$outperm # rotate - vsel $stage,$outhead,$outtail,$outmask - vmr $outhead,$outtail - stvx $stage,0,$out - - addi $inp,$out,15 # 15 is not typo - addi $out,$out,0x50 - - li $rounds,10 - b Ldone - -.align 4 -L192: - lvx $tmp,0,$inp - li $cnt,4 - vperm $outtail,$in0,$in0,$outperm # rotate - vsel $stage,$outhead,$outtail,$outmask - vmr $outhead,$outtail - stvx $stage,0,$out - addi $out,$out,16 - vperm $in1,$in1,$tmp,$key # align [and byte swap in LE] - vspltisb $key,8 # borrow $key - mtctr $cnt - vsububm $mask,$mask,$key # adjust the mask - -Loop192: - vperm $key,$in1,$in1,$mask # roate-n-splat - vsldoi $tmp,$zero,$in0,12 # >>32 - vcipherlast $key,$key,$rcon - - vxor $in0,$in0,$tmp - vsldoi $tmp,$zero,$tmp,12 # >>32 - vxor $in0,$in0,$tmp - vsldoi $tmp,$zero,$tmp,12 # >>32 - vxor $in0,$in0,$tmp - - vsldoi $stage,$zero,$in1,8 - vspltw $tmp,$in0,3 - vxor $tmp,$tmp,$in1 - vsldoi $in1,$zero,$in1,12 # >>32 - vadduwm $rcon,$rcon,$rcon - vxor $in1,$in1,$tmp - vxor $in0,$in0,$key - vxor $in1,$in1,$key - vsldoi $stage,$stage,$in0,8 - - vperm $key,$in1,$in1,$mask # rotate-n-splat - vsldoi $tmp,$zero,$in0,12 # >>32 - vperm $outtail,$stage,$stage,$outperm # rotate - vsel $stage,$outhead,$outtail,$outmask - vmr $outhead,$outtail - vcipherlast $key,$key,$rcon - stvx $stage,0,$out - addi $out,$out,16 - - vsldoi $stage,$in0,$in1,8 - vxor $in0,$in0,$tmp - vsldoi $tmp,$zero,$tmp,12 # >>32 - vperm $outtail,$stage,$stage,$outperm # rotate - vsel $stage,$outhead,$outtail,$outmask - vmr $outhead,$outtail - vxor $in0,$in0,$tmp - vsldoi $tmp,$zero,$tmp,12 # >>32 - vxor $in0,$in0,$tmp - stvx $stage,0,$out - addi $out,$out,16 - - vspltw $tmp,$in0,3 - vxor $tmp,$tmp,$in1 - vsldoi $in1,$zero,$in1,12 # >>32 - vadduwm $rcon,$rcon,$rcon - vxor $in1,$in1,$tmp - vxor $in0,$in0,$key - vxor $in1,$in1,$key - vperm $outtail,$in0,$in0,$outperm # rotate - vsel $stage,$outhead,$outtail,$outmask - vmr $outhead,$outtail - stvx $stage,0,$out - addi $inp,$out,15 # 15 is not typo - addi $out,$out,16 - bdnz Loop192 - - li $rounds,12 - addi $out,$out,0x20 - b Ldone - -.align 4 -L256: - lvx $tmp,0,$inp - li $cnt,7 - li $rounds,14 - vperm $outtail,$in0,$in0,$outperm # rotate - vsel $stage,$outhead,$outtail,$outmask - vmr $outhead,$outtail - stvx $stage,0,$out - addi $out,$out,16 - vperm $in1,$in1,$tmp,$key # align [and byte swap in LE] - mtctr $cnt - -Loop256: - vperm $key,$in1,$in1,$mask # rotate-n-splat - vsldoi $tmp,$zero,$in0,12 # >>32 - vperm $outtail,$in1,$in1,$outperm # rotate - vsel $stage,$outhead,$outtail,$outmask - vmr $outhead,$outtail - vcipherlast $key,$key,$rcon - stvx $stage,0,$out - addi $out,$out,16 - - vxor $in0,$in0,$tmp - vsldoi $tmp,$zero,$tmp,12 # >>32 - vxor $in0,$in0,$tmp - vsldoi $tmp,$zero,$tmp,12 # >>32 - vxor $in0,$in0,$tmp - vadduwm $rcon,$rcon,$rcon - vxor $in0,$in0,$key - vperm $outtail,$in0,$in0,$outperm # rotate - vsel $stage,$outhead,$outtail,$outmask - vmr $outhead,$outtail - stvx $stage,0,$out - addi $inp,$out,15 # 15 is not typo - addi $out,$out,16 - bdz Ldone - - vspltw $key,$in0,3 # just splat - vsldoi $tmp,$zero,$in1,12 # >>32 - vsbox $key,$key - - vxor $in1,$in1,$tmp - vsldoi $tmp,$zero,$tmp,12 # >>32 - vxor $in1,$in1,$tmp - vsldoi $tmp,$zero,$tmp,12 # >>32 - vxor $in1,$in1,$tmp - - vxor $in1,$in1,$key - b Loop256 - -.align 4 -Ldone: - lvx $in1,0,$inp # redundant in aligned case - vsel $in1,$outhead,$in1,$outmask - stvx $in1,0,$inp - li $ptr,0 - mtspr 256,$vrsave - stw $rounds,0($out) - -Lenc_key_abort: - mr r3,$ptr - blr - .long 0 - .byte 0,12,0x14,1,0,0,3,0 - .long 0 -.size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key - -.globl .${prefix}_set_decrypt_key -.align 5 -.${prefix}_set_decrypt_key: - $STU $sp,-$FRAME($sp) - mflr r10 - $PUSH r10,`$FRAME+$LRSAVE`($sp) - bl Lset_encrypt_key - mtlr r10 - - cmpwi r3,0 - bne- Ldec_key_abort - - slwi $cnt,$rounds,4 - subi $inp,$out,240 # first round key - srwi $rounds,$rounds,1 - add $out,$inp,$cnt # last round key - mtctr $rounds - -Ldeckey: - lwz r0, 0($inp) - lwz r6, 4($inp) - lwz r7, 8($inp) - lwz r8, 12($inp) - addi $inp,$inp,16 - lwz r9, 0($out) - lwz r10,4($out) - lwz r11,8($out) - lwz r12,12($out) - stw r0, 0($out) - stw r6, 4($out) - stw r7, 8($out) - stw r8, 12($out) - subi $out,$out,16 - stw r9, -16($inp) - stw r10,-12($inp) - stw r11,-8($inp) - stw r12,-4($inp) - bdnz Ldeckey - - xor r3,r3,r3 # return value -Ldec_key_abort: - addi $sp,$sp,$FRAME - blr - .long 0 - .byte 0,12,4,1,0x80,0,3,0 - .long 0 -.size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key -___ -}}} -######################################################################### -{{{ # Single block en- and decrypt procedures # -sub gen_block () { -my $dir = shift; -my $n = $dir eq "de" ? "n" : ""; -my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7)); - -$code.=<<___; -.globl .${prefix}_${dir}crypt -.align 5 -.${prefix}_${dir}crypt: - lwz $rounds,240($key) - lis r0,0xfc00 - mfspr $vrsave,256 - li $idx,15 # 15 is not typo - mtspr 256,r0 - - lvx v0,0,$inp - neg r11,$out - lvx v1,$idx,$inp - lvsl v2,0,$inp # inpperm - le?vspltisb v4,0x0f - ?lvsl v3,0,r11 # outperm - le?vxor v2,v2,v4 - li $idx,16 - vperm v0,v0,v1,v2 # align [and byte swap in LE] - lvx v1,0,$key - ?lvsl v5,0,$key # keyperm - srwi $rounds,$rounds,1 - lvx v2,$idx,$key - addi $idx,$idx,16 - subi $rounds,$rounds,1 - ?vperm v1,v1,v2,v5 # align round key - - vxor v0,v0,v1 - lvx v1,$idx,$key - addi $idx,$idx,16 - mtctr $rounds - -Loop_${dir}c: - ?vperm v2,v2,v1,v5 - v${n}cipher v0,v0,v2 - lvx v2,$idx,$key - addi $idx,$idx,16 - ?vperm v1,v1,v2,v5 - v${n}cipher v0,v0,v1 - lvx v1,$idx,$key - addi $idx,$idx,16 - bdnz Loop_${dir}c - - ?vperm v2,v2,v1,v5 - v${n}cipher v0,v0,v2 - lvx v2,$idx,$key - ?vperm v1,v1,v2,v5 - v${n}cipherlast v0,v0,v1 - - vspltisb v2,-1 - vxor v1,v1,v1 - li $idx,15 # 15 is not typo - ?vperm v2,v1,v2,v3 # outmask - le?vxor v3,v3,v4 - lvx v1,0,$out # outhead - vperm v0,v0,v0,v3 # rotate [and byte swap in LE] - vsel v1,v1,v0,v2 - lvx v4,$idx,$out - stvx v1,0,$out - vsel v0,v0,v4,v2 - stvx v0,$idx,$out - - mtspr 256,$vrsave - blr - .long 0 - .byte 0,12,0x14,0,0,0,3,0 - .long 0 -.size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt -___ -} -&gen_block("en"); -&gen_block("de"); -}}} -######################################################################### -{{{ # CBC en- and decrypt procedures # -my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10)); -my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3)); -my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)= - map("v$_",(4..10)); -$code.=<<___; -.globl .${prefix}_cbc_encrypt -.align 5 -.${prefix}_cbc_encrypt: - ${UCMP}i $len,16 - bltlr- - - cmpwi $enc,0 # test direction - lis r0,0xffe0 - mfspr $vrsave,256 - mtspr 256,r0 - - li $idx,15 - vxor $rndkey0,$rndkey0,$rndkey0 - le?vspltisb $tmp,0x0f - - lvx $ivec,0,$ivp # load [unaligned] iv - lvsl $inpperm,0,$ivp - lvx $inptail,$idx,$ivp - le?vxor $inpperm,$inpperm,$tmp - vperm $ivec,$ivec,$inptail,$inpperm - - neg r11,$inp - ?lvsl $keyperm,0,$key # prepare for unaligned key - lwz $rounds,240($key) - - lvsr $inpperm,0,r11 # prepare for unaligned load - lvx $inptail,0,$inp - addi $inp,$inp,15 # 15 is not typo - le?vxor $inpperm,$inpperm,$tmp - - ?lvsr $outperm,0,$out # prepare for unaligned store - vspltisb $outmask,-1 - lvx $outhead,0,$out - ?vperm $outmask,$rndkey0,$outmask,$outperm - le?vxor $outperm,$outperm,$tmp - - srwi $rounds,$rounds,1 - li $idx,16 - subi $rounds,$rounds,1 - beq Lcbc_dec - -Lcbc_enc: - vmr $inout,$inptail - lvx $inptail,0,$inp - addi $inp,$inp,16 - mtctr $rounds - subi $len,$len,16 # len-=16 - - lvx $rndkey0,0,$key - vperm $inout,$inout,$inptail,$inpperm - lvx $rndkey1,$idx,$key - addi $idx,$idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vxor $inout,$inout,$rndkey0 - lvx $rndkey0,$idx,$key - addi $idx,$idx,16 - vxor $inout,$inout,$ivec - -Loop_cbc_enc: - ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm - vcipher $inout,$inout,$rndkey1 - lvx $rndkey1,$idx,$key - addi $idx,$idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vcipher $inout,$inout,$rndkey0 - lvx $rndkey0,$idx,$key - addi $idx,$idx,16 - bdnz Loop_cbc_enc - - ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm - vcipher $inout,$inout,$rndkey1 - lvx $rndkey1,$idx,$key - li $idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vcipherlast $ivec,$inout,$rndkey0 - ${UCMP}i $len,16 - - vperm $tmp,$ivec,$ivec,$outperm - vsel $inout,$outhead,$tmp,$outmask - vmr $outhead,$tmp - stvx $inout,0,$out - addi $out,$out,16 - bge Lcbc_enc - - b Lcbc_done - -.align 4 -Lcbc_dec: - ${UCMP}i $len,128 - bge _aesp8_cbc_decrypt8x - vmr $tmp,$inptail - lvx $inptail,0,$inp - addi $inp,$inp,16 - mtctr $rounds - subi $len,$len,16 # len-=16 - - lvx $rndkey0,0,$key - vperm $tmp,$tmp,$inptail,$inpperm - lvx $rndkey1,$idx,$key - addi $idx,$idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vxor $inout,$tmp,$rndkey0 - lvx $rndkey0,$idx,$key - addi $idx,$idx,16 - -Loop_cbc_dec: - ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm - vncipher $inout,$inout,$rndkey1 - lvx $rndkey1,$idx,$key - addi $idx,$idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vncipher $inout,$inout,$rndkey0 - lvx $rndkey0,$idx,$key - addi $idx,$idx,16 - bdnz Loop_cbc_dec - - ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm - vncipher $inout,$inout,$rndkey1 - lvx $rndkey1,$idx,$key - li $idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vncipherlast $inout,$inout,$rndkey0 - ${UCMP}i $len,16 - - vxor $inout,$inout,$ivec - vmr $ivec,$tmp - vperm $tmp,$inout,$inout,$outperm - vsel $inout,$outhead,$tmp,$outmask - vmr $outhead,$tmp - stvx $inout,0,$out - addi $out,$out,16 - bge Lcbc_dec - -Lcbc_done: - addi $out,$out,-1 - lvx $inout,0,$out # redundant in aligned case - vsel $inout,$outhead,$inout,$outmask - stvx $inout,0,$out - - neg $enc,$ivp # write [unaligned] iv - li $idx,15 # 15 is not typo - vxor $rndkey0,$rndkey0,$rndkey0 - vspltisb $outmask,-1 - le?vspltisb $tmp,0x0f - ?lvsl $outperm,0,$enc - ?vperm $outmask,$rndkey0,$outmask,$outperm - le?vxor $outperm,$outperm,$tmp - lvx $outhead,0,$ivp - vperm $ivec,$ivec,$ivec,$outperm - vsel $inout,$outhead,$ivec,$outmask - lvx $inptail,$idx,$ivp - stvx $inout,0,$ivp - vsel $inout,$ivec,$inptail,$outmask - stvx $inout,$idx,$ivp - - mtspr 256,$vrsave - blr - .long 0 - .byte 0,12,0x14,0,0,0,6,0 - .long 0 -___ -######################################################################### -{{ # Optimized CBC decrypt procedure # -my $key_="r11"; -my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31)); - $x00=0 if ($flavour =~ /osx/); -my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13)); -my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21)); -my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys - # v26-v31 last 6 round keys -my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment - -$code.=<<___; -.align 5 -_aesp8_cbc_decrypt8x: - $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) - li r10,`$FRAME+8*16+15` - li r11,`$FRAME+8*16+31` - stvx v20,r10,$sp # ABI says so - addi r10,r10,32 - stvx v21,r11,$sp - addi r11,r11,32 - stvx v22,r10,$sp - addi r10,r10,32 - stvx v23,r11,$sp - addi r11,r11,32 - stvx v24,r10,$sp - addi r10,r10,32 - stvx v25,r11,$sp - addi r11,r11,32 - stvx v26,r10,$sp - addi r10,r10,32 - stvx v27,r11,$sp - addi r11,r11,32 - stvx v28,r10,$sp - addi r10,r10,32 - stvx v29,r11,$sp - addi r11,r11,32 - stvx v30,r10,$sp - stvx v31,r11,$sp - li r0,-1 - stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave - li $x10,0x10 - $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) - li $x20,0x20 - $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) - li $x30,0x30 - $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) - li $x40,0x40 - $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) - li $x50,0x50 - $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) - li $x60,0x60 - $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) - li $x70,0x70 - mtspr 256,r0 - - subi $rounds,$rounds,3 # -4 in total - subi $len,$len,128 # bias - - lvx $rndkey0,$x00,$key # load key schedule - lvx v30,$x10,$key - addi $key,$key,0x20 - lvx v31,$x00,$key - ?vperm $rndkey0,$rndkey0,v30,$keyperm - addi $key_,$sp,`$FRAME+15` - mtctr $rounds - -Load_cbc_dec_key: - ?vperm v24,v30,v31,$keyperm - lvx v30,$x10,$key - addi $key,$key,0x20 - stvx v24,$x00,$key_ # off-load round[1] - ?vperm v25,v31,v30,$keyperm - lvx v31,$x00,$key - stvx v25,$x10,$key_ # off-load round[2] - addi $key_,$key_,0x20 - bdnz Load_cbc_dec_key - - lvx v26,$x10,$key - ?vperm v24,v30,v31,$keyperm - lvx v27,$x20,$key - stvx v24,$x00,$key_ # off-load round[3] - ?vperm v25,v31,v26,$keyperm - lvx v28,$x30,$key - stvx v25,$x10,$key_ # off-load round[4] - addi $key_,$sp,`$FRAME+15` # rewind $key_ - ?vperm v26,v26,v27,$keyperm - lvx v29,$x40,$key - ?vperm v27,v27,v28,$keyperm - lvx v30,$x50,$key - ?vperm v28,v28,v29,$keyperm - lvx v31,$x60,$key - ?vperm v29,v29,v30,$keyperm - lvx $out0,$x70,$key # borrow $out0 - ?vperm v30,v30,v31,$keyperm - lvx v24,$x00,$key_ # pre-load round[1] - ?vperm v31,v31,$out0,$keyperm - lvx v25,$x10,$key_ # pre-load round[2] - - #lvx $inptail,0,$inp # "caller" already did this - #addi $inp,$inp,15 # 15 is not typo - subi $inp,$inp,15 # undo "caller" - - le?li $idx,8 - lvx_u $in0,$x00,$inp # load first 8 "words" - le?lvsl $inpperm,0,$idx - le?vspltisb $tmp,0x0f - lvx_u $in1,$x10,$inp - le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u - lvx_u $in2,$x20,$inp - le?vperm $in0,$in0,$in0,$inpperm - lvx_u $in3,$x30,$inp - le?vperm $in1,$in1,$in1,$inpperm - lvx_u $in4,$x40,$inp - le?vperm $in2,$in2,$in2,$inpperm - vxor $out0,$in0,$rndkey0 - lvx_u $in5,$x50,$inp - le?vperm $in3,$in3,$in3,$inpperm - vxor $out1,$in1,$rndkey0 - lvx_u $in6,$x60,$inp - le?vperm $in4,$in4,$in4,$inpperm - vxor $out2,$in2,$rndkey0 - lvx_u $in7,$x70,$inp - addi $inp,$inp,0x80 - le?vperm $in5,$in5,$in5,$inpperm - vxor $out3,$in3,$rndkey0 - le?vperm $in6,$in6,$in6,$inpperm - vxor $out4,$in4,$rndkey0 - le?vperm $in7,$in7,$in7,$inpperm - vxor $out5,$in5,$rndkey0 - vxor $out6,$in6,$rndkey0 - vxor $out7,$in7,$rndkey0 - - mtctr $rounds - b Loop_cbc_dec8x -.align 5 -Loop_cbc_dec8x: - vncipher $out0,$out0,v24 - vncipher $out1,$out1,v24 - vncipher $out2,$out2,v24 - vncipher $out3,$out3,v24 - vncipher $out4,$out4,v24 - vncipher $out5,$out5,v24 - vncipher $out6,$out6,v24 - vncipher $out7,$out7,v24 - lvx v24,$x20,$key_ # round[3] - addi $key_,$key_,0x20 - - vncipher $out0,$out0,v25 - vncipher $out1,$out1,v25 - vncipher $out2,$out2,v25 - vncipher $out3,$out3,v25 - vncipher $out4,$out4,v25 - vncipher $out5,$out5,v25 - vncipher $out6,$out6,v25 - vncipher $out7,$out7,v25 - lvx v25,$x10,$key_ # round[4] - bdnz Loop_cbc_dec8x - - subic $len,$len,128 # $len-=128 - vncipher $out0,$out0,v24 - vncipher $out1,$out1,v24 - vncipher $out2,$out2,v24 - vncipher $out3,$out3,v24 - vncipher $out4,$out4,v24 - vncipher $out5,$out5,v24 - vncipher $out6,$out6,v24 - vncipher $out7,$out7,v24 - - subfe. r0,r0,r0 # borrow?-1:0 - vncipher $out0,$out0,v25 - vncipher $out1,$out1,v25 - vncipher $out2,$out2,v25 - vncipher $out3,$out3,v25 - vncipher $out4,$out4,v25 - vncipher $out5,$out5,v25 - vncipher $out6,$out6,v25 - vncipher $out7,$out7,v25 - - and r0,r0,$len - vncipher $out0,$out0,v26 - vncipher $out1,$out1,v26 - vncipher $out2,$out2,v26 - vncipher $out3,$out3,v26 - vncipher $out4,$out4,v26 - vncipher $out5,$out5,v26 - vncipher $out6,$out6,v26 - vncipher $out7,$out7,v26 - - add $inp,$inp,r0 # $inp is adjusted in such - # way that at exit from the - # loop inX-in7 are loaded - # with last "words" - vncipher $out0,$out0,v27 - vncipher $out1,$out1,v27 - vncipher $out2,$out2,v27 - vncipher $out3,$out3,v27 - vncipher $out4,$out4,v27 - vncipher $out5,$out5,v27 - vncipher $out6,$out6,v27 - vncipher $out7,$out7,v27 - - addi $key_,$sp,`$FRAME+15` # rewind $key_ - vncipher $out0,$out0,v28 - vncipher $out1,$out1,v28 - vncipher $out2,$out2,v28 - vncipher $out3,$out3,v28 - vncipher $out4,$out4,v28 - vncipher $out5,$out5,v28 - vncipher $out6,$out6,v28 - vncipher $out7,$out7,v28 - lvx v24,$x00,$key_ # re-pre-load round[1] - - vncipher $out0,$out0,v29 - vncipher $out1,$out1,v29 - vncipher $out2,$out2,v29 - vncipher $out3,$out3,v29 - vncipher $out4,$out4,v29 - vncipher $out5,$out5,v29 - vncipher $out6,$out6,v29 - vncipher $out7,$out7,v29 - lvx v25,$x10,$key_ # re-pre-load round[2] - - vncipher $out0,$out0,v30 - vxor $ivec,$ivec,v31 # xor with last round key - vncipher $out1,$out1,v30 - vxor $in0,$in0,v31 - vncipher $out2,$out2,v30 - vxor $in1,$in1,v31 - vncipher $out3,$out3,v30 - vxor $in2,$in2,v31 - vncipher $out4,$out4,v30 - vxor $in3,$in3,v31 - vncipher $out5,$out5,v30 - vxor $in4,$in4,v31 - vncipher $out6,$out6,v30 - vxor $in5,$in5,v31 - vncipher $out7,$out7,v30 - vxor $in6,$in6,v31 - - vncipherlast $out0,$out0,$ivec - vncipherlast $out1,$out1,$in0 - lvx_u $in0,$x00,$inp # load next input block - vncipherlast $out2,$out2,$in1 - lvx_u $in1,$x10,$inp - vncipherlast $out3,$out3,$in2 - le?vperm $in0,$in0,$in0,$inpperm - lvx_u $in2,$x20,$inp - vncipherlast $out4,$out4,$in3 - le?vperm $in1,$in1,$in1,$inpperm - lvx_u $in3,$x30,$inp - vncipherlast $out5,$out5,$in4 - le?vperm $in2,$in2,$in2,$inpperm - lvx_u $in4,$x40,$inp - vncipherlast $out6,$out6,$in5 - le?vperm $in3,$in3,$in3,$inpperm - lvx_u $in5,$x50,$inp - vncipherlast $out7,$out7,$in6 - le?vperm $in4,$in4,$in4,$inpperm - lvx_u $in6,$x60,$inp - vmr $ivec,$in7 - le?vperm $in5,$in5,$in5,$inpperm - lvx_u $in7,$x70,$inp - addi $inp,$inp,0x80 - - le?vperm $out0,$out0,$out0,$inpperm - le?vperm $out1,$out1,$out1,$inpperm - stvx_u $out0,$x00,$out - le?vperm $in6,$in6,$in6,$inpperm - vxor $out0,$in0,$rndkey0 - le?vperm $out2,$out2,$out2,$inpperm - stvx_u $out1,$x10,$out - le?vperm $in7,$in7,$in7,$inpperm - vxor $out1,$in1,$rndkey0 - le?vperm $out3,$out3,$out3,$inpperm - stvx_u $out2,$x20,$out - vxor $out2,$in2,$rndkey0 - le?vperm $out4,$out4,$out4,$inpperm - stvx_u $out3,$x30,$out - vxor $out3,$in3,$rndkey0 - le?vperm $out5,$out5,$out5,$inpperm - stvx_u $out4,$x40,$out - vxor $out4,$in4,$rndkey0 - le?vperm $out6,$out6,$out6,$inpperm - stvx_u $out5,$x50,$out - vxor $out5,$in5,$rndkey0 - le?vperm $out7,$out7,$out7,$inpperm - stvx_u $out6,$x60,$out - vxor $out6,$in6,$rndkey0 - stvx_u $out7,$x70,$out - addi $out,$out,0x80 - vxor $out7,$in7,$rndkey0 - - mtctr $rounds - beq Loop_cbc_dec8x # did $len-=128 borrow? - - addic. $len,$len,128 - beq Lcbc_dec8x_done - nop - nop - -Loop_cbc_dec8x_tail: # up to 7 "words" tail... - vncipher $out1,$out1,v24 - vncipher $out2,$out2,v24 - vncipher $out3,$out3,v24 - vncipher $out4,$out4,v24 - vncipher $out5,$out5,v24 - vncipher $out6,$out6,v24 - vncipher $out7,$out7,v24 - lvx v24,$x20,$key_ # round[3] - addi $key_,$key_,0x20 - - vncipher $out1,$out1,v25 - vncipher $out2,$out2,v25 - vncipher $out3,$out3,v25 - vncipher $out4,$out4,v25 - vncipher $out5,$out5,v25 - vncipher $out6,$out6,v25 - vncipher $out7,$out7,v25 - lvx v25,$x10,$key_ # round[4] - bdnz Loop_cbc_dec8x_tail - - vncipher $out1,$out1,v24 - vncipher $out2,$out2,v24 - vncipher $out3,$out3,v24 - vncipher $out4,$out4,v24 - vncipher $out5,$out5,v24 - vncipher $out6,$out6,v24 - vncipher $out7,$out7,v24 - - vncipher $out1,$out1,v25 - vncipher $out2,$out2,v25 - vncipher $out3,$out3,v25 - vncipher $out4,$out4,v25 - vncipher $out5,$out5,v25 - vncipher $out6,$out6,v25 - vncipher $out7,$out7,v25 - - vncipher $out1,$out1,v26 - vncipher $out2,$out2,v26 - vncipher $out3,$out3,v26 - vncipher $out4,$out4,v26 - vncipher $out5,$out5,v26 - vncipher $out6,$out6,v26 - vncipher $out7,$out7,v26 - - vncipher $out1,$out1,v27 - vncipher $out2,$out2,v27 - vncipher $out3,$out3,v27 - vncipher $out4,$out4,v27 - vncipher $out5,$out5,v27 - vncipher $out6,$out6,v27 - vncipher $out7,$out7,v27 - - vncipher $out1,$out1,v28 - vncipher $out2,$out2,v28 - vncipher $out3,$out3,v28 - vncipher $out4,$out4,v28 - vncipher $out5,$out5,v28 - vncipher $out6,$out6,v28 - vncipher $out7,$out7,v28 - - vncipher $out1,$out1,v29 - vncipher $out2,$out2,v29 - vncipher $out3,$out3,v29 - vncipher $out4,$out4,v29 - vncipher $out5,$out5,v29 - vncipher $out6,$out6,v29 - vncipher $out7,$out7,v29 - - vncipher $out1,$out1,v30 - vxor $ivec,$ivec,v31 # last round key - vncipher $out2,$out2,v30 - vxor $in1,$in1,v31 - vncipher $out3,$out3,v30 - vxor $in2,$in2,v31 - vncipher $out4,$out4,v30 - vxor $in3,$in3,v31 - vncipher $out5,$out5,v30 - vxor $in4,$in4,v31 - vncipher $out6,$out6,v30 - vxor $in5,$in5,v31 - vncipher $out7,$out7,v30 - vxor $in6,$in6,v31 - - cmplwi $len,32 # switch($len) - blt Lcbc_dec8x_one - nop - beq Lcbc_dec8x_two - cmplwi $len,64 - blt Lcbc_dec8x_three - nop - beq Lcbc_dec8x_four - cmplwi $len,96 - blt Lcbc_dec8x_five - nop - beq Lcbc_dec8x_six - -Lcbc_dec8x_seven: - vncipherlast $out1,$out1,$ivec - vncipherlast $out2,$out2,$in1 - vncipherlast $out3,$out3,$in2 - vncipherlast $out4,$out4,$in3 - vncipherlast $out5,$out5,$in4 - vncipherlast $out6,$out6,$in5 - vncipherlast $out7,$out7,$in6 - vmr $ivec,$in7 - - le?vperm $out1,$out1,$out1,$inpperm - le?vperm $out2,$out2,$out2,$inpperm - stvx_u $out1,$x00,$out - le?vperm $out3,$out3,$out3,$inpperm - stvx_u $out2,$x10,$out - le?vperm $out4,$out4,$out4,$inpperm - stvx_u $out3,$x20,$out - le?vperm $out5,$out5,$out5,$inpperm - stvx_u $out4,$x30,$out - le?vperm $out6,$out6,$out6,$inpperm - stvx_u $out5,$x40,$out - le?vperm $out7,$out7,$out7,$inpperm - stvx_u $out6,$x50,$out - stvx_u $out7,$x60,$out - addi $out,$out,0x70 - b Lcbc_dec8x_done - -.align 5 -Lcbc_dec8x_six: - vncipherlast $out2,$out2,$ivec - vncipherlast $out3,$out3,$in2 - vncipherlast $out4,$out4,$in3 - vncipherlast $out5,$out5,$in4 - vncipherlast $out6,$out6,$in5 - vncipherlast $out7,$out7,$in6 - vmr $ivec,$in7 - - le?vperm $out2,$out2,$out2,$inpperm - le?vperm $out3,$out3,$out3,$inpperm - stvx_u $out2,$x00,$out - le?vperm $out4,$out4,$out4,$inpperm - stvx_u $out3,$x10,$out - le?vperm $out5,$out5,$out5,$inpperm - stvx_u $out4,$x20,$out - le?vperm $out6,$out6,$out6,$inpperm - stvx_u $out5,$x30,$out - le?vperm $out7,$out7,$out7,$inpperm - stvx_u $out6,$x40,$out - stvx_u $out7,$x50,$out - addi $out,$out,0x60 - b Lcbc_dec8x_done - -.align 5 -Lcbc_dec8x_five: - vncipherlast $out3,$out3,$ivec - vncipherlast $out4,$out4,$in3 - vncipherlast $out5,$out5,$in4 - vncipherlast $out6,$out6,$in5 - vncipherlast $out7,$out7,$in6 - vmr $ivec,$in7 - - le?vperm $out3,$out3,$out3,$inpperm - le?vperm $out4,$out4,$out4,$inpperm - stvx_u $out3,$x00,$out - le?vperm $out5,$out5,$out5,$inpperm - stvx_u $out4,$x10,$out - le?vperm $out6,$out6,$out6,$inpperm - stvx_u $out5,$x20,$out - le?vperm $out7,$out7,$out7,$inpperm - stvx_u $out6,$x30,$out - stvx_u $out7,$x40,$out - addi $out,$out,0x50 - b Lcbc_dec8x_done - -.align 5 -Lcbc_dec8x_four: - vncipherlast $out4,$out4,$ivec - vncipherlast $out5,$out5,$in4 - vncipherlast $out6,$out6,$in5 - vncipherlast $out7,$out7,$in6 - vmr $ivec,$in7 - - le?vperm $out4,$out4,$out4,$inpperm - le?vperm $out5,$out5,$out5,$inpperm - stvx_u $out4,$x00,$out - le?vperm $out6,$out6,$out6,$inpperm - stvx_u $out5,$x10,$out - le?vperm $out7,$out7,$out7,$inpperm - stvx_u $out6,$x20,$out - stvx_u $out7,$x30,$out - addi $out,$out,0x40 - b Lcbc_dec8x_done - -.align 5 -Lcbc_dec8x_three: - vncipherlast $out5,$out5,$ivec - vncipherlast $out6,$out6,$in5 - vncipherlast $out7,$out7,$in6 - vmr $ivec,$in7 - - le?vperm $out5,$out5,$out5,$inpperm - le?vperm $out6,$out6,$out6,$inpperm - stvx_u $out5,$x00,$out - le?vperm $out7,$out7,$out7,$inpperm - stvx_u $out6,$x10,$out - stvx_u $out7,$x20,$out - addi $out,$out,0x30 - b Lcbc_dec8x_done - -.align 5 -Lcbc_dec8x_two: - vncipherlast $out6,$out6,$ivec - vncipherlast $out7,$out7,$in6 - vmr $ivec,$in7 - - le?vperm $out6,$out6,$out6,$inpperm - le?vperm $out7,$out7,$out7,$inpperm - stvx_u $out6,$x00,$out - stvx_u $out7,$x10,$out - addi $out,$out,0x20 - b Lcbc_dec8x_done - -.align 5 -Lcbc_dec8x_one: - vncipherlast $out7,$out7,$ivec - vmr $ivec,$in7 - - le?vperm $out7,$out7,$out7,$inpperm - stvx_u $out7,0,$out - addi $out,$out,0x10 - -Lcbc_dec8x_done: - le?vperm $ivec,$ivec,$ivec,$inpperm - stvx_u $ivec,0,$ivp # write [unaligned] iv - - li r10,`$FRAME+15` - li r11,`$FRAME+31` - stvx $inpperm,r10,$sp # wipe copies of round keys - addi r10,r10,32 - stvx $inpperm,r11,$sp - addi r11,r11,32 - stvx $inpperm,r10,$sp - addi r10,r10,32 - stvx $inpperm,r11,$sp - addi r11,r11,32 - stvx $inpperm,r10,$sp - addi r10,r10,32 - stvx $inpperm,r11,$sp - addi r11,r11,32 - stvx $inpperm,r10,$sp - addi r10,r10,32 - stvx $inpperm,r11,$sp - addi r11,r11,32 - - mtspr 256,$vrsave - lvx v20,r10,$sp # ABI says so - addi r10,r10,32 - lvx v21,r11,$sp - addi r11,r11,32 - lvx v22,r10,$sp - addi r10,r10,32 - lvx v23,r11,$sp - addi r11,r11,32 - lvx v24,r10,$sp - addi r10,r10,32 - lvx v25,r11,$sp - addi r11,r11,32 - lvx v26,r10,$sp - addi r10,r10,32 - lvx v27,r11,$sp - addi r11,r11,32 - lvx v28,r10,$sp - addi r10,r10,32 - lvx v29,r11,$sp - addi r11,r11,32 - lvx v30,r10,$sp - lvx v31,r11,$sp - $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) - $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) - $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) - $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) - $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) - $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) - addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` - blr - .long 0 - .byte 0,12,0x04,0,0x80,6,6,0 - .long 0 -.size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt -___ -}} }}} - -######################################################################### -{{{ # CTR procedure[s] # -my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10)); -my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3)); -my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)= - map("v$_",(4..11)); -my $dat=$tmp; - -$code.=<<___; -.globl .${prefix}_ctr32_encrypt_blocks -.align 5 -.${prefix}_ctr32_encrypt_blocks: - ${UCMP}i $len,1 - bltlr- - - lis r0,0xfff0 - mfspr $vrsave,256 - mtspr 256,r0 - - li $idx,15 - vxor $rndkey0,$rndkey0,$rndkey0 - le?vspltisb $tmp,0x0f - - lvx $ivec,0,$ivp # load [unaligned] iv - lvsl $inpperm,0,$ivp - lvx $inptail,$idx,$ivp - vspltisb $one,1 - le?vxor $inpperm,$inpperm,$tmp - vperm $ivec,$ivec,$inptail,$inpperm - vsldoi $one,$rndkey0,$one,1 - - neg r11,$inp - ?lvsl $keyperm,0,$key # prepare for unaligned key - lwz $rounds,240($key) - - lvsr $inpperm,0,r11 # prepare for unaligned load - lvx $inptail,0,$inp - addi $inp,$inp,15 # 15 is not typo - le?vxor $inpperm,$inpperm,$tmp - - srwi $rounds,$rounds,1 - li $idx,16 - subi $rounds,$rounds,1 - - ${UCMP}i $len,8 - bge _aesp8_ctr32_encrypt8x - - ?lvsr $outperm,0,$out # prepare for unaligned store - vspltisb $outmask,-1 - lvx $outhead,0,$out - ?vperm $outmask,$rndkey0,$outmask,$outperm - le?vxor $outperm,$outperm,$tmp - - lvx $rndkey0,0,$key - mtctr $rounds - lvx $rndkey1,$idx,$key - addi $idx,$idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vxor $inout,$ivec,$rndkey0 - lvx $rndkey0,$idx,$key - addi $idx,$idx,16 - b Loop_ctr32_enc - -.align 5 -Loop_ctr32_enc: - ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm - vcipher $inout,$inout,$rndkey1 - lvx $rndkey1,$idx,$key - addi $idx,$idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vcipher $inout,$inout,$rndkey0 - lvx $rndkey0,$idx,$key - addi $idx,$idx,16 - bdnz Loop_ctr32_enc - - vadduwm $ivec,$ivec,$one - vmr $dat,$inptail - lvx $inptail,0,$inp - addi $inp,$inp,16 - subic. $len,$len,1 # blocks-- - - ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm - vcipher $inout,$inout,$rndkey1 - lvx $rndkey1,$idx,$key - vperm $dat,$dat,$inptail,$inpperm - li $idx,16 - ?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm - lvx $rndkey0,0,$key - vxor $dat,$dat,$rndkey1 # last round key - vcipherlast $inout,$inout,$dat - - lvx $rndkey1,$idx,$key - addi $idx,$idx,16 - vperm $inout,$inout,$inout,$outperm - vsel $dat,$outhead,$inout,$outmask - mtctr $rounds - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vmr $outhead,$inout - vxor $inout,$ivec,$rndkey0 - lvx $rndkey0,$idx,$key - addi $idx,$idx,16 - stvx $dat,0,$out - addi $out,$out,16 - bne Loop_ctr32_enc - - addi $out,$out,-1 - lvx $inout,0,$out # redundant in aligned case - vsel $inout,$outhead,$inout,$outmask - stvx $inout,0,$out - - mtspr 256,$vrsave - blr - .long 0 - .byte 0,12,0x14,0,0,0,6,0 - .long 0 -___ -######################################################################### -{{ # Optimized CTR procedure # -my $key_="r11"; -my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31)); - $x00=0 if ($flavour =~ /osx/); -my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14)); -my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22)); -my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys - # v26-v31 last 6 round keys -my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment -my ($two,$three,$four)=($outhead,$outperm,$outmask); - -$code.=<<___; -.align 5 -_aesp8_ctr32_encrypt8x: - $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) - li r10,`$FRAME+8*16+15` - li r11,`$FRAME+8*16+31` - stvx v20,r10,$sp # ABI says so - addi r10,r10,32 - stvx v21,r11,$sp - addi r11,r11,32 - stvx v22,r10,$sp - addi r10,r10,32 - stvx v23,r11,$sp - addi r11,r11,32 - stvx v24,r10,$sp - addi r10,r10,32 - stvx v25,r11,$sp - addi r11,r11,32 - stvx v26,r10,$sp - addi r10,r10,32 - stvx v27,r11,$sp - addi r11,r11,32 - stvx v28,r10,$sp - addi r10,r10,32 - stvx v29,r11,$sp - addi r11,r11,32 - stvx v30,r10,$sp - stvx v31,r11,$sp - li r0,-1 - stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave - li $x10,0x10 - $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) - li $x20,0x20 - $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) - li $x30,0x30 - $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) - li $x40,0x40 - $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) - li $x50,0x50 - $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) - li $x60,0x60 - $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) - li $x70,0x70 - mtspr 256,r0 - - subi $rounds,$rounds,3 # -4 in total - - lvx $rndkey0,$x00,$key # load key schedule - lvx v30,$x10,$key - addi $key,$key,0x20 - lvx v31,$x00,$key - ?vperm $rndkey0,$rndkey0,v30,$keyperm - addi $key_,$sp,`$FRAME+15` - mtctr $rounds - -Load_ctr32_enc_key: - ?vperm v24,v30,v31,$keyperm - lvx v30,$x10,$key - addi $key,$key,0x20 - stvx v24,$x00,$key_ # off-load round[1] - ?vperm v25,v31,v30,$keyperm - lvx v31,$x00,$key - stvx v25,$x10,$key_ # off-load round[2] - addi $key_,$key_,0x20 - bdnz Load_ctr32_enc_key - - lvx v26,$x10,$key - ?vperm v24,v30,v31,$keyperm - lvx v27,$x20,$key - stvx v24,$x00,$key_ # off-load round[3] - ?vperm v25,v31,v26,$keyperm - lvx v28,$x30,$key - stvx v25,$x10,$key_ # off-load round[4] - addi $key_,$sp,`$FRAME+15` # rewind $key_ - ?vperm v26,v26,v27,$keyperm - lvx v29,$x40,$key - ?vperm v27,v27,v28,$keyperm - lvx v30,$x50,$key - ?vperm v28,v28,v29,$keyperm - lvx v31,$x60,$key - ?vperm v29,v29,v30,$keyperm - lvx $out0,$x70,$key # borrow $out0 - ?vperm v30,v30,v31,$keyperm - lvx v24,$x00,$key_ # pre-load round[1] - ?vperm v31,v31,$out0,$keyperm - lvx v25,$x10,$key_ # pre-load round[2] - - vadduwm $two,$one,$one - subi $inp,$inp,15 # undo "caller" - $SHL $len,$len,4 - - vadduwm $out1,$ivec,$one # counter values ... - vadduwm $out2,$ivec,$two - vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0] - le?li $idx,8 - vadduwm $out3,$out1,$two - vxor $out1,$out1,$rndkey0 - le?lvsl $inpperm,0,$idx - vadduwm $out4,$out2,$two - vxor $out2,$out2,$rndkey0 - le?vspltisb $tmp,0x0f - vadduwm $out5,$out3,$two - vxor $out3,$out3,$rndkey0 - le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u - vadduwm $out6,$out4,$two - vxor $out4,$out4,$rndkey0 - vadduwm $out7,$out5,$two - vxor $out5,$out5,$rndkey0 - vadduwm $ivec,$out6,$two # next counter value - vxor $out6,$out6,$rndkey0 - vxor $out7,$out7,$rndkey0 - - mtctr $rounds - b Loop_ctr32_enc8x -.align 5 -Loop_ctr32_enc8x: - vcipher $out0,$out0,v24 - vcipher $out1,$out1,v24 - vcipher $out2,$out2,v24 - vcipher $out3,$out3,v24 - vcipher $out4,$out4,v24 - vcipher $out5,$out5,v24 - vcipher $out6,$out6,v24 - vcipher $out7,$out7,v24 -Loop_ctr32_enc8x_middle: - lvx v24,$x20,$key_ # round[3] - addi $key_,$key_,0x20 - - vcipher $out0,$out0,v25 - vcipher $out1,$out1,v25 - vcipher $out2,$out2,v25 - vcipher $out3,$out3,v25 - vcipher $out4,$out4,v25 - vcipher $out5,$out5,v25 - vcipher $out6,$out6,v25 - vcipher $out7,$out7,v25 - lvx v25,$x10,$key_ # round[4] - bdnz Loop_ctr32_enc8x - - subic r11,$len,256 # $len-256, borrow $key_ - vcipher $out0,$out0,v24 - vcipher $out1,$out1,v24 - vcipher $out2,$out2,v24 - vcipher $out3,$out3,v24 - vcipher $out4,$out4,v24 - vcipher $out5,$out5,v24 - vcipher $out6,$out6,v24 - vcipher $out7,$out7,v24 - - subfe r0,r0,r0 # borrow?-1:0 - vcipher $out0,$out0,v25 - vcipher $out1,$out1,v25 - vcipher $out2,$out2,v25 - vcipher $out3,$out3,v25 - vcipher $out4,$out4,v25 - vcipher $out5,$out5,v25 - vcipher $out6,$out6,v25 - vcipher $out7,$out7,v25 - - and r0,r0,r11 - addi $key_,$sp,`$FRAME+15` # rewind $key_ - vcipher $out0,$out0,v26 - vcipher $out1,$out1,v26 - vcipher $out2,$out2,v26 - vcipher $out3,$out3,v26 - vcipher $out4,$out4,v26 - vcipher $out5,$out5,v26 - vcipher $out6,$out6,v26 - vcipher $out7,$out7,v26 - lvx v24,$x00,$key_ # re-pre-load round[1] - - subic $len,$len,129 # $len-=129 - vcipher $out0,$out0,v27 - addi $len,$len,1 # $len-=128 really - vcipher $out1,$out1,v27 - vcipher $out2,$out2,v27 - vcipher $out3,$out3,v27 - vcipher $out4,$out4,v27 - vcipher $out5,$out5,v27 - vcipher $out6,$out6,v27 - vcipher $out7,$out7,v27 - lvx v25,$x10,$key_ # re-pre-load round[2] - - vcipher $out0,$out0,v28 - lvx_u $in0,$x00,$inp # load input - vcipher $out1,$out1,v28 - lvx_u $in1,$x10,$inp - vcipher $out2,$out2,v28 - lvx_u $in2,$x20,$inp - vcipher $out3,$out3,v28 - lvx_u $in3,$x30,$inp - vcipher $out4,$out4,v28 - lvx_u $in4,$x40,$inp - vcipher $out5,$out5,v28 - lvx_u $in5,$x50,$inp - vcipher $out6,$out6,v28 - lvx_u $in6,$x60,$inp - vcipher $out7,$out7,v28 - lvx_u $in7,$x70,$inp - addi $inp,$inp,0x80 - - vcipher $out0,$out0,v29 - le?vperm $in0,$in0,$in0,$inpperm - vcipher $out1,$out1,v29 - le?vperm $in1,$in1,$in1,$inpperm - vcipher $out2,$out2,v29 - le?vperm $in2,$in2,$in2,$inpperm - vcipher $out3,$out3,v29 - le?vperm $in3,$in3,$in3,$inpperm - vcipher $out4,$out4,v29 - le?vperm $in4,$in4,$in4,$inpperm - vcipher $out5,$out5,v29 - le?vperm $in5,$in5,$in5,$inpperm - vcipher $out6,$out6,v29 - le?vperm $in6,$in6,$in6,$inpperm - vcipher $out7,$out7,v29 - le?vperm $in7,$in7,$in7,$inpperm - - add $inp,$inp,r0 # $inp is adjusted in such - # way that at exit from the - # loop inX-in7 are loaded - # with last "words" - subfe. r0,r0,r0 # borrow?-1:0 - vcipher $out0,$out0,v30 - vxor $in0,$in0,v31 # xor with last round key - vcipher $out1,$out1,v30 - vxor $in1,$in1,v31 - vcipher $out2,$out2,v30 - vxor $in2,$in2,v31 - vcipher $out3,$out3,v30 - vxor $in3,$in3,v31 - vcipher $out4,$out4,v30 - vxor $in4,$in4,v31 - vcipher $out5,$out5,v30 - vxor $in5,$in5,v31 - vcipher $out6,$out6,v30 - vxor $in6,$in6,v31 - vcipher $out7,$out7,v30 - vxor $in7,$in7,v31 - - bne Lctr32_enc8x_break # did $len-129 borrow? - - vcipherlast $in0,$out0,$in0 - vcipherlast $in1,$out1,$in1 - vadduwm $out1,$ivec,$one # counter values ... - vcipherlast $in2,$out2,$in2 - vadduwm $out2,$ivec,$two - vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0] - vcipherlast $in3,$out3,$in3 - vadduwm $out3,$out1,$two - vxor $out1,$out1,$rndkey0 - vcipherlast $in4,$out4,$in4 - vadduwm $out4,$out2,$two - vxor $out2,$out2,$rndkey0 - vcipherlast $in5,$out5,$in5 - vadduwm $out5,$out3,$two - vxor $out3,$out3,$rndkey0 - vcipherlast $in6,$out6,$in6 - vadduwm $out6,$out4,$two - vxor $out4,$out4,$rndkey0 - vcipherlast $in7,$out7,$in7 - vadduwm $out7,$out5,$two - vxor $out5,$out5,$rndkey0 - le?vperm $in0,$in0,$in0,$inpperm - vadduwm $ivec,$out6,$two # next counter value - vxor $out6,$out6,$rndkey0 - le?vperm $in1,$in1,$in1,$inpperm - vxor $out7,$out7,$rndkey0 - mtctr $rounds - - vcipher $out0,$out0,v24 - stvx_u $in0,$x00,$out - le?vperm $in2,$in2,$in2,$inpperm - vcipher $out1,$out1,v24 - stvx_u $in1,$x10,$out - le?vperm $in3,$in3,$in3,$inpperm - vcipher $out2,$out2,v24 - stvx_u $in2,$x20,$out - le?vperm $in4,$in4,$in4,$inpperm - vcipher $out3,$out3,v24 - stvx_u $in3,$x30,$out - le?vperm $in5,$in5,$in5,$inpperm - vcipher $out4,$out4,v24 - stvx_u $in4,$x40,$out - le?vperm $in6,$in6,$in6,$inpperm - vcipher $out5,$out5,v24 - stvx_u $in5,$x50,$out - le?vperm $in7,$in7,$in7,$inpperm - vcipher $out6,$out6,v24 - stvx_u $in6,$x60,$out - vcipher $out7,$out7,v24 - stvx_u $in7,$x70,$out - addi $out,$out,0x80 - - b Loop_ctr32_enc8x_middle - -.align 5 -Lctr32_enc8x_break: - cmpwi $len,-0x60 - blt Lctr32_enc8x_one - nop - beq Lctr32_enc8x_two - cmpwi $len,-0x40 - blt Lctr32_enc8x_three - nop - beq Lctr32_enc8x_four - cmpwi $len,-0x20 - blt Lctr32_enc8x_five - nop - beq Lctr32_enc8x_six - cmpwi $len,0x00 - blt Lctr32_enc8x_seven - -Lctr32_enc8x_eight: - vcipherlast $out0,$out0,$in0 - vcipherlast $out1,$out1,$in1 - vcipherlast $out2,$out2,$in2 - vcipherlast $out3,$out3,$in3 - vcipherlast $out4,$out4,$in4 - vcipherlast $out5,$out5,$in5 - vcipherlast $out6,$out6,$in6 - vcipherlast $out7,$out7,$in7 - - le?vperm $out0,$out0,$out0,$inpperm - le?vperm $out1,$out1,$out1,$inpperm - stvx_u $out0,$x00,$out - le?vperm $out2,$out2,$out2,$inpperm - stvx_u $out1,$x10,$out - le?vperm $out3,$out3,$out3,$inpperm - stvx_u $out2,$x20,$out - le?vperm $out4,$out4,$out4,$inpperm - stvx_u $out3,$x30,$out - le?vperm $out5,$out5,$out5,$inpperm - stvx_u $out4,$x40,$out - le?vperm $out6,$out6,$out6,$inpperm - stvx_u $out5,$x50,$out - le?vperm $out7,$out7,$out7,$inpperm - stvx_u $out6,$x60,$out - stvx_u $out7,$x70,$out - addi $out,$out,0x80 - b Lctr32_enc8x_done - -.align 5 -Lctr32_enc8x_seven: - vcipherlast $out0,$out0,$in1 - vcipherlast $out1,$out1,$in2 - vcipherlast $out2,$out2,$in3 - vcipherlast $out3,$out3,$in4 - vcipherlast $out4,$out4,$in5 - vcipherlast $out5,$out5,$in6 - vcipherlast $out6,$out6,$in7 - - le?vperm $out0,$out0,$out0,$inpperm - le?vperm $out1,$out1,$out1,$inpperm - stvx_u $out0,$x00,$out - le?vperm $out2,$out2,$out2,$inpperm - stvx_u $out1,$x10,$out - le?vperm $out3,$out3,$out3,$inpperm - stvx_u $out2,$x20,$out - le?vperm $out4,$out4,$out4,$inpperm - stvx_u $out3,$x30,$out - le?vperm $out5,$out5,$out5,$inpperm - stvx_u $out4,$x40,$out - le?vperm $out6,$out6,$out6,$inpperm - stvx_u $out5,$x50,$out - stvx_u $out6,$x60,$out - addi $out,$out,0x70 - b Lctr32_enc8x_done - -.align 5 -Lctr32_enc8x_six: - vcipherlast $out0,$out0,$in2 - vcipherlast $out1,$out1,$in3 - vcipherlast $out2,$out2,$in4 - vcipherlast $out3,$out3,$in5 - vcipherlast $out4,$out4,$in6 - vcipherlast $out5,$out5,$in7 - - le?vperm $out0,$out0,$out0,$inpperm - le?vperm $out1,$out1,$out1,$inpperm - stvx_u $out0,$x00,$out - le?vperm $out2,$out2,$out2,$inpperm - stvx_u $out1,$x10,$out - le?vperm $out3,$out3,$out3,$inpperm - stvx_u $out2,$x20,$out - le?vperm $out4,$out4,$out4,$inpperm - stvx_u $out3,$x30,$out - le?vperm $out5,$out5,$out5,$inpperm - stvx_u $out4,$x40,$out - stvx_u $out5,$x50,$out - addi $out,$out,0x60 - b Lctr32_enc8x_done - -.align 5 -Lctr32_enc8x_five: - vcipherlast $out0,$out0,$in3 - vcipherlast $out1,$out1,$in4 - vcipherlast $out2,$out2,$in5 - vcipherlast $out3,$out3,$in6 - vcipherlast $out4,$out4,$in7 - - le?vperm $out0,$out0,$out0,$inpperm - le?vperm $out1,$out1,$out1,$inpperm - stvx_u $out0,$x00,$out - le?vperm $out2,$out2,$out2,$inpperm - stvx_u $out1,$x10,$out - le?vperm $out3,$out3,$out3,$inpperm - stvx_u $out2,$x20,$out - le?vperm $out4,$out4,$out4,$inpperm - stvx_u $out3,$x30,$out - stvx_u $out4,$x40,$out - addi $out,$out,0x50 - b Lctr32_enc8x_done - -.align 5 -Lctr32_enc8x_four: - vcipherlast $out0,$out0,$in4 - vcipherlast $out1,$out1,$in5 - vcipherlast $out2,$out2,$in6 - vcipherlast $out3,$out3,$in7 - - le?vperm $out0,$out0,$out0,$inpperm - le?vperm $out1,$out1,$out1,$inpperm - stvx_u $out0,$x00,$out - le?vperm $out2,$out2,$out2,$inpperm - stvx_u $out1,$x10,$out - le?vperm $out3,$out3,$out3,$inpperm - stvx_u $out2,$x20,$out - stvx_u $out3,$x30,$out - addi $out,$out,0x40 - b Lctr32_enc8x_done - -.align 5 -Lctr32_enc8x_three: - vcipherlast $out0,$out0,$in5 - vcipherlast $out1,$out1,$in6 - vcipherlast $out2,$out2,$in7 - - le?vperm $out0,$out0,$out0,$inpperm - le?vperm $out1,$out1,$out1,$inpperm - stvx_u $out0,$x00,$out - le?vperm $out2,$out2,$out2,$inpperm - stvx_u $out1,$x10,$out - stvx_u $out2,$x20,$out - addi $out,$out,0x30 - b Lctr32_enc8x_done - -.align 5 -Lctr32_enc8x_two: - vcipherlast $out0,$out0,$in6 - vcipherlast $out1,$out1,$in7 - - le?vperm $out0,$out0,$out0,$inpperm - le?vperm $out1,$out1,$out1,$inpperm - stvx_u $out0,$x00,$out - stvx_u $out1,$x10,$out - addi $out,$out,0x20 - b Lctr32_enc8x_done - -.align 5 -Lctr32_enc8x_one: - vcipherlast $out0,$out0,$in7 - - le?vperm $out0,$out0,$out0,$inpperm - stvx_u $out0,0,$out - addi $out,$out,0x10 - -Lctr32_enc8x_done: - li r10,`$FRAME+15` - li r11,`$FRAME+31` - stvx $inpperm,r10,$sp # wipe copies of round keys - addi r10,r10,32 - stvx $inpperm,r11,$sp - addi r11,r11,32 - stvx $inpperm,r10,$sp - addi r10,r10,32 - stvx $inpperm,r11,$sp - addi r11,r11,32 - stvx $inpperm,r10,$sp - addi r10,r10,32 - stvx $inpperm,r11,$sp - addi r11,r11,32 - stvx $inpperm,r10,$sp - addi r10,r10,32 - stvx $inpperm,r11,$sp - addi r11,r11,32 - - mtspr 256,$vrsave - lvx v20,r10,$sp # ABI says so - addi r10,r10,32 - lvx v21,r11,$sp - addi r11,r11,32 - lvx v22,r10,$sp - addi r10,r10,32 - lvx v23,r11,$sp - addi r11,r11,32 - lvx v24,r10,$sp - addi r10,r10,32 - lvx v25,r11,$sp - addi r11,r11,32 - lvx v26,r10,$sp - addi r10,r10,32 - lvx v27,r11,$sp - addi r11,r11,32 - lvx v28,r10,$sp - addi r10,r10,32 - lvx v29,r11,$sp - addi r11,r11,32 - lvx v30,r10,$sp - lvx v31,r11,$sp - $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) - $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) - $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) - $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) - $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) - $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) - addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` - blr - .long 0 - .byte 0,12,0x04,0,0x80,6,6,0 - .long 0 -.size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks -___ -}} }}} - -######################################################################### -{{{ # XTS procedures # -# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len, # -# const AES_KEY *key1, const AES_KEY *key2, # -# [const] unsigned char iv[16]); # -# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which # -# input tweak value is assumed to be encrypted already, and last tweak # -# value, one suitable for consecutive call on same chunk of data, is # -# written back to original buffer. In addition, in "tweak chaining" # -# mode only complete input blocks are processed. # - -my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10)); -my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2)); -my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7)); -my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12)); -my $taillen = $key2; - - ($inp,$idx) = ($idx,$inp); # reassign - -$code.=<<___; -.globl .${prefix}_xts_encrypt -.align 5 -.${prefix}_xts_encrypt: - mr $inp,r3 # reassign - li r3,-1 - ${UCMP}i $len,16 - bltlr- - - lis r0,0xfff0 - mfspr r12,256 # save vrsave - li r11,0 - mtspr 256,r0 - - vspltisb $seven,0x07 # 0x070707..07 - le?lvsl $leperm,r11,r11 - le?vspltisb $tmp,0x0f - le?vxor $leperm,$leperm,$seven - - li $idx,15 - lvx $tweak,0,$ivp # load [unaligned] iv - lvsl $inpperm,0,$ivp - lvx $inptail,$idx,$ivp - le?vxor $inpperm,$inpperm,$tmp - vperm $tweak,$tweak,$inptail,$inpperm - - neg r11,$inp - lvsr $inpperm,0,r11 # prepare for unaligned load - lvx $inout,0,$inp - addi $inp,$inp,15 # 15 is not typo - le?vxor $inpperm,$inpperm,$tmp - - ${UCMP}i $key2,0 # key2==NULL? - beq Lxts_enc_no_key2 - - ?lvsl $keyperm,0,$key2 # prepare for unaligned key - lwz $rounds,240($key2) - srwi $rounds,$rounds,1 - subi $rounds,$rounds,1 - li $idx,16 - - lvx $rndkey0,0,$key2 - lvx $rndkey1,$idx,$key2 - addi $idx,$idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vxor $tweak,$tweak,$rndkey0 - lvx $rndkey0,$idx,$key2 - addi $idx,$idx,16 - mtctr $rounds - -Ltweak_xts_enc: - ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm - vcipher $tweak,$tweak,$rndkey1 - lvx $rndkey1,$idx,$key2 - addi $idx,$idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vcipher $tweak,$tweak,$rndkey0 - lvx $rndkey0,$idx,$key2 - addi $idx,$idx,16 - bdnz Ltweak_xts_enc - - ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm - vcipher $tweak,$tweak,$rndkey1 - lvx $rndkey1,$idx,$key2 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vcipherlast $tweak,$tweak,$rndkey0 - - li $ivp,0 # don't chain the tweak - b Lxts_enc - -Lxts_enc_no_key2: - li $idx,-16 - and $len,$len,$idx # in "tweak chaining" - # mode only complete - # blocks are processed -Lxts_enc: - lvx $inptail,0,$inp - addi $inp,$inp,16 - - ?lvsl $keyperm,0,$key1 # prepare for unaligned key - lwz $rounds,240($key1) - srwi $rounds,$rounds,1 - subi $rounds,$rounds,1 - li $idx,16 - - vslb $eighty7,$seven,$seven # 0x808080..80 - vor $eighty7,$eighty7,$seven # 0x878787..87 - vspltisb $tmp,1 # 0x010101..01 - vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01 - - ${UCMP}i $len,96 - bge _aesp8_xts_encrypt6x - - andi. $taillen,$len,15 - subic r0,$len,32 - subi $taillen,$taillen,16 - subfe r0,r0,r0 - and r0,r0,$taillen - add $inp,$inp,r0 - - lvx $rndkey0,0,$key1 - lvx $rndkey1,$idx,$key1 - addi $idx,$idx,16 - vperm $inout,$inout,$inptail,$inpperm - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vxor $inout,$inout,$tweak - vxor $inout,$inout,$rndkey0 - lvx $rndkey0,$idx,$key1 - addi $idx,$idx,16 - mtctr $rounds - b Loop_xts_enc - -.align 5 -Loop_xts_enc: - ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm - vcipher $inout,$inout,$rndkey1 - lvx $rndkey1,$idx,$key1 - addi $idx,$idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vcipher $inout,$inout,$rndkey0 - lvx $rndkey0,$idx,$key1 - addi $idx,$idx,16 - bdnz Loop_xts_enc - - ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm - vcipher $inout,$inout,$rndkey1 - lvx $rndkey1,$idx,$key1 - li $idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vxor $rndkey0,$rndkey0,$tweak - vcipherlast $output,$inout,$rndkey0 - - le?vperm $tmp,$output,$output,$leperm - be?nop - le?stvx_u $tmp,0,$out - be?stvx_u $output,0,$out - addi $out,$out,16 - - subic. $len,$len,16 - beq Lxts_enc_done - - vmr $inout,$inptail - lvx $inptail,0,$inp - addi $inp,$inp,16 - lvx $rndkey0,0,$key1 - lvx $rndkey1,$idx,$key1 - addi $idx,$idx,16 - - subic r0,$len,32 - subfe r0,r0,r0 - and r0,r0,$taillen - add $inp,$inp,r0 - - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 - vand $tmp,$tmp,$eighty7 - vxor $tweak,$tweak,$tmp - - vperm $inout,$inout,$inptail,$inpperm - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vxor $inout,$inout,$tweak - vxor $output,$output,$rndkey0 # just in case $len<16 - vxor $inout,$inout,$rndkey0 - lvx $rndkey0,$idx,$key1 - addi $idx,$idx,16 - - mtctr $rounds - ${UCMP}i $len,16 - bge Loop_xts_enc - - vxor $output,$output,$tweak - lvsr $inpperm,0,$len # $inpperm is no longer needed - vxor $inptail,$inptail,$inptail # $inptail is no longer needed - vspltisb $tmp,-1 - vperm $inptail,$inptail,$tmp,$inpperm - vsel $inout,$inout,$output,$inptail - - subi r11,$out,17 - subi $out,$out,16 - mtctr $len - li $len,16 -Loop_xts_enc_steal: - lbzu r0,1(r11) - stb r0,16(r11) - bdnz Loop_xts_enc_steal - - mtctr $rounds - b Loop_xts_enc # one more time... - -Lxts_enc_done: - ${UCMP}i $ivp,0 - beq Lxts_enc_ret - - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 - vand $tmp,$tmp,$eighty7 - vxor $tweak,$tweak,$tmp - - le?vperm $tweak,$tweak,$tweak,$leperm - stvx_u $tweak,0,$ivp - -Lxts_enc_ret: - mtspr 256,r12 # restore vrsave - li r3,0 - blr - .long 0 - .byte 0,12,0x04,0,0x80,6,6,0 - .long 0 -.size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt - -.globl .${prefix}_xts_decrypt -.align 5 -.${prefix}_xts_decrypt: - mr $inp,r3 # reassign - li r3,-1 - ${UCMP}i $len,16 - bltlr- - - lis r0,0xfff8 - mfspr r12,256 # save vrsave - li r11,0 - mtspr 256,r0 - - andi. r0,$len,15 - neg r0,r0 - andi. r0,r0,16 - sub $len,$len,r0 - - vspltisb $seven,0x07 # 0x070707..07 - le?lvsl $leperm,r11,r11 - le?vspltisb $tmp,0x0f - le?vxor $leperm,$leperm,$seven - - li $idx,15 - lvx $tweak,0,$ivp # load [unaligned] iv - lvsl $inpperm,0,$ivp - lvx $inptail,$idx,$ivp - le?vxor $inpperm,$inpperm,$tmp - vperm $tweak,$tweak,$inptail,$inpperm - - neg r11,$inp - lvsr $inpperm,0,r11 # prepare for unaligned load - lvx $inout,0,$inp - addi $inp,$inp,15 # 15 is not typo - le?vxor $inpperm,$inpperm,$tmp - - ${UCMP}i $key2,0 # key2==NULL? - beq Lxts_dec_no_key2 - - ?lvsl $keyperm,0,$key2 # prepare for unaligned key - lwz $rounds,240($key2) - srwi $rounds,$rounds,1 - subi $rounds,$rounds,1 - li $idx,16 - - lvx $rndkey0,0,$key2 - lvx $rndkey1,$idx,$key2 - addi $idx,$idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vxor $tweak,$tweak,$rndkey0 - lvx $rndkey0,$idx,$key2 - addi $idx,$idx,16 - mtctr $rounds - -Ltweak_xts_dec: - ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm - vcipher $tweak,$tweak,$rndkey1 - lvx $rndkey1,$idx,$key2 - addi $idx,$idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vcipher $tweak,$tweak,$rndkey0 - lvx $rndkey0,$idx,$key2 - addi $idx,$idx,16 - bdnz Ltweak_xts_dec - - ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm - vcipher $tweak,$tweak,$rndkey1 - lvx $rndkey1,$idx,$key2 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vcipherlast $tweak,$tweak,$rndkey0 - - li $ivp,0 # don't chain the tweak - b Lxts_dec - -Lxts_dec_no_key2: - neg $idx,$len - andi. $idx,$idx,15 - add $len,$len,$idx # in "tweak chaining" - # mode only complete - # blocks are processed -Lxts_dec: - lvx $inptail,0,$inp - addi $inp,$inp,16 - - ?lvsl $keyperm,0,$key1 # prepare for unaligned key - lwz $rounds,240($key1) - srwi $rounds,$rounds,1 - subi $rounds,$rounds,1 - li $idx,16 - - vslb $eighty7,$seven,$seven # 0x808080..80 - vor $eighty7,$eighty7,$seven # 0x878787..87 - vspltisb $tmp,1 # 0x010101..01 - vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01 - - ${UCMP}i $len,96 - bge _aesp8_xts_decrypt6x - - lvx $rndkey0,0,$key1 - lvx $rndkey1,$idx,$key1 - addi $idx,$idx,16 - vperm $inout,$inout,$inptail,$inpperm - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vxor $inout,$inout,$tweak - vxor $inout,$inout,$rndkey0 - lvx $rndkey0,$idx,$key1 - addi $idx,$idx,16 - mtctr $rounds - - ${UCMP}i $len,16 - blt Ltail_xts_dec - be?b Loop_xts_dec - -.align 5 -Loop_xts_dec: - ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm - vncipher $inout,$inout,$rndkey1 - lvx $rndkey1,$idx,$key1 - addi $idx,$idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vncipher $inout,$inout,$rndkey0 - lvx $rndkey0,$idx,$key1 - addi $idx,$idx,16 - bdnz Loop_xts_dec - - ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm - vncipher $inout,$inout,$rndkey1 - lvx $rndkey1,$idx,$key1 - li $idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vxor $rndkey0,$rndkey0,$tweak - vncipherlast $output,$inout,$rndkey0 - - le?vperm $tmp,$output,$output,$leperm - be?nop - le?stvx_u $tmp,0,$out - be?stvx_u $output,0,$out - addi $out,$out,16 - - subic. $len,$len,16 - beq Lxts_dec_done - - vmr $inout,$inptail - lvx $inptail,0,$inp - addi $inp,$inp,16 - lvx $rndkey0,0,$key1 - lvx $rndkey1,$idx,$key1 - addi $idx,$idx,16 - - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 - vand $tmp,$tmp,$eighty7 - vxor $tweak,$tweak,$tmp - - vperm $inout,$inout,$inptail,$inpperm - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vxor $inout,$inout,$tweak - vxor $inout,$inout,$rndkey0 - lvx $rndkey0,$idx,$key1 - addi $idx,$idx,16 - - mtctr $rounds - ${UCMP}i $len,16 - bge Loop_xts_dec - -Ltail_xts_dec: - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak1,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 - vand $tmp,$tmp,$eighty7 - vxor $tweak1,$tweak1,$tmp - - subi $inp,$inp,16 - add $inp,$inp,$len - - vxor $inout,$inout,$tweak # :-( - vxor $inout,$inout,$tweak1 # :-) - -Loop_xts_dec_short: - ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm - vncipher $inout,$inout,$rndkey1 - lvx $rndkey1,$idx,$key1 - addi $idx,$idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vncipher $inout,$inout,$rndkey0 - lvx $rndkey0,$idx,$key1 - addi $idx,$idx,16 - bdnz Loop_xts_dec_short - - ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm - vncipher $inout,$inout,$rndkey1 - lvx $rndkey1,$idx,$key1 - li $idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vxor $rndkey0,$rndkey0,$tweak1 - vncipherlast $output,$inout,$rndkey0 - - le?vperm $tmp,$output,$output,$leperm - be?nop - le?stvx_u $tmp,0,$out - be?stvx_u $output,0,$out - - vmr $inout,$inptail - lvx $inptail,0,$inp - #addi $inp,$inp,16 - lvx $rndkey0,0,$key1 - lvx $rndkey1,$idx,$key1 - addi $idx,$idx,16 - vperm $inout,$inout,$inptail,$inpperm - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - - lvsr $inpperm,0,$len # $inpperm is no longer needed - vxor $inptail,$inptail,$inptail # $inptail is no longer needed - vspltisb $tmp,-1 - vperm $inptail,$inptail,$tmp,$inpperm - vsel $inout,$inout,$output,$inptail - - vxor $rndkey0,$rndkey0,$tweak - vxor $inout,$inout,$rndkey0 - lvx $rndkey0,$idx,$key1 - addi $idx,$idx,16 - - subi r11,$out,1 - mtctr $len - li $len,16 -Loop_xts_dec_steal: - lbzu r0,1(r11) - stb r0,16(r11) - bdnz Loop_xts_dec_steal - - mtctr $rounds - b Loop_xts_dec # one more time... - -Lxts_dec_done: - ${UCMP}i $ivp,0 - beq Lxts_dec_ret - - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 - vand $tmp,$tmp,$eighty7 - vxor $tweak,$tweak,$tmp - - le?vperm $tweak,$tweak,$tweak,$leperm - stvx_u $tweak,0,$ivp - -Lxts_dec_ret: - mtspr 256,r12 # restore vrsave - li r3,0 - blr - .long 0 - .byte 0,12,0x04,0,0x80,6,6,0 - .long 0 -.size .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt -___ -######################################################################### -{{ # Optimized XTS procedures # -my $key_=$key2; -my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31)); - $x00=0 if ($flavour =~ /osx/); -my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5)); -my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16)); -my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22)); -my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys - # v26-v31 last 6 round keys -my ($keyperm)=($out0); # aliases with "caller", redundant assignment -my $taillen=$x70; - -$code.=<<___; -.align 5 -_aesp8_xts_encrypt6x: - $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) - mflr r11 - li r7,`$FRAME+8*16+15` - li r3,`$FRAME+8*16+31` - $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp) - stvx v20,r7,$sp # ABI says so - addi r7,r7,32 - stvx v21,r3,$sp - addi r3,r3,32 - stvx v22,r7,$sp - addi r7,r7,32 - stvx v23,r3,$sp - addi r3,r3,32 - stvx v24,r7,$sp - addi r7,r7,32 - stvx v25,r3,$sp - addi r3,r3,32 - stvx v26,r7,$sp - addi r7,r7,32 - stvx v27,r3,$sp - addi r3,r3,32 - stvx v28,r7,$sp - addi r7,r7,32 - stvx v29,r3,$sp - addi r3,r3,32 - stvx v30,r7,$sp - stvx v31,r3,$sp - li r0,-1 - stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave - li $x10,0x10 - $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) - li $x20,0x20 - $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) - li $x30,0x30 - $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) - li $x40,0x40 - $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) - li $x50,0x50 - $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) - li $x60,0x60 - $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) - li $x70,0x70 - mtspr 256,r0 - - subi $rounds,$rounds,3 # -4 in total - - lvx $rndkey0,$x00,$key1 # load key schedule - lvx v30,$x10,$key1 - addi $key1,$key1,0x20 - lvx v31,$x00,$key1 - ?vperm $rndkey0,$rndkey0,v30,$keyperm - addi $key_,$sp,`$FRAME+15` - mtctr $rounds - -Load_xts_enc_key: - ?vperm v24,v30,v31,$keyperm - lvx v30,$x10,$key1 - addi $key1,$key1,0x20 - stvx v24,$x00,$key_ # off-load round[1] - ?vperm v25,v31,v30,$keyperm - lvx v31,$x00,$key1 - stvx v25,$x10,$key_ # off-load round[2] - addi $key_,$key_,0x20 - bdnz Load_xts_enc_key - - lvx v26,$x10,$key1 - ?vperm v24,v30,v31,$keyperm - lvx v27,$x20,$key1 - stvx v24,$x00,$key_ # off-load round[3] - ?vperm v25,v31,v26,$keyperm - lvx v28,$x30,$key1 - stvx v25,$x10,$key_ # off-load round[4] - addi $key_,$sp,`$FRAME+15` # rewind $key_ - ?vperm v26,v26,v27,$keyperm - lvx v29,$x40,$key1 - ?vperm v27,v27,v28,$keyperm - lvx v30,$x50,$key1 - ?vperm v28,v28,v29,$keyperm - lvx v31,$x60,$key1 - ?vperm v29,v29,v30,$keyperm - lvx $twk5,$x70,$key1 # borrow $twk5 - ?vperm v30,v30,v31,$keyperm - lvx v24,$x00,$key_ # pre-load round[1] - ?vperm v31,v31,$twk5,$keyperm - lvx v25,$x10,$key_ # pre-load round[2] - - vperm $in0,$inout,$inptail,$inpperm - subi $inp,$inp,31 # undo "caller" - vxor $twk0,$tweak,$rndkey0 - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 - vand $tmp,$tmp,$eighty7 - vxor $out0,$in0,$twk0 - vxor $tweak,$tweak,$tmp - - lvx_u $in1,$x10,$inp - vxor $twk1,$tweak,$rndkey0 - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 - le?vperm $in1,$in1,$in1,$leperm - vand $tmp,$tmp,$eighty7 - vxor $out1,$in1,$twk1 - vxor $tweak,$tweak,$tmp - - lvx_u $in2,$x20,$inp - andi. $taillen,$len,15 - vxor $twk2,$tweak,$rndkey0 - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 - le?vperm $in2,$in2,$in2,$leperm - vand $tmp,$tmp,$eighty7 - vxor $out2,$in2,$twk2 - vxor $tweak,$tweak,$tmp - - lvx_u $in3,$x30,$inp - sub $len,$len,$taillen - vxor $twk3,$tweak,$rndkey0 - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 - le?vperm $in3,$in3,$in3,$leperm - vand $tmp,$tmp,$eighty7 - vxor $out3,$in3,$twk3 - vxor $tweak,$tweak,$tmp - - lvx_u $in4,$x40,$inp - subi $len,$len,0x60 - vxor $twk4,$tweak,$rndkey0 - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 - le?vperm $in4,$in4,$in4,$leperm - vand $tmp,$tmp,$eighty7 - vxor $out4,$in4,$twk4 - vxor $tweak,$tweak,$tmp - - lvx_u $in5,$x50,$inp - addi $inp,$inp,0x60 - vxor $twk5,$tweak,$rndkey0 - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 - le?vperm $in5,$in5,$in5,$leperm - vand $tmp,$tmp,$eighty7 - vxor $out5,$in5,$twk5 - vxor $tweak,$tweak,$tmp - - vxor v31,v31,$rndkey0 - mtctr $rounds - b Loop_xts_enc6x - -.align 5 -Loop_xts_enc6x: - vcipher $out0,$out0,v24 - vcipher $out1,$out1,v24 - vcipher $out2,$out2,v24 - vcipher $out3,$out3,v24 - vcipher $out4,$out4,v24 - vcipher $out5,$out5,v24 - lvx v24,$x20,$key_ # round[3] - addi $key_,$key_,0x20 - - vcipher $out0,$out0,v25 - vcipher $out1,$out1,v25 - vcipher $out2,$out2,v25 - vcipher $out3,$out3,v25 - vcipher $out4,$out4,v25 - vcipher $out5,$out5,v25 - lvx v25,$x10,$key_ # round[4] - bdnz Loop_xts_enc6x - - subic $len,$len,96 # $len-=96 - vxor $in0,$twk0,v31 # xor with last round key - vcipher $out0,$out0,v24 - vcipher $out1,$out1,v24 - vsrab $tmp,$tweak,$seven # next tweak value - vxor $twk0,$tweak,$rndkey0 - vaddubm $tweak,$tweak,$tweak - vcipher $out2,$out2,v24 - vcipher $out3,$out3,v24 - vsldoi $tmp,$tmp,$tmp,15 - vcipher $out4,$out4,v24 - vcipher $out5,$out5,v24 - - subfe. r0,r0,r0 # borrow?-1:0 - vand $tmp,$tmp,$eighty7 - vcipher $out0,$out0,v25 - vcipher $out1,$out1,v25 - vxor $tweak,$tweak,$tmp - vcipher $out2,$out2,v25 - vcipher $out3,$out3,v25 - vxor $in1,$twk1,v31 - vsrab $tmp,$tweak,$seven # next tweak value - vxor $twk1,$tweak,$rndkey0 - vcipher $out4,$out4,v25 - vcipher $out5,$out5,v25 - - and r0,r0,$len - vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 - vcipher $out0,$out0,v26 - vcipher $out1,$out1,v26 - vand $tmp,$tmp,$eighty7 - vcipher $out2,$out2,v26 - vcipher $out3,$out3,v26 - vxor $tweak,$tweak,$tmp - vcipher $out4,$out4,v26 - vcipher $out5,$out5,v26 - - add $inp,$inp,r0 # $inp is adjusted in such - # way that at exit from the - # loop inX-in5 are loaded - # with last "words" - vxor $in2,$twk2,v31 - vsrab $tmp,$tweak,$seven # next tweak value - vxor $twk2,$tweak,$rndkey0 - vaddubm $tweak,$tweak,$tweak - vcipher $out0,$out0,v27 - vcipher $out1,$out1,v27 - vsldoi $tmp,$tmp,$tmp,15 - vcipher $out2,$out2,v27 - vcipher $out3,$out3,v27 - vand $tmp,$tmp,$eighty7 - vcipher $out4,$out4,v27 - vcipher $out5,$out5,v27 - - addi $key_,$sp,`$FRAME+15` # rewind $key_ - vxor $tweak,$tweak,$tmp - vcipher $out0,$out0,v28 - vcipher $out1,$out1,v28 - vxor $in3,$twk3,v31 - vsrab $tmp,$tweak,$seven # next tweak value - vxor $twk3,$tweak,$rndkey0 - vcipher $out2,$out2,v28 - vcipher $out3,$out3,v28 - vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 - vcipher $out4,$out4,v28 - vcipher $out5,$out5,v28 - lvx v24,$x00,$key_ # re-pre-load round[1] - vand $tmp,$tmp,$eighty7 - - vcipher $out0,$out0,v29 - vcipher $out1,$out1,v29 - vxor $tweak,$tweak,$tmp - vcipher $out2,$out2,v29 - vcipher $out3,$out3,v29 - vxor $in4,$twk4,v31 - vsrab $tmp,$tweak,$seven # next tweak value - vxor $twk4,$tweak,$rndkey0 - vcipher $out4,$out4,v29 - vcipher $out5,$out5,v29 - lvx v25,$x10,$key_ # re-pre-load round[2] - vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 - - vcipher $out0,$out0,v30 - vcipher $out1,$out1,v30 - vand $tmp,$tmp,$eighty7 - vcipher $out2,$out2,v30 - vcipher $out3,$out3,v30 - vxor $tweak,$tweak,$tmp - vcipher $out4,$out4,v30 - vcipher $out5,$out5,v30 - vxor $in5,$twk5,v31 - vsrab $tmp,$tweak,$seven # next tweak value - vxor $twk5,$tweak,$rndkey0 - - vcipherlast $out0,$out0,$in0 - lvx_u $in0,$x00,$inp # load next input block - vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 - vcipherlast $out1,$out1,$in1 - lvx_u $in1,$x10,$inp - vcipherlast $out2,$out2,$in2 - le?vperm $in0,$in0,$in0,$leperm - lvx_u $in2,$x20,$inp - vand $tmp,$tmp,$eighty7 - vcipherlast $out3,$out3,$in3 - le?vperm $in1,$in1,$in1,$leperm - lvx_u $in3,$x30,$inp - vcipherlast $out4,$out4,$in4 - le?vperm $in2,$in2,$in2,$leperm - lvx_u $in4,$x40,$inp - vxor $tweak,$tweak,$tmp - vcipherlast $tmp,$out5,$in5 # last block might be needed - # in stealing mode - le?vperm $in3,$in3,$in3,$leperm - lvx_u $in5,$x50,$inp - addi $inp,$inp,0x60 - le?vperm $in4,$in4,$in4,$leperm - le?vperm $in5,$in5,$in5,$leperm - - le?vperm $out0,$out0,$out0,$leperm - le?vperm $out1,$out1,$out1,$leperm - stvx_u $out0,$x00,$out # store output - vxor $out0,$in0,$twk0 - le?vperm $out2,$out2,$out2,$leperm - stvx_u $out1,$x10,$out - vxor $out1,$in1,$twk1 - le?vperm $out3,$out3,$out3,$leperm - stvx_u $out2,$x20,$out - vxor $out2,$in2,$twk2 - le?vperm $out4,$out4,$out4,$leperm - stvx_u $out3,$x30,$out - vxor $out3,$in3,$twk3 - le?vperm $out5,$tmp,$tmp,$leperm - stvx_u $out4,$x40,$out - vxor $out4,$in4,$twk4 - le?stvx_u $out5,$x50,$out - be?stvx_u $tmp, $x50,$out - vxor $out5,$in5,$twk5 - addi $out,$out,0x60 - - mtctr $rounds - beq Loop_xts_enc6x # did $len-=96 borrow? - - addic. $len,$len,0x60 - beq Lxts_enc6x_zero - cmpwi $len,0x20 - blt Lxts_enc6x_one - nop - beq Lxts_enc6x_two - cmpwi $len,0x40 - blt Lxts_enc6x_three - nop - beq Lxts_enc6x_four - -Lxts_enc6x_five: - vxor $out0,$in1,$twk0 - vxor $out1,$in2,$twk1 - vxor $out2,$in3,$twk2 - vxor $out3,$in4,$twk3 - vxor $out4,$in5,$twk4 - - bl _aesp8_xts_enc5x - - le?vperm $out0,$out0,$out0,$leperm - vmr $twk0,$twk5 # unused tweak - le?vperm $out1,$out1,$out1,$leperm - stvx_u $out0,$x00,$out # store output - le?vperm $out2,$out2,$out2,$leperm - stvx_u $out1,$x10,$out - le?vperm $out3,$out3,$out3,$leperm - stvx_u $out2,$x20,$out - vxor $tmp,$out4,$twk5 # last block prep for stealing - le?vperm $out4,$out4,$out4,$leperm - stvx_u $out3,$x30,$out - stvx_u $out4,$x40,$out - addi $out,$out,0x50 - bne Lxts_enc6x_steal - b Lxts_enc6x_done - -.align 4 -Lxts_enc6x_four: - vxor $out0,$in2,$twk0 - vxor $out1,$in3,$twk1 - vxor $out2,$in4,$twk2 - vxor $out3,$in5,$twk3 - vxor $out4,$out4,$out4 - - bl _aesp8_xts_enc5x - - le?vperm $out0,$out0,$out0,$leperm - vmr $twk0,$twk4 # unused tweak - le?vperm $out1,$out1,$out1,$leperm - stvx_u $out0,$x00,$out # store output - le?vperm $out2,$out2,$out2,$leperm - stvx_u $out1,$x10,$out - vxor $tmp,$out3,$twk4 # last block prep for stealing - le?vperm $out3,$out3,$out3,$leperm - stvx_u $out2,$x20,$out - stvx_u $out3,$x30,$out - addi $out,$out,0x40 - bne Lxts_enc6x_steal - b Lxts_enc6x_done - -.align 4 -Lxts_enc6x_three: - vxor $out0,$in3,$twk0 - vxor $out1,$in4,$twk1 - vxor $out2,$in5,$twk2 - vxor $out3,$out3,$out3 - vxor $out4,$out4,$out4 - - bl _aesp8_xts_enc5x - - le?vperm $out0,$out0,$out0,$leperm - vmr $twk0,$twk3 # unused tweak - le?vperm $out1,$out1,$out1,$leperm - stvx_u $out0,$x00,$out # store output - vxor $tmp,$out2,$twk3 # last block prep for stealing - le?vperm $out2,$out2,$out2,$leperm - stvx_u $out1,$x10,$out - stvx_u $out2,$x20,$out - addi $out,$out,0x30 - bne Lxts_enc6x_steal - b Lxts_enc6x_done - -.align 4 -Lxts_enc6x_two: - vxor $out0,$in4,$twk0 - vxor $out1,$in5,$twk1 - vxor $out2,$out2,$out2 - vxor $out3,$out3,$out3 - vxor $out4,$out4,$out4 - - bl _aesp8_xts_enc5x - - le?vperm $out0,$out0,$out0,$leperm - vmr $twk0,$twk2 # unused tweak - vxor $tmp,$out1,$twk2 # last block prep for stealing - le?vperm $out1,$out1,$out1,$leperm - stvx_u $out0,$x00,$out # store output - stvx_u $out1,$x10,$out - addi $out,$out,0x20 - bne Lxts_enc6x_steal - b Lxts_enc6x_done - -.align 4 -Lxts_enc6x_one: - vxor $out0,$in5,$twk0 - nop -Loop_xts_enc1x: - vcipher $out0,$out0,v24 - lvx v24,$x20,$key_ # round[3] - addi $key_,$key_,0x20 - - vcipher $out0,$out0,v25 - lvx v25,$x10,$key_ # round[4] - bdnz Loop_xts_enc1x - - add $inp,$inp,$taillen - cmpwi $taillen,0 - vcipher $out0,$out0,v24 - - subi $inp,$inp,16 - vcipher $out0,$out0,v25 - - lvsr $inpperm,0,$taillen - vcipher $out0,$out0,v26 - - lvx_u $in0,0,$inp - vcipher $out0,$out0,v27 - - addi $key_,$sp,`$FRAME+15` # rewind $key_ - vcipher $out0,$out0,v28 - lvx v24,$x00,$key_ # re-pre-load round[1] - - vcipher $out0,$out0,v29 - lvx v25,$x10,$key_ # re-pre-load round[2] - vxor $twk0,$twk0,v31 - - le?vperm $in0,$in0,$in0,$leperm - vcipher $out0,$out0,v30 - - vperm $in0,$in0,$in0,$inpperm - vcipherlast $out0,$out0,$twk0 - - vmr $twk0,$twk1 # unused tweak - vxor $tmp,$out0,$twk1 # last block prep for stealing - le?vperm $out0,$out0,$out0,$leperm - stvx_u $out0,$x00,$out # store output - addi $out,$out,0x10 - bne Lxts_enc6x_steal - b Lxts_enc6x_done - -.align 4 -Lxts_enc6x_zero: - cmpwi $taillen,0 - beq Lxts_enc6x_done - - add $inp,$inp,$taillen - subi $inp,$inp,16 - lvx_u $in0,0,$inp - lvsr $inpperm,0,$taillen # $in5 is no more - le?vperm $in0,$in0,$in0,$leperm - vperm $in0,$in0,$in0,$inpperm - vxor $tmp,$tmp,$twk0 -Lxts_enc6x_steal: - vxor $in0,$in0,$twk0 - vxor $out0,$out0,$out0 - vspltisb $out1,-1 - vperm $out0,$out0,$out1,$inpperm - vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember? - - subi r30,$out,17 - subi $out,$out,16 - mtctr $taillen -Loop_xts_enc6x_steal: - lbzu r0,1(r30) - stb r0,16(r30) - bdnz Loop_xts_enc6x_steal - - li $taillen,0 - mtctr $rounds - b Loop_xts_enc1x # one more time... - -.align 4 -Lxts_enc6x_done: - ${UCMP}i $ivp,0 - beq Lxts_enc6x_ret - - vxor $tweak,$twk0,$rndkey0 - le?vperm $tweak,$tweak,$tweak,$leperm - stvx_u $tweak,0,$ivp - -Lxts_enc6x_ret: - mtlr r11 - li r10,`$FRAME+15` - li r11,`$FRAME+31` - stvx $seven,r10,$sp # wipe copies of round keys - addi r10,r10,32 - stvx $seven,r11,$sp - addi r11,r11,32 - stvx $seven,r10,$sp - addi r10,r10,32 - stvx $seven,r11,$sp - addi r11,r11,32 - stvx $seven,r10,$sp - addi r10,r10,32 - stvx $seven,r11,$sp - addi r11,r11,32 - stvx $seven,r10,$sp - addi r10,r10,32 - stvx $seven,r11,$sp - addi r11,r11,32 - - mtspr 256,$vrsave - lvx v20,r10,$sp # ABI says so - addi r10,r10,32 - lvx v21,r11,$sp - addi r11,r11,32 - lvx v22,r10,$sp - addi r10,r10,32 - lvx v23,r11,$sp - addi r11,r11,32 - lvx v24,r10,$sp - addi r10,r10,32 - lvx v25,r11,$sp - addi r11,r11,32 - lvx v26,r10,$sp - addi r10,r10,32 - lvx v27,r11,$sp - addi r11,r11,32 - lvx v28,r10,$sp - addi r10,r10,32 - lvx v29,r11,$sp - addi r11,r11,32 - lvx v30,r10,$sp - lvx v31,r11,$sp - $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) - $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) - $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) - $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) - $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) - $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) - addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` - blr - .long 0 - .byte 0,12,0x04,1,0x80,6,6,0 - .long 0 - -.align 5 -_aesp8_xts_enc5x: - vcipher $out0,$out0,v24 - vcipher $out1,$out1,v24 - vcipher $out2,$out2,v24 - vcipher $out3,$out3,v24 - vcipher $out4,$out4,v24 - lvx v24,$x20,$key_ # round[3] - addi $key_,$key_,0x20 - - vcipher $out0,$out0,v25 - vcipher $out1,$out1,v25 - vcipher $out2,$out2,v25 - vcipher $out3,$out3,v25 - vcipher $out4,$out4,v25 - lvx v25,$x10,$key_ # round[4] - bdnz _aesp8_xts_enc5x - - add $inp,$inp,$taillen - cmpwi $taillen,0 - vcipher $out0,$out0,v24 - vcipher $out1,$out1,v24 - vcipher $out2,$out2,v24 - vcipher $out3,$out3,v24 - vcipher $out4,$out4,v24 - - subi $inp,$inp,16 - vcipher $out0,$out0,v25 - vcipher $out1,$out1,v25 - vcipher $out2,$out2,v25 - vcipher $out3,$out3,v25 - vcipher $out4,$out4,v25 - vxor $twk0,$twk0,v31 - - vcipher $out0,$out0,v26 - lvsr $inpperm,0,$taillen # $in5 is no more - vcipher $out1,$out1,v26 - vcipher $out2,$out2,v26 - vcipher $out3,$out3,v26 - vcipher $out4,$out4,v26 - vxor $in1,$twk1,v31 - - vcipher $out0,$out0,v27 - lvx_u $in0,0,$inp - vcipher $out1,$out1,v27 - vcipher $out2,$out2,v27 - vcipher $out3,$out3,v27 - vcipher $out4,$out4,v27 - vxor $in2,$twk2,v31 - - addi $key_,$sp,`$FRAME+15` # rewind $key_ - vcipher $out0,$out0,v28 - vcipher $out1,$out1,v28 - vcipher $out2,$out2,v28 - vcipher $out3,$out3,v28 - vcipher $out4,$out4,v28 - lvx v24,$x00,$key_ # re-pre-load round[1] - vxor $in3,$twk3,v31 - - vcipher $out0,$out0,v29 - le?vperm $in0,$in0,$in0,$leperm - vcipher $out1,$out1,v29 - vcipher $out2,$out2,v29 - vcipher $out3,$out3,v29 - vcipher $out4,$out4,v29 - lvx v25,$x10,$key_ # re-pre-load round[2] - vxor $in4,$twk4,v31 - - vcipher $out0,$out0,v30 - vperm $in0,$in0,$in0,$inpperm - vcipher $out1,$out1,v30 - vcipher $out2,$out2,v30 - vcipher $out3,$out3,v30 - vcipher $out4,$out4,v30 - - vcipherlast $out0,$out0,$twk0 - vcipherlast $out1,$out1,$in1 - vcipherlast $out2,$out2,$in2 - vcipherlast $out3,$out3,$in3 - vcipherlast $out4,$out4,$in4 - blr - .long 0 - .byte 0,12,0x14,0,0,0,0,0 - -.align 5 -_aesp8_xts_decrypt6x: - $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) - mflr r11 - li r7,`$FRAME+8*16+15` - li r3,`$FRAME+8*16+31` - $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp) - stvx v20,r7,$sp # ABI says so - addi r7,r7,32 - stvx v21,r3,$sp - addi r3,r3,32 - stvx v22,r7,$sp - addi r7,r7,32 - stvx v23,r3,$sp - addi r3,r3,32 - stvx v24,r7,$sp - addi r7,r7,32 - stvx v25,r3,$sp - addi r3,r3,32 - stvx v26,r7,$sp - addi r7,r7,32 - stvx v27,r3,$sp - addi r3,r3,32 - stvx v28,r7,$sp - addi r7,r7,32 - stvx v29,r3,$sp - addi r3,r3,32 - stvx v30,r7,$sp - stvx v31,r3,$sp - li r0,-1 - stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave - li $x10,0x10 - $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) - li $x20,0x20 - $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) - li $x30,0x30 - $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) - li $x40,0x40 - $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) - li $x50,0x50 - $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) - li $x60,0x60 - $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) - li $x70,0x70 - mtspr 256,r0 - - subi $rounds,$rounds,3 # -4 in total - - lvx $rndkey0,$x00,$key1 # load key schedule - lvx v30,$x10,$key1 - addi $key1,$key1,0x20 - lvx v31,$x00,$key1 - ?vperm $rndkey0,$rndkey0,v30,$keyperm - addi $key_,$sp,`$FRAME+15` - mtctr $rounds - -Load_xts_dec_key: - ?vperm v24,v30,v31,$keyperm - lvx v30,$x10,$key1 - addi $key1,$key1,0x20 - stvx v24,$x00,$key_ # off-load round[1] - ?vperm v25,v31,v30,$keyperm - lvx v31,$x00,$key1 - stvx v25,$x10,$key_ # off-load round[2] - addi $key_,$key_,0x20 - bdnz Load_xts_dec_key - - lvx v26,$x10,$key1 - ?vperm v24,v30,v31,$keyperm - lvx v27,$x20,$key1 - stvx v24,$x00,$key_ # off-load round[3] - ?vperm v25,v31,v26,$keyperm - lvx v28,$x30,$key1 - stvx v25,$x10,$key_ # off-load round[4] - addi $key_,$sp,`$FRAME+15` # rewind $key_ - ?vperm v26,v26,v27,$keyperm - lvx v29,$x40,$key1 - ?vperm v27,v27,v28,$keyperm - lvx v30,$x50,$key1 - ?vperm v28,v28,v29,$keyperm - lvx v31,$x60,$key1 - ?vperm v29,v29,v30,$keyperm - lvx $twk5,$x70,$key1 # borrow $twk5 - ?vperm v30,v30,v31,$keyperm - lvx v24,$x00,$key_ # pre-load round[1] - ?vperm v31,v31,$twk5,$keyperm - lvx v25,$x10,$key_ # pre-load round[2] - - vperm $in0,$inout,$inptail,$inpperm - subi $inp,$inp,31 # undo "caller" - vxor $twk0,$tweak,$rndkey0 - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 - vand $tmp,$tmp,$eighty7 - vxor $out0,$in0,$twk0 - vxor $tweak,$tweak,$tmp - - lvx_u $in1,$x10,$inp - vxor $twk1,$tweak,$rndkey0 - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 - le?vperm $in1,$in1,$in1,$leperm - vand $tmp,$tmp,$eighty7 - vxor $out1,$in1,$twk1 - vxor $tweak,$tweak,$tmp - - lvx_u $in2,$x20,$inp - andi. $taillen,$len,15 - vxor $twk2,$tweak,$rndkey0 - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 - le?vperm $in2,$in2,$in2,$leperm - vand $tmp,$tmp,$eighty7 - vxor $out2,$in2,$twk2 - vxor $tweak,$tweak,$tmp - - lvx_u $in3,$x30,$inp - sub $len,$len,$taillen - vxor $twk3,$tweak,$rndkey0 - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 - le?vperm $in3,$in3,$in3,$leperm - vand $tmp,$tmp,$eighty7 - vxor $out3,$in3,$twk3 - vxor $tweak,$tweak,$tmp - - lvx_u $in4,$x40,$inp - subi $len,$len,0x60 - vxor $twk4,$tweak,$rndkey0 - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 - le?vperm $in4,$in4,$in4,$leperm - vand $tmp,$tmp,$eighty7 - vxor $out4,$in4,$twk4 - vxor $tweak,$tweak,$tmp - - lvx_u $in5,$x50,$inp - addi $inp,$inp,0x60 - vxor $twk5,$tweak,$rndkey0 - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 - le?vperm $in5,$in5,$in5,$leperm - vand $tmp,$tmp,$eighty7 - vxor $out5,$in5,$twk5 - vxor $tweak,$tweak,$tmp - - vxor v31,v31,$rndkey0 - mtctr $rounds - b Loop_xts_dec6x - -.align 5 -Loop_xts_dec6x: - vncipher $out0,$out0,v24 - vncipher $out1,$out1,v24 - vncipher $out2,$out2,v24 - vncipher $out3,$out3,v24 - vncipher $out4,$out4,v24 - vncipher $out5,$out5,v24 - lvx v24,$x20,$key_ # round[3] - addi $key_,$key_,0x20 - - vncipher $out0,$out0,v25 - vncipher $out1,$out1,v25 - vncipher $out2,$out2,v25 - vncipher $out3,$out3,v25 - vncipher $out4,$out4,v25 - vncipher $out5,$out5,v25 - lvx v25,$x10,$key_ # round[4] - bdnz Loop_xts_dec6x - - subic $len,$len,96 # $len-=96 - vxor $in0,$twk0,v31 # xor with last round key - vncipher $out0,$out0,v24 - vncipher $out1,$out1,v24 - vsrab $tmp,$tweak,$seven # next tweak value - vxor $twk0,$tweak,$rndkey0 - vaddubm $tweak,$tweak,$tweak - vncipher $out2,$out2,v24 - vncipher $out3,$out3,v24 - vsldoi $tmp,$tmp,$tmp,15 - vncipher $out4,$out4,v24 - vncipher $out5,$out5,v24 - - subfe. r0,r0,r0 # borrow?-1:0 - vand $tmp,$tmp,$eighty7 - vncipher $out0,$out0,v25 - vncipher $out1,$out1,v25 - vxor $tweak,$tweak,$tmp - vncipher $out2,$out2,v25 - vncipher $out3,$out3,v25 - vxor $in1,$twk1,v31 - vsrab $tmp,$tweak,$seven # next tweak value - vxor $twk1,$tweak,$rndkey0 - vncipher $out4,$out4,v25 - vncipher $out5,$out5,v25 - - and r0,r0,$len - vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 - vncipher $out0,$out0,v26 - vncipher $out1,$out1,v26 - vand $tmp,$tmp,$eighty7 - vncipher $out2,$out2,v26 - vncipher $out3,$out3,v26 - vxor $tweak,$tweak,$tmp - vncipher $out4,$out4,v26 - vncipher $out5,$out5,v26 - - add $inp,$inp,r0 # $inp is adjusted in such - # way that at exit from the - # loop inX-in5 are loaded - # with last "words" - vxor $in2,$twk2,v31 - vsrab $tmp,$tweak,$seven # next tweak value - vxor $twk2,$tweak,$rndkey0 - vaddubm $tweak,$tweak,$tweak - vncipher $out0,$out0,v27 - vncipher $out1,$out1,v27 - vsldoi $tmp,$tmp,$tmp,15 - vncipher $out2,$out2,v27 - vncipher $out3,$out3,v27 - vand $tmp,$tmp,$eighty7 - vncipher $out4,$out4,v27 - vncipher $out5,$out5,v27 - - addi $key_,$sp,`$FRAME+15` # rewind $key_ - vxor $tweak,$tweak,$tmp - vncipher $out0,$out0,v28 - vncipher $out1,$out1,v28 - vxor $in3,$twk3,v31 - vsrab $tmp,$tweak,$seven # next tweak value - vxor $twk3,$tweak,$rndkey0 - vncipher $out2,$out2,v28 - vncipher $out3,$out3,v28 - vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 - vncipher $out4,$out4,v28 - vncipher $out5,$out5,v28 - lvx v24,$x00,$key_ # re-pre-load round[1] - vand $tmp,$tmp,$eighty7 - - vncipher $out0,$out0,v29 - vncipher $out1,$out1,v29 - vxor $tweak,$tweak,$tmp - vncipher $out2,$out2,v29 - vncipher $out3,$out3,v29 - vxor $in4,$twk4,v31 - vsrab $tmp,$tweak,$seven # next tweak value - vxor $twk4,$tweak,$rndkey0 - vncipher $out4,$out4,v29 - vncipher $out5,$out5,v29 - lvx v25,$x10,$key_ # re-pre-load round[2] - vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 - - vncipher $out0,$out0,v30 - vncipher $out1,$out1,v30 - vand $tmp,$tmp,$eighty7 - vncipher $out2,$out2,v30 - vncipher $out3,$out3,v30 - vxor $tweak,$tweak,$tmp - vncipher $out4,$out4,v30 - vncipher $out5,$out5,v30 - vxor $in5,$twk5,v31 - vsrab $tmp,$tweak,$seven # next tweak value - vxor $twk5,$tweak,$rndkey0 - - vncipherlast $out0,$out0,$in0 - lvx_u $in0,$x00,$inp # load next input block - vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 - vncipherlast $out1,$out1,$in1 - lvx_u $in1,$x10,$inp - vncipherlast $out2,$out2,$in2 - le?vperm $in0,$in0,$in0,$leperm - lvx_u $in2,$x20,$inp - vand $tmp,$tmp,$eighty7 - vncipherlast $out3,$out3,$in3 - le?vperm $in1,$in1,$in1,$leperm - lvx_u $in3,$x30,$inp - vncipherlast $out4,$out4,$in4 - le?vperm $in2,$in2,$in2,$leperm - lvx_u $in4,$x40,$inp - vxor $tweak,$tweak,$tmp - vncipherlast $out5,$out5,$in5 - le?vperm $in3,$in3,$in3,$leperm - lvx_u $in5,$x50,$inp - addi $inp,$inp,0x60 - le?vperm $in4,$in4,$in4,$leperm - le?vperm $in5,$in5,$in5,$leperm - - le?vperm $out0,$out0,$out0,$leperm - le?vperm $out1,$out1,$out1,$leperm - stvx_u $out0,$x00,$out # store output - vxor $out0,$in0,$twk0 - le?vperm $out2,$out2,$out2,$leperm - stvx_u $out1,$x10,$out - vxor $out1,$in1,$twk1 - le?vperm $out3,$out3,$out3,$leperm - stvx_u $out2,$x20,$out - vxor $out2,$in2,$twk2 - le?vperm $out4,$out4,$out4,$leperm - stvx_u $out3,$x30,$out - vxor $out3,$in3,$twk3 - le?vperm $out5,$out5,$out5,$leperm - stvx_u $out4,$x40,$out - vxor $out4,$in4,$twk4 - stvx_u $out5,$x50,$out - vxor $out5,$in5,$twk5 - addi $out,$out,0x60 - - mtctr $rounds - beq Loop_xts_dec6x # did $len-=96 borrow? - - addic. $len,$len,0x60 - beq Lxts_dec6x_zero - cmpwi $len,0x20 - blt Lxts_dec6x_one - nop - beq Lxts_dec6x_two - cmpwi $len,0x40 - blt Lxts_dec6x_three - nop - beq Lxts_dec6x_four - -Lxts_dec6x_five: - vxor $out0,$in1,$twk0 - vxor $out1,$in2,$twk1 - vxor $out2,$in3,$twk2 - vxor $out3,$in4,$twk3 - vxor $out4,$in5,$twk4 - - bl _aesp8_xts_dec5x - - le?vperm $out0,$out0,$out0,$leperm - vmr $twk0,$twk5 # unused tweak - vxor $twk1,$tweak,$rndkey0 - le?vperm $out1,$out1,$out1,$leperm - stvx_u $out0,$x00,$out # store output - vxor $out0,$in0,$twk1 - le?vperm $out2,$out2,$out2,$leperm - stvx_u $out1,$x10,$out - le?vperm $out3,$out3,$out3,$leperm - stvx_u $out2,$x20,$out - le?vperm $out4,$out4,$out4,$leperm - stvx_u $out3,$x30,$out - stvx_u $out4,$x40,$out - addi $out,$out,0x50 - bne Lxts_dec6x_steal - b Lxts_dec6x_done - -.align 4 -Lxts_dec6x_four: - vxor $out0,$in2,$twk0 - vxor $out1,$in3,$twk1 - vxor $out2,$in4,$twk2 - vxor $out3,$in5,$twk3 - vxor $out4,$out4,$out4 - - bl _aesp8_xts_dec5x - - le?vperm $out0,$out0,$out0,$leperm - vmr $twk0,$twk4 # unused tweak - vmr $twk1,$twk5 - le?vperm $out1,$out1,$out1,$leperm - stvx_u $out0,$x00,$out # store output - vxor $out0,$in0,$twk5 - le?vperm $out2,$out2,$out2,$leperm - stvx_u $out1,$x10,$out - le?vperm $out3,$out3,$out3,$leperm - stvx_u $out2,$x20,$out - stvx_u $out3,$x30,$out - addi $out,$out,0x40 - bne Lxts_dec6x_steal - b Lxts_dec6x_done - -.align 4 -Lxts_dec6x_three: - vxor $out0,$in3,$twk0 - vxor $out1,$in4,$twk1 - vxor $out2,$in5,$twk2 - vxor $out3,$out3,$out3 - vxor $out4,$out4,$out4 - - bl _aesp8_xts_dec5x - - le?vperm $out0,$out0,$out0,$leperm - vmr $twk0,$twk3 # unused tweak - vmr $twk1,$twk4 - le?vperm $out1,$out1,$out1,$leperm - stvx_u $out0,$x00,$out # store output - vxor $out0,$in0,$twk4 - le?vperm $out2,$out2,$out2,$leperm - stvx_u $out1,$x10,$out - stvx_u $out2,$x20,$out - addi $out,$out,0x30 - bne Lxts_dec6x_steal - b Lxts_dec6x_done - -.align 4 -Lxts_dec6x_two: - vxor $out0,$in4,$twk0 - vxor $out1,$in5,$twk1 - vxor $out2,$out2,$out2 - vxor $out3,$out3,$out3 - vxor $out4,$out4,$out4 - - bl _aesp8_xts_dec5x - - le?vperm $out0,$out0,$out0,$leperm - vmr $twk0,$twk2 # unused tweak - vmr $twk1,$twk3 - le?vperm $out1,$out1,$out1,$leperm - stvx_u $out0,$x00,$out # store output - vxor $out0,$in0,$twk3 - stvx_u $out1,$x10,$out - addi $out,$out,0x20 - bne Lxts_dec6x_steal - b Lxts_dec6x_done - -.align 4 -Lxts_dec6x_one: - vxor $out0,$in5,$twk0 - nop -Loop_xts_dec1x: - vncipher $out0,$out0,v24 - lvx v24,$x20,$key_ # round[3] - addi $key_,$key_,0x20 - - vncipher $out0,$out0,v25 - lvx v25,$x10,$key_ # round[4] - bdnz Loop_xts_dec1x - - subi r0,$taillen,1 - vncipher $out0,$out0,v24 - - andi. r0,r0,16 - cmpwi $taillen,0 - vncipher $out0,$out0,v25 - - sub $inp,$inp,r0 - vncipher $out0,$out0,v26 - - lvx_u $in0,0,$inp - vncipher $out0,$out0,v27 - - addi $key_,$sp,`$FRAME+15` # rewind $key_ - vncipher $out0,$out0,v28 - lvx v24,$x00,$key_ # re-pre-load round[1] - - vncipher $out0,$out0,v29 - lvx v25,$x10,$key_ # re-pre-load round[2] - vxor $twk0,$twk0,v31 - - le?vperm $in0,$in0,$in0,$leperm - vncipher $out0,$out0,v30 - - mtctr $rounds - vncipherlast $out0,$out0,$twk0 - - vmr $twk0,$twk1 # unused tweak - vmr $twk1,$twk2 - le?vperm $out0,$out0,$out0,$leperm - stvx_u $out0,$x00,$out # store output - addi $out,$out,0x10 - vxor $out0,$in0,$twk2 - bne Lxts_dec6x_steal - b Lxts_dec6x_done - -.align 4 -Lxts_dec6x_zero: - cmpwi $taillen,0 - beq Lxts_dec6x_done - - lvx_u $in0,0,$inp - le?vperm $in0,$in0,$in0,$leperm - vxor $out0,$in0,$twk1 -Lxts_dec6x_steal: - vncipher $out0,$out0,v24 - lvx v24,$x20,$key_ # round[3] - addi $key_,$key_,0x20 - - vncipher $out0,$out0,v25 - lvx v25,$x10,$key_ # round[4] - bdnz Lxts_dec6x_steal - - add $inp,$inp,$taillen - vncipher $out0,$out0,v24 - - cmpwi $taillen,0 - vncipher $out0,$out0,v25 - - lvx_u $in0,0,$inp - vncipher $out0,$out0,v26 - - lvsr $inpperm,0,$taillen # $in5 is no more - vncipher $out0,$out0,v27 - - addi $key_,$sp,`$FRAME+15` # rewind $key_ - vncipher $out0,$out0,v28 - lvx v24,$x00,$key_ # re-pre-load round[1] - - vncipher $out0,$out0,v29 - lvx v25,$x10,$key_ # re-pre-load round[2] - vxor $twk1,$twk1,v31 - - le?vperm $in0,$in0,$in0,$leperm - vncipher $out0,$out0,v30 - - vperm $in0,$in0,$in0,$inpperm - vncipherlast $tmp,$out0,$twk1 - - le?vperm $out0,$tmp,$tmp,$leperm - le?stvx_u $out0,0,$out - be?stvx_u $tmp,0,$out - - vxor $out0,$out0,$out0 - vspltisb $out1,-1 - vperm $out0,$out0,$out1,$inpperm - vsel $out0,$in0,$tmp,$out0 - vxor $out0,$out0,$twk0 - - subi r30,$out,1 - mtctr $taillen -Loop_xts_dec6x_steal: - lbzu r0,1(r30) - stb r0,16(r30) - bdnz Loop_xts_dec6x_steal - - li $taillen,0 - mtctr $rounds - b Loop_xts_dec1x # one more time... - -.align 4 -Lxts_dec6x_done: - ${UCMP}i $ivp,0 - beq Lxts_dec6x_ret - - vxor $tweak,$twk0,$rndkey0 - le?vperm $tweak,$tweak,$tweak,$leperm - stvx_u $tweak,0,$ivp - -Lxts_dec6x_ret: - mtlr r11 - li r10,`$FRAME+15` - li r11,`$FRAME+31` - stvx $seven,r10,$sp # wipe copies of round keys - addi r10,r10,32 - stvx $seven,r11,$sp - addi r11,r11,32 - stvx $seven,r10,$sp - addi r10,r10,32 - stvx $seven,r11,$sp - addi r11,r11,32 - stvx $seven,r10,$sp - addi r10,r10,32 - stvx $seven,r11,$sp - addi r11,r11,32 - stvx $seven,r10,$sp - addi r10,r10,32 - stvx $seven,r11,$sp - addi r11,r11,32 - - mtspr 256,$vrsave - lvx v20,r10,$sp # ABI says so - addi r10,r10,32 - lvx v21,r11,$sp - addi r11,r11,32 - lvx v22,r10,$sp - addi r10,r10,32 - lvx v23,r11,$sp - addi r11,r11,32 - lvx v24,r10,$sp - addi r10,r10,32 - lvx v25,r11,$sp - addi r11,r11,32 - lvx v26,r10,$sp - addi r10,r10,32 - lvx v27,r11,$sp - addi r11,r11,32 - lvx v28,r10,$sp - addi r10,r10,32 - lvx v29,r11,$sp - addi r11,r11,32 - lvx v30,r10,$sp - lvx v31,r11,$sp - $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) - $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) - $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) - $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) - $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) - $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) - addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` - blr - .long 0 - .byte 0,12,0x04,1,0x80,6,6,0 - .long 0 - -.align 5 -_aesp8_xts_dec5x: - vncipher $out0,$out0,v24 - vncipher $out1,$out1,v24 - vncipher $out2,$out2,v24 - vncipher $out3,$out3,v24 - vncipher $out4,$out4,v24 - lvx v24,$x20,$key_ # round[3] - addi $key_,$key_,0x20 - - vncipher $out0,$out0,v25 - vncipher $out1,$out1,v25 - vncipher $out2,$out2,v25 - vncipher $out3,$out3,v25 - vncipher $out4,$out4,v25 - lvx v25,$x10,$key_ # round[4] - bdnz _aesp8_xts_dec5x - - subi r0,$taillen,1 - vncipher $out0,$out0,v24 - vncipher $out1,$out1,v24 - vncipher $out2,$out2,v24 - vncipher $out3,$out3,v24 - vncipher $out4,$out4,v24 - - andi. r0,r0,16 - cmpwi $taillen,0 - vncipher $out0,$out0,v25 - vncipher $out1,$out1,v25 - vncipher $out2,$out2,v25 - vncipher $out3,$out3,v25 - vncipher $out4,$out4,v25 - vxor $twk0,$twk0,v31 - - sub $inp,$inp,r0 - vncipher $out0,$out0,v26 - vncipher $out1,$out1,v26 - vncipher $out2,$out2,v26 - vncipher $out3,$out3,v26 - vncipher $out4,$out4,v26 - vxor $in1,$twk1,v31 - - vncipher $out0,$out0,v27 - lvx_u $in0,0,$inp - vncipher $out1,$out1,v27 - vncipher $out2,$out2,v27 - vncipher $out3,$out3,v27 - vncipher $out4,$out4,v27 - vxor $in2,$twk2,v31 - - addi $key_,$sp,`$FRAME+15` # rewind $key_ - vncipher $out0,$out0,v28 - vncipher $out1,$out1,v28 - vncipher $out2,$out2,v28 - vncipher $out3,$out3,v28 - vncipher $out4,$out4,v28 - lvx v24,$x00,$key_ # re-pre-load round[1] - vxor $in3,$twk3,v31 - - vncipher $out0,$out0,v29 - le?vperm $in0,$in0,$in0,$leperm - vncipher $out1,$out1,v29 - vncipher $out2,$out2,v29 - vncipher $out3,$out3,v29 - vncipher $out4,$out4,v29 - lvx v25,$x10,$key_ # re-pre-load round[2] - vxor $in4,$twk4,v31 - - vncipher $out0,$out0,v30 - vncipher $out1,$out1,v30 - vncipher $out2,$out2,v30 - vncipher $out3,$out3,v30 - vncipher $out4,$out4,v30 - - vncipherlast $out0,$out0,$twk0 - vncipherlast $out1,$out1,$in1 - vncipherlast $out2,$out2,$in2 - vncipherlast $out3,$out3,$in3 - vncipherlast $out4,$out4,$in4 - mtctr $rounds - blr - .long 0 - .byte 0,12,0x14,0,0,0,0,0 -___ -}} }}} - -my $consts=1; -foreach(split("\n",$code)) { - s/\`([^\`]*)\`/eval($1)/geo; - - # constants table endian-specific conversion - if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) { - my $conv=$3; - my @bytes=(); - - # convert to endian-agnostic format - if ($1 eq "long") { - foreach (split(/,\s*/,$2)) { - my $l = /^0/?oct:int; - push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff; - } - } else { - @bytes = map(/^0/?oct:int,split(/,\s*/,$2)); - } - - # little-endian conversion - if ($flavour =~ /le$/o) { - SWITCH: for($conv) { - /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; }; - /\?rev/ && do { @bytes=reverse(@bytes); last; }; - } - } - - #emit - print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n"; - next; - } - $consts=0 if (m/Lconsts:/o); # end of table - - # instructions prefixed with '?' are endian-specific and need - # to be adjusted accordingly... - if ($flavour =~ /le$/o) { # little-endian - s/le\?//o or - s/be\?/#be#/o or - s/\?lvsr/lvsl/o or - s/\?lvsl/lvsr/o or - s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or - s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or - s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o; - } else { # big-endian - s/le\?/#le#/o or - s/be\?//o or - s/\?([a-z]+)/$1/o; - } - - print $_,"\n"; -} - -close STDOUT or die "error closing STDOUT: $!";
diff --git a/crypto/fipsmodule/aes/internal.h b/crypto/fipsmodule/aes/internal.h index 0685bc4..98b2a14d 100644 --- a/crypto/fipsmodule/aes/internal.h +++ b/crypto/fipsmodule/aes/internal.h
@@ -59,12 +59,6 @@ OPENSSL_INLINE int vpaes_capable(void) { return CRYPTO_is_NEON_capable(); } #endif -#elif defined(OPENSSL_PPC64LE) -#define HWAES - -OPENSSL_INLINE int hwaes_capable(void) { - return CRYPTO_is_PPC64LE_vcrypto_capable(); -} #endif #endif // !NO_ASM
diff --git a/crypto/fipsmodule/bcm.c b/crypto/fipsmodule/bcm.c index 1792134..e2e4d90 100644 --- a/crypto/fipsmodule/bcm.c +++ b/crypto/fipsmodule/bcm.c
@@ -101,7 +101,6 @@ #include "self_check/fips.c" #include "self_check/self_check.c" #include "service_indicator/service_indicator.c" -#include "sha/sha1-altivec.c" #include "sha/sha1.c" #include "sha/sha256.c" #include "sha/sha512.c"
diff --git a/crypto/fipsmodule/bn/bn.c b/crypto/fipsmodule/bn/bn.c index f3fbb7a..93fae56 100644 --- a/crypto/fipsmodule/bn/bn.c +++ b/crypto/fipsmodule/bn/bn.c
@@ -386,23 +386,6 @@ } int bn_resize_words(BIGNUM *bn, size_t words) { -#if defined(OPENSSL_PPC64LE) - // This is a workaround for a miscompilation bug in Clang 7.0.1 on POWER. - // The unittests catch the miscompilation, if it occurs, and it manifests - // as a crash in |bn_fits_in_words|. - // - // The bug only triggers if building in FIPS mode and with -O3. Clang 8.0.1 - // has the same bug but this workaround is not effective there---I've not - // been able to find a workaround for 8.0.1. - // - // At the time of writing (2019-08-08), Clang git does *not* have this bug - // and does not need this workaroud. The current git version should go on to - // be Clang 10 thus, once we can depend on that, this can be removed. - if (value_barrier_w((size_t)bn->width == words)) { - return 1; - } -#endif - if ((size_t)bn->width <= words) { if (!bn_wexpand(bn, words)) { return 0;
diff --git a/crypto/fipsmodule/cipher/e_aes.c b/crypto/fipsmodule/cipher/e_aes.c index e8e03fe..0db77b8 100644 --- a/crypto/fipsmodule/cipher/e_aes.c +++ b/crypto/fipsmodule/cipher/e_aes.c
@@ -1468,8 +1468,6 @@ return hwaes_capable() && crypto_gcm_clmul_enabled(); #elif defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64) return hwaes_capable() && CRYPTO_is_ARMv8_PMULL_capable(); -#elif defined(OPENSSL_PPC64LE) - return CRYPTO_is_PPC64LE_vcrypto_capable(); #else return 0; #endif
diff --git a/crypto/fipsmodule/modes/asm/ghashp8-ppc.pl b/crypto/fipsmodule/modes/asm/ghashp8-ppc.pl deleted file mode 100644 index 0d12a77..0000000 --- a/crypto/fipsmodule/modes/asm/ghashp8-ppc.pl +++ /dev/null
@@ -1,671 +0,0 @@ -#! /usr/bin/env perl -# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved. -# -# Licensed under the OpenSSL license (the "License"). You may not use -# this file except in compliance with the License. You can obtain a copy -# in the file LICENSE in the source distribution or at -# https://www.openssl.org/source/license.html - -# -# ==================================================================== -# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== -# -# GHASH for for PowerISA v2.07. -# -# July 2014 -# -# Accurate performance measurements are problematic, because it's -# always virtualized setup with possibly throttled processor. -# Relative comparison is therefore more informative. This initial -# version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x -# faster than "4-bit" integer-only compiler-generated 64-bit code. -# "Initial version" means that there is room for futher improvement. - -# May 2016 -# -# 2x aggregated reduction improves performance by 50% (resulting -# performance on POWER8 is 1 cycle per processed byte), and 4x -# aggregated reduction - by 170% or 2.7x (resulting in 0.55 cpb). - -$flavour=shift; -$output =shift; - -if ($flavour =~ /64/) { - $SIZE_T=8; - $LRSAVE=2*$SIZE_T; - $STU="stdu"; - $POP="ld"; - $PUSH="std"; - $UCMP="cmpld"; - $SHRI="srdi"; -} elsif ($flavour =~ /32/) { - $SIZE_T=4; - $LRSAVE=$SIZE_T; - $STU="stwu"; - $POP="lwz"; - $PUSH="stw"; - $UCMP="cmplw"; - $SHRI="srwi"; -} else { die "nonsense $flavour"; } - -$sp="r1"; -$FRAME=6*$SIZE_T+13*16; # 13*16 is for v20-v31 offload - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or -( $xlate="${dir}../../../perlasm/ppc-xlate.pl" and -f $xlate) or -die "can't locate ppc-xlate.pl"; - -open OUT,"| $^X \"$xlate\" $flavour \"$output\"" || die "can't call $xlate: $!"; -*STDOUT=*OUT; - -my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6)); # argument block - -my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3)); -my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12)); -my ($Xl1,$Xm1,$Xh1,$IN1,$H2,$H2h,$H2l)=map("v$_",(13..19)); -my $vrsave="r12"; - -$code=<<___; -.machine "any" - -.text - -.globl .gcm_init_p8 -.align 5 -.gcm_init_p8: - li r0,-4096 - li r8,0x10 - mfspr $vrsave,256 - li r9,0x20 - mtspr 256,r0 - li r10,0x30 - lvx_u $H,0,r4 # load H - - vspltisb $xC2,-16 # 0xf0 - vspltisb $t0,1 # one - vaddubm $xC2,$xC2,$xC2 # 0xe0 - vxor $zero,$zero,$zero - vor $xC2,$xC2,$t0 # 0xe1 - vsldoi $xC2,$xC2,$zero,15 # 0xe1... - vsldoi $t1,$zero,$t0,1 # ...1 - vaddubm $xC2,$xC2,$xC2 # 0xc2... - vspltisb $t2,7 - vor $xC2,$xC2,$t1 # 0xc2....01 - vspltb $t1,$H,0 # most significant byte - vsl $H,$H,$t0 # H<<=1 - vsrab $t1,$t1,$t2 # broadcast carry bit - vand $t1,$t1,$xC2 - vxor $IN,$H,$t1 # twisted H - - vsldoi $H,$IN,$IN,8 # twist even more ... - vsldoi $xC2,$zero,$xC2,8 # 0xc2.0 - vsldoi $Hl,$zero,$H,8 # ... and split - vsldoi $Hh,$H,$zero,8 - - stvx_u $xC2,0,r3 # save pre-computed table - stvx_u $Hl,r8,r3 - li r8,0x40 - stvx_u $H, r9,r3 - li r9,0x50 - stvx_u $Hh,r10,r3 - li r10,0x60 - - vpmsumd $Xl,$IN,$Hl # H.lo·H.lo - vpmsumd $Xm,$IN,$H # H.hi·H.lo+H.lo·H.hi - vpmsumd $Xh,$IN,$Hh # H.hi·H.hi - - vpmsumd $t2,$Xl,$xC2 # 1st reduction phase - - vsldoi $t0,$Xm,$zero,8 - vsldoi $t1,$zero,$Xm,8 - vxor $Xl,$Xl,$t0 - vxor $Xh,$Xh,$t1 - - vsldoi $Xl,$Xl,$Xl,8 - vxor $Xl,$Xl,$t2 - - vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase - vpmsumd $Xl,$Xl,$xC2 - vxor $t1,$t1,$Xh - vxor $IN1,$Xl,$t1 - - vsldoi $H2,$IN1,$IN1,8 - vsldoi $H2l,$zero,$H2,8 - vsldoi $H2h,$H2,$zero,8 - - stvx_u $H2l,r8,r3 # save H^2 - li r8,0x70 - stvx_u $H2,r9,r3 - li r9,0x80 - stvx_u $H2h,r10,r3 - li r10,0x90 -___ -{ -my ($t4,$t5,$t6) = ($Hl,$H,$Hh); -$code.=<<___; - vpmsumd $Xl,$IN,$H2l # H.lo·H^2.lo - vpmsumd $Xl1,$IN1,$H2l # H^2.lo·H^2.lo - vpmsumd $Xm,$IN,$H2 # H.hi·H^2.lo+H.lo·H^2.hi - vpmsumd $Xm1,$IN1,$H2 # H^2.hi·H^2.lo+H^2.lo·H^2.hi - vpmsumd $Xh,$IN,$H2h # H.hi·H^2.hi - vpmsumd $Xh1,$IN1,$H2h # H^2.hi·H^2.hi - - vpmsumd $t2,$Xl,$xC2 # 1st reduction phase - vpmsumd $t6,$Xl1,$xC2 # 1st reduction phase - - vsldoi $t0,$Xm,$zero,8 - vsldoi $t1,$zero,$Xm,8 - vsldoi $t4,$Xm1,$zero,8 - vsldoi $t5,$zero,$Xm1,8 - vxor $Xl,$Xl,$t0 - vxor $Xh,$Xh,$t1 - vxor $Xl1,$Xl1,$t4 - vxor $Xh1,$Xh1,$t5 - - vsldoi $Xl,$Xl,$Xl,8 - vsldoi $Xl1,$Xl1,$Xl1,8 - vxor $Xl,$Xl,$t2 - vxor $Xl1,$Xl1,$t6 - - vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase - vsldoi $t5,$Xl1,$Xl1,8 # 2nd reduction phase - vpmsumd $Xl,$Xl,$xC2 - vpmsumd $Xl1,$Xl1,$xC2 - vxor $t1,$t1,$Xh - vxor $t5,$t5,$Xh1 - vxor $Xl,$Xl,$t1 - vxor $Xl1,$Xl1,$t5 - - vsldoi $H,$Xl,$Xl,8 - vsldoi $H2,$Xl1,$Xl1,8 - vsldoi $Hl,$zero,$H,8 - vsldoi $Hh,$H,$zero,8 - vsldoi $H2l,$zero,$H2,8 - vsldoi $H2h,$H2,$zero,8 - - stvx_u $Hl,r8,r3 # save H^3 - li r8,0xa0 - stvx_u $H,r9,r3 - li r9,0xb0 - stvx_u $Hh,r10,r3 - li r10,0xc0 - stvx_u $H2l,r8,r3 # save H^4 - stvx_u $H2,r9,r3 - stvx_u $H2h,r10,r3 - - mtspr 256,$vrsave - blr - .long 0 - .byte 0,12,0x14,0,0,0,2,0 - .long 0 -.size .gcm_init_p8,.-.gcm_init_p8 -___ -} -$code.=<<___; -.globl .gcm_gmult_p8 -.align 5 -.gcm_gmult_p8: - lis r0,0xfff8 - li r8,0x10 - mfspr $vrsave,256 - li r9,0x20 - mtspr 256,r0 - li r10,0x30 - lvx_u $IN,0,$Xip # load Xi - - lvx_u $Hl,r8,$Htbl # load pre-computed table - le?lvsl $lemask,r0,r0 - lvx_u $H, r9,$Htbl - le?vspltisb $t0,0x07 - lvx_u $Hh,r10,$Htbl - le?vxor $lemask,$lemask,$t0 - lvx_u $xC2,0,$Htbl - le?vperm $IN,$IN,$IN,$lemask - vxor $zero,$zero,$zero - - vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo - vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi - vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi - - vpmsumd $t2,$Xl,$xC2 # 1st reduction phase - - vsldoi $t0,$Xm,$zero,8 - vsldoi $t1,$zero,$Xm,8 - vxor $Xl,$Xl,$t0 - vxor $Xh,$Xh,$t1 - - vsldoi $Xl,$Xl,$Xl,8 - vxor $Xl,$Xl,$t2 - - vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase - vpmsumd $Xl,$Xl,$xC2 - vxor $t1,$t1,$Xh - vxor $Xl,$Xl,$t1 - - le?vperm $Xl,$Xl,$Xl,$lemask - stvx_u $Xl,0,$Xip # write out Xi - - mtspr 256,$vrsave - blr - .long 0 - .byte 0,12,0x14,0,0,0,2,0 - .long 0 -.size .gcm_gmult_p8,.-.gcm_gmult_p8 - -.globl .gcm_ghash_p8 -.align 5 -.gcm_ghash_p8: - li r0,-4096 - li r8,0x10 - mfspr $vrsave,256 - li r9,0x20 - mtspr 256,r0 - li r10,0x30 - lvx_u $Xl,0,$Xip # load Xi - - lvx_u $Hl,r8,$Htbl # load pre-computed table - li r8,0x40 - le?lvsl $lemask,r0,r0 - lvx_u $H, r9,$Htbl - li r9,0x50 - le?vspltisb $t0,0x07 - lvx_u $Hh,r10,$Htbl - li r10,0x60 - le?vxor $lemask,$lemask,$t0 - lvx_u $xC2,0,$Htbl - le?vperm $Xl,$Xl,$Xl,$lemask - vxor $zero,$zero,$zero - - ${UCMP}i $len,64 - bge Lgcm_ghash_p8_4x - - lvx_u $IN,0,$inp - addi $inp,$inp,16 - subic. $len,$len,16 - le?vperm $IN,$IN,$IN,$lemask - vxor $IN,$IN,$Xl - beq Lshort - - lvx_u $H2l,r8,$Htbl # load H^2 - li r8,16 - lvx_u $H2, r9,$Htbl - add r9,$inp,$len # end of input - lvx_u $H2h,r10,$Htbl - be?b Loop_2x - -.align 5 -Loop_2x: - lvx_u $IN1,0,$inp - le?vperm $IN1,$IN1,$IN1,$lemask - - subic $len,$len,32 - vpmsumd $Xl,$IN,$H2l # H^2.lo·Xi.lo - vpmsumd $Xl1,$IN1,$Hl # H.lo·Xi+1.lo - subfe r0,r0,r0 # borrow?-1:0 - vpmsumd $Xm,$IN,$H2 # H^2.hi·Xi.lo+H^2.lo·Xi.hi - vpmsumd $Xm1,$IN1,$H # H.hi·Xi+1.lo+H.lo·Xi+1.hi - and r0,r0,$len - vpmsumd $Xh,$IN,$H2h # H^2.hi·Xi.hi - vpmsumd $Xh1,$IN1,$Hh # H.hi·Xi+1.hi - add $inp,$inp,r0 - - vxor $Xl,$Xl,$Xl1 - vxor $Xm,$Xm,$Xm1 - - vpmsumd $t2,$Xl,$xC2 # 1st reduction phase - - vsldoi $t0,$Xm,$zero,8 - vsldoi $t1,$zero,$Xm,8 - vxor $Xh,$Xh,$Xh1 - vxor $Xl,$Xl,$t0 - vxor $Xh,$Xh,$t1 - - vsldoi $Xl,$Xl,$Xl,8 - vxor $Xl,$Xl,$t2 - lvx_u $IN,r8,$inp - addi $inp,$inp,32 - - vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase - vpmsumd $Xl,$Xl,$xC2 - le?vperm $IN,$IN,$IN,$lemask - vxor $t1,$t1,$Xh - vxor $IN,$IN,$t1 - vxor $IN,$IN,$Xl - $UCMP r9,$inp - bgt Loop_2x # done yet? - - cmplwi $len,0 - bne Leven - -Lshort: - vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo - vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi - vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi - - vpmsumd $t2,$Xl,$xC2 # 1st reduction phase - - vsldoi $t0,$Xm,$zero,8 - vsldoi $t1,$zero,$Xm,8 - vxor $Xl,$Xl,$t0 - vxor $Xh,$Xh,$t1 - - vsldoi $Xl,$Xl,$Xl,8 - vxor $Xl,$Xl,$t2 - - vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase - vpmsumd $Xl,$Xl,$xC2 - vxor $t1,$t1,$Xh - -Leven: - vxor $Xl,$Xl,$t1 - le?vperm $Xl,$Xl,$Xl,$lemask - stvx_u $Xl,0,$Xip # write out Xi - - mtspr 256,$vrsave - blr - .long 0 - .byte 0,12,0x14,0,0,0,4,0 - .long 0 -___ -{ -my ($Xl3,$Xm2,$IN2,$H3l,$H3,$H3h, - $Xh3,$Xm3,$IN3,$H4l,$H4,$H4h) = map("v$_",(20..31)); -my $IN0=$IN; -my ($H21l,$H21h,$loperm,$hiperm) = ($Hl,$Hh,$H2l,$H2h); - -$code.=<<___; -.align 5 -.gcm_ghash_p8_4x: -Lgcm_ghash_p8_4x: - $STU $sp,-$FRAME($sp) - li r10,`15+6*$SIZE_T` - li r11,`31+6*$SIZE_T` - stvx v20,r10,$sp - addi r10,r10,32 - stvx v21,r11,$sp - addi r11,r11,32 - stvx v22,r10,$sp - addi r10,r10,32 - stvx v23,r11,$sp - addi r11,r11,32 - stvx v24,r10,$sp - addi r10,r10,32 - stvx v25,r11,$sp - addi r11,r11,32 - stvx v26,r10,$sp - addi r10,r10,32 - stvx v27,r11,$sp - addi r11,r11,32 - stvx v28,r10,$sp - addi r10,r10,32 - stvx v29,r11,$sp - addi r11,r11,32 - stvx v30,r10,$sp - li r10,0x60 - stvx v31,r11,$sp - li r0,-1 - stw $vrsave,`$FRAME-4`($sp) # save vrsave - mtspr 256,r0 # preserve all AltiVec registers - - lvsl $t0,0,r8 # 0x0001..0e0f - #lvx_u $H2l,r8,$Htbl # load H^2 - li r8,0x70 - lvx_u $H2, r9,$Htbl - li r9,0x80 - vspltisb $t1,8 # 0x0808..0808 - #lvx_u $H2h,r10,$Htbl - li r10,0x90 - lvx_u $H3l,r8,$Htbl # load H^3 - li r8,0xa0 - lvx_u $H3, r9,$Htbl - li r9,0xb0 - lvx_u $H3h,r10,$Htbl - li r10,0xc0 - lvx_u $H4l,r8,$Htbl # load H^4 - li r8,0x10 - lvx_u $H4, r9,$Htbl - li r9,0x20 - lvx_u $H4h,r10,$Htbl - li r10,0x30 - - vsldoi $t2,$zero,$t1,8 # 0x0000..0808 - vaddubm $hiperm,$t0,$t2 # 0x0001..1617 - vaddubm $loperm,$t1,$hiperm # 0x0809..1e1f - - $SHRI $len,$len,4 # this allows to use sign bit - # as carry - lvx_u $IN0,0,$inp # load input - lvx_u $IN1,r8,$inp - subic. $len,$len,8 - lvx_u $IN2,r9,$inp - lvx_u $IN3,r10,$inp - addi $inp,$inp,0x40 - le?vperm $IN0,$IN0,$IN0,$lemask - le?vperm $IN1,$IN1,$IN1,$lemask - le?vperm $IN2,$IN2,$IN2,$lemask - le?vperm $IN3,$IN3,$IN3,$lemask - - vxor $Xh,$IN0,$Xl - - vpmsumd $Xl1,$IN1,$H3l - vpmsumd $Xm1,$IN1,$H3 - vpmsumd $Xh1,$IN1,$H3h - - vperm $H21l,$H2,$H,$hiperm - vperm $t0,$IN2,$IN3,$loperm - vperm $H21h,$H2,$H,$loperm - vperm $t1,$IN2,$IN3,$hiperm - vpmsumd $Xm2,$IN2,$H2 # H^2.lo·Xi+2.hi+H^2.hi·Xi+2.lo - vpmsumd $Xl3,$t0,$H21l # H^2.lo·Xi+2.lo+H.lo·Xi+3.lo - vpmsumd $Xm3,$IN3,$H # H.hi·Xi+3.lo +H.lo·Xi+3.hi - vpmsumd $Xh3,$t1,$H21h # H^2.hi·Xi+2.hi+H.hi·Xi+3.hi - - vxor $Xm2,$Xm2,$Xm1 - vxor $Xl3,$Xl3,$Xl1 - vxor $Xm3,$Xm3,$Xm2 - vxor $Xh3,$Xh3,$Xh1 - - blt Ltail_4x - -Loop_4x: - lvx_u $IN0,0,$inp - lvx_u $IN1,r8,$inp - subic. $len,$len,4 - lvx_u $IN2,r9,$inp - lvx_u $IN3,r10,$inp - addi $inp,$inp,0x40 - le?vperm $IN1,$IN1,$IN1,$lemask - le?vperm $IN2,$IN2,$IN2,$lemask - le?vperm $IN3,$IN3,$IN3,$lemask - le?vperm $IN0,$IN0,$IN0,$lemask - - vpmsumd $Xl,$Xh,$H4l # H^4.lo·Xi.lo - vpmsumd $Xm,$Xh,$H4 # H^4.hi·Xi.lo+H^4.lo·Xi.hi - vpmsumd $Xh,$Xh,$H4h # H^4.hi·Xi.hi - vpmsumd $Xl1,$IN1,$H3l - vpmsumd $Xm1,$IN1,$H3 - vpmsumd $Xh1,$IN1,$H3h - - vxor $Xl,$Xl,$Xl3 - vxor $Xm,$Xm,$Xm3 - vxor $Xh,$Xh,$Xh3 - vperm $t0,$IN2,$IN3,$loperm - vperm $t1,$IN2,$IN3,$hiperm - - vpmsumd $t2,$Xl,$xC2 # 1st reduction phase - vpmsumd $Xl3,$t0,$H21l # H.lo·Xi+3.lo +H^2.lo·Xi+2.lo - vpmsumd $Xh3,$t1,$H21h # H.hi·Xi+3.hi +H^2.hi·Xi+2.hi - - vsldoi $t0,$Xm,$zero,8 - vsldoi $t1,$zero,$Xm,8 - vxor $Xl,$Xl,$t0 - vxor $Xh,$Xh,$t1 - - vsldoi $Xl,$Xl,$Xl,8 - vxor $Xl,$Xl,$t2 - - vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase - vpmsumd $Xm2,$IN2,$H2 # H^2.hi·Xi+2.lo+H^2.lo·Xi+2.hi - vpmsumd $Xm3,$IN3,$H # H.hi·Xi+3.lo +H.lo·Xi+3.hi - vpmsumd $Xl,$Xl,$xC2 - - vxor $Xl3,$Xl3,$Xl1 - vxor $Xh3,$Xh3,$Xh1 - vxor $Xh,$Xh,$IN0 - vxor $Xm2,$Xm2,$Xm1 - vxor $Xh,$Xh,$t1 - vxor $Xm3,$Xm3,$Xm2 - vxor $Xh,$Xh,$Xl - bge Loop_4x - -Ltail_4x: - vpmsumd $Xl,$Xh,$H4l # H^4.lo·Xi.lo - vpmsumd $Xm,$Xh,$H4 # H^4.hi·Xi.lo+H^4.lo·Xi.hi - vpmsumd $Xh,$Xh,$H4h # H^4.hi·Xi.hi - - vxor $Xl,$Xl,$Xl3 - vxor $Xm,$Xm,$Xm3 - - vpmsumd $t2,$Xl,$xC2 # 1st reduction phase - - vsldoi $t0,$Xm,$zero,8 - vsldoi $t1,$zero,$Xm,8 - vxor $Xh,$Xh,$Xh3 - vxor $Xl,$Xl,$t0 - vxor $Xh,$Xh,$t1 - - vsldoi $Xl,$Xl,$Xl,8 - vxor $Xl,$Xl,$t2 - - vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase - vpmsumd $Xl,$Xl,$xC2 - vxor $t1,$t1,$Xh - vxor $Xl,$Xl,$t1 - - addic. $len,$len,4 - beq Ldone_4x - - lvx_u $IN0,0,$inp - ${UCMP}i $len,2 - li $len,-4 - blt Lone - lvx_u $IN1,r8,$inp - beq Ltwo - -Lthree: - lvx_u $IN2,r9,$inp - le?vperm $IN0,$IN0,$IN0,$lemask - le?vperm $IN1,$IN1,$IN1,$lemask - le?vperm $IN2,$IN2,$IN2,$lemask - - vxor $Xh,$IN0,$Xl - vmr $H4l,$H3l - vmr $H4, $H3 - vmr $H4h,$H3h - - vperm $t0,$IN1,$IN2,$loperm - vperm $t1,$IN1,$IN2,$hiperm - vpmsumd $Xm2,$IN1,$H2 # H^2.lo·Xi+1.hi+H^2.hi·Xi+1.lo - vpmsumd $Xm3,$IN2,$H # H.hi·Xi+2.lo +H.lo·Xi+2.hi - vpmsumd $Xl3,$t0,$H21l # H^2.lo·Xi+1.lo+H.lo·Xi+2.lo - vpmsumd $Xh3,$t1,$H21h # H^2.hi·Xi+1.hi+H.hi·Xi+2.hi - - vxor $Xm3,$Xm3,$Xm2 - b Ltail_4x - -.align 4 -Ltwo: - le?vperm $IN0,$IN0,$IN0,$lemask - le?vperm $IN1,$IN1,$IN1,$lemask - - vxor $Xh,$IN0,$Xl - vperm $t0,$zero,$IN1,$loperm - vperm $t1,$zero,$IN1,$hiperm - - vsldoi $H4l,$zero,$H2,8 - vmr $H4, $H2 - vsldoi $H4h,$H2,$zero,8 - - vpmsumd $Xl3,$t0, $H21l # H.lo·Xi+1.lo - vpmsumd $Xm3,$IN1,$H # H.hi·Xi+1.lo+H.lo·Xi+2.hi - vpmsumd $Xh3,$t1, $H21h # H.hi·Xi+1.hi - - b Ltail_4x - -.align 4 -Lone: - le?vperm $IN0,$IN0,$IN0,$lemask - - vsldoi $H4l,$zero,$H,8 - vmr $H4, $H - vsldoi $H4h,$H,$zero,8 - - vxor $Xh,$IN0,$Xl - vxor $Xl3,$Xl3,$Xl3 - vxor $Xm3,$Xm3,$Xm3 - vxor $Xh3,$Xh3,$Xh3 - - b Ltail_4x - -Ldone_4x: - le?vperm $Xl,$Xl,$Xl,$lemask - stvx_u $Xl,0,$Xip # write out Xi - - li r10,`15+6*$SIZE_T` - li r11,`31+6*$SIZE_T` - mtspr 256,$vrsave - lvx v20,r10,$sp - addi r10,r10,32 - lvx v21,r11,$sp - addi r11,r11,32 - lvx v22,r10,$sp - addi r10,r10,32 - lvx v23,r11,$sp - addi r11,r11,32 - lvx v24,r10,$sp - addi r10,r10,32 - lvx v25,r11,$sp - addi r11,r11,32 - lvx v26,r10,$sp - addi r10,r10,32 - lvx v27,r11,$sp - addi r11,r11,32 - lvx v28,r10,$sp - addi r10,r10,32 - lvx v29,r11,$sp - addi r11,r11,32 - lvx v30,r10,$sp - lvx v31,r11,$sp - addi $sp,$sp,$FRAME - blr - .long 0 - .byte 0,12,0x04,0,0x80,0,4,0 - .long 0 -___ -} -$code.=<<___; -.size .gcm_ghash_p8,.-.gcm_ghash_p8 - -.asciz "GHASH for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>" -.align 2 -___ - -foreach (split("\n",$code)) { - s/\`([^\`]*)\`/eval $1/geo; - - if ($flavour =~ /le$/o) { # little-endian - s/le\?//o or - s/be\?/#be#/o; - } else { - s/le\?/#le#/o or - s/be\?//o; - } - print $_,"\n"; -} - -close STDOUT or die "error closing STDOUT: $!"; # enforce flush
diff --git a/crypto/fipsmodule/modes/gcm.c b/crypto/fipsmodule/modes/gcm.c index 11a0b20..f22fa9d 100644 --- a/crypto/fipsmodule/modes/gcm.c +++ b/crypto/fipsmodule/modes/gcm.c
@@ -230,13 +230,6 @@ *out_hash = gcm_ghash_neon; return; } -#elif defined(GHASH_ASM_PPC64LE) - if (CRYPTO_is_PPC64LE_vcrypto_capable()) { - gcm_init_p8(out_table, H); - *out_mult = gcm_gmult_p8; - *out_hash = gcm_ghash_p8; - return; - } #endif gcm_init_nohw(out_table, H);
diff --git a/crypto/fipsmodule/modes/gcm_test.cc b/crypto/fipsmodule/modes/gcm_test.cc index 324d0e8..e7fcd5c 100644 --- a/crypto/fipsmodule/modes/gcm_test.cc +++ b/crypto/fipsmodule/modes/gcm_test.cc
@@ -222,15 +222,5 @@ } } #endif - -#if defined(GHASH_ASM_PPC64LE) - if (CRYPTO_is_PPC64LE_vcrypto_capable()) { - CHECK_ABI(gcm_init_p8, Htable, kH); - CHECK_ABI(gcm_gmult_p8, X, Htable); - for (size_t blocks : kBlockCounts) { - CHECK_ABI(gcm_ghash_p8, X, Htable, buf, 16 * blocks); - } - } -#endif // GHASH_ASM_PPC64LE } #endif // SUPPORTS_ABI_TEST && !OPENSSL_NO_ASM
diff --git a/crypto/fipsmodule/modes/internal.h b/crypto/fipsmodule/modes/internal.h index 8a0a75f..d77efca 100644 --- a/crypto/fipsmodule/modes/internal.h +++ b/crypto/fipsmodule/modes/internal.h
@@ -308,13 +308,6 @@ void *Xi, uint8_t *ivec, const AES_KEY *key); #endif -#elif defined(OPENSSL_PPC64LE) -#define GHASH_ASM_PPC64LE -#define GCM_FUNCREF -void gcm_init_p8(u128 Htable[16], const uint64_t Xi[2]); -void gcm_gmult_p8(uint64_t Xi[2], const u128 Htable[16]); -void gcm_ghash_p8(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp, - size_t len); #endif #endif // OPENSSL_NO_ASM
diff --git a/crypto/fipsmodule/rand/rand.c b/crypto/fipsmodule/rand/rand.c index cb1ee7d..41754c6 100644 --- a/crypto/fipsmodule/rand/rand.c +++ b/crypto/fipsmodule/rand/rand.c
@@ -416,11 +416,6 @@ // Take a read lock around accesses to |state->drbg|. This is needed to // avoid returning bad entropy if we race with // |rand_thread_state_clear_all|. - // - // This lock must be taken after any calls to |CRYPTO_sysrand| to avoid a - // bug on ppc64le. glibc may implement pthread locks by wrapping user code - // in a hardware transaction, but, on some older versions of glibc and the - // kernel, syscalls made with |syscall| did not abort the transaction. CRYPTO_STATIC_MUTEX_lock_read(state_clear_all_lock_bss_get()); #endif if (!CTR_DRBG_reseed(&state->drbg, seed, reseed_additional_data,
diff --git a/crypto/fipsmodule/sha/internal.h b/crypto/fipsmodule/sha/internal.h index cc90914..605f166 100644 --- a/crypto/fipsmodule/sha/internal.h +++ b/crypto/fipsmodule/sha/internal.h
@@ -22,23 +22,14 @@ #endif -#if defined(OPENSSL_PPC64LE) || \ - (!defined(OPENSSL_NO_ASM) && \ - (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || \ - defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64))) -// POWER has an intrinsics-based implementation of SHA-1 and thus the functions -// normally defined in assembly are available even with |OPENSSL_NO_ASM| in -// this case. -#define SHA1_ASM -void sha1_block_data_order(uint32_t *state, const uint8_t *in, - size_t num_blocks); -#endif - #if !defined(OPENSSL_NO_ASM) && \ (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || \ defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)) +#define SHA1_ASM #define SHA256_ASM #define SHA512_ASM +void sha1_block_data_order(uint32_t *state, const uint8_t *in, + size_t num_blocks); void sha256_block_data_order(uint32_t *state, const uint8_t *in, size_t num_blocks); void sha512_block_data_order(uint64_t *state, const uint8_t *in,
diff --git a/crypto/fipsmodule/sha/sha1-altivec.c b/crypto/fipsmodule/sha/sha1-altivec.c deleted file mode 100644 index 3152827..0000000 --- a/crypto/fipsmodule/sha/sha1-altivec.c +++ /dev/null
@@ -1,361 +0,0 @@ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] */ - -// Altivec-optimized SHA1 in C. This is tested on ppc64le only. -// -// References: -// https://software.intel.com/en-us/articles/improving-the-performance-of-the-secure-hash-algorithm-1 -// http://arctic.org/~dean/crypto/sha1.html -// -// This code used the generic SHA-1 from OpenSSL as a basis and AltiVec -// optimisations were added on top. - -#include <openssl/sha.h> - -#if defined(OPENSSL_PPC64LE) - -#include <altivec.h> - -void sha1_block_data_order(uint32_t *state, const uint8_t *data, size_t num); - -static uint32_t rotate(uint32_t a, int n) { return (a << n) | (a >> (32 - n)); } - -typedef vector unsigned int vec_uint32_t; -typedef vector unsigned char vec_uint8_t; - -// Vector constants -static const vec_uint8_t k_swap_endianness = {3, 2, 1, 0, 7, 6, 5, 4, - 11, 10, 9, 8, 15, 14, 13, 12}; - -// Shift amounts for byte and bit shifts and rotations -static const vec_uint8_t k_4_bytes = {32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32}; -static const vec_uint8_t k_12_bytes = {96, 96, 96, 96, 96, 96, 96, 96, - 96, 96, 96, 96, 96, 96, 96, 96}; - -#define K_00_19 0x5a827999UL -#define K_20_39 0x6ed9eba1UL -#define K_40_59 0x8f1bbcdcUL -#define K_60_79 0xca62c1d6UL - -// Vector versions of the above. -static const vec_uint32_t K_00_19_x_4 = {K_00_19, K_00_19, K_00_19, K_00_19}; -static const vec_uint32_t K_20_39_x_4 = {K_20_39, K_20_39, K_20_39, K_20_39}; -static const vec_uint32_t K_40_59_x_4 = {K_40_59, K_40_59, K_40_59, K_40_59}; -static const vec_uint32_t K_60_79_x_4 = {K_60_79, K_60_79, K_60_79, K_60_79}; - -// vector message scheduling: compute message schedule for round i..i+3 where i -// is divisible by 4. We return the schedule w[i..i+3] as a vector. In -// addition, we also precompute sum w[i..+3] and an additive constant K. This -// is done to offload some computation of f() in the integer execution units. -// -// Byte shifting code below may not be correct for big-endian systems. -static vec_uint32_t sched_00_15(vec_uint32_t *pre_added, const void *data, - vec_uint32_t k) { - const vector unsigned char unaligned_data = - vec_vsx_ld(0, (const unsigned char*) data); - const vec_uint32_t v = (vec_uint32_t) unaligned_data; - const vec_uint32_t w = vec_perm(v, v, k_swap_endianness); - vec_st(w + k, 0, pre_added); - return w; -} - -// Compute w[i..i+3] using these steps for i in [16, 20, 24, 28] -// -// w'[i ] = (w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16]) <<< 1 -// w'[i+1] = (w[i-2] ^ w[i-7] ^ w[i-13] ^ w[i-15]) <<< 1 -// w'[i+2] = (w[i-1] ^ w[i-6] ^ w[i-12] ^ w[i-14]) <<< 1 -// w'[i+3] = ( 0 ^ w[i-5] ^ w[i-11] ^ w[i-13]) <<< 1 -// -// w[ i] = w'[ i] -// w[i+1] = w'[i+1] -// w[i+2] = w'[i+2] -// w[i+3] = w'[i+3] ^ (w'[i] <<< 1) -static vec_uint32_t sched_16_31(vec_uint32_t *pre_added, vec_uint32_t minus_4, - vec_uint32_t minus_8, vec_uint32_t minus_12, - vec_uint32_t minus_16, vec_uint32_t k) { - const vec_uint32_t minus_3 = vec_sro(minus_4, k_4_bytes); - const vec_uint32_t minus_14 = vec_sld((minus_12), (minus_16), 8); - const vec_uint32_t k_1_bit = vec_splat_u32(1); - const vec_uint32_t w_prime = - vec_rl(minus_3 ^ minus_8 ^ minus_14 ^ minus_16, k_1_bit); - const vec_uint32_t w = - w_prime ^ vec_rl(vec_slo(w_prime, k_12_bytes), k_1_bit); - vec_st(w + k, 0, pre_added); - return w; -} - -// Compute w[i..i+3] using this relation for i in [32, 36, 40 ... 76] -// w[i] = (w[i-6] ^ w[i-16] ^ w[i-28] ^ w[i-32]), 2) <<< 2 -static vec_uint32_t sched_32_79(vec_uint32_t *pre_added, vec_uint32_t minus_4, - vec_uint32_t minus_8, vec_uint32_t minus_16, - vec_uint32_t minus_28, vec_uint32_t minus_32, - vec_uint32_t k) { - const vec_uint32_t minus_6 = vec_sld(minus_4, minus_8, 8); - const vec_uint32_t k_2_bits = vec_splat_u32(2); - const vec_uint32_t w = - vec_rl(minus_6 ^ minus_16 ^ minus_28 ^ minus_32, k_2_bits); - vec_st(w + k, 0, pre_added); - return w; -} - -// As pointed out by Wei Dai <weidai@eskimo.com>, F() below can be simplified -// to the code in F_00_19. Wei attributes these optimisations to Peter -// Gutmann's SHS code, and he attributes it to Rich Schroeppel. #define -// F(x,y,z) (((x) & (y)) | ((~(x)) & (z))) I've just become aware of another -// tweak to be made, again from Wei Dai, in F_40_59, (x&a)|(y&a) -> (x|y)&a -#define F_00_19(b, c, d) ((((c) ^ (d)) & (b)) ^ (d)) -#define F_20_39(b, c, d) ((b) ^ (c) ^ (d)) -#define F_40_59(b, c, d) (((b) & (c)) | (((b) | (c)) & (d))) -#define F_60_79(b, c, d) F_20_39(b, c, d) - -// We pre-added the K constants during message scheduling. -#define BODY_00_19(i, a, b, c, d, e, f) \ - do { \ - (f) = w[i] + (e) + rotate((a), 5) + F_00_19((b), (c), (d)); \ - (b) = rotate((b), 30); \ - } while (0) - -#define BODY_20_39(i, a, b, c, d, e, f) \ - do { \ - (f) = w[i] + (e) + rotate((a), 5) + F_20_39((b), (c), (d)); \ - (b) = rotate((b), 30); \ - } while (0) - -#define BODY_40_59(i, a, b, c, d, e, f) \ - do { \ - (f) = w[i] + (e) + rotate((a), 5) + F_40_59((b), (c), (d)); \ - (b) = rotate((b), 30); \ - } while (0) - -#define BODY_60_79(i, a, b, c, d, e, f) \ - do { \ - (f) = w[i] + (e) + rotate((a), 5) + F_60_79((b), (c), (d)); \ - (b) = rotate((b), 30); \ - } while (0) - -void sha1_block_data_order(uint32_t *state, const uint8_t *data, size_t num) { - uint32_t A, B, C, D, E, T; - - A = state[0]; - B = state[1]; - C = state[2]; - D = state[3]; - E = state[4]; - - for (;;) { - vec_uint32_t vw[20]; - const uint32_t *w = (const uint32_t *)&vw; - - vec_uint32_t k = K_00_19_x_4; - const vec_uint32_t w0 = sched_00_15(vw + 0, data + 0, k); - BODY_00_19(0, A, B, C, D, E, T); - BODY_00_19(1, T, A, B, C, D, E); - BODY_00_19(2, E, T, A, B, C, D); - BODY_00_19(3, D, E, T, A, B, C); - - const vec_uint32_t w4 = sched_00_15(vw + 1, data + 16, k); - BODY_00_19(4, C, D, E, T, A, B); - BODY_00_19(5, B, C, D, E, T, A); - BODY_00_19(6, A, B, C, D, E, T); - BODY_00_19(7, T, A, B, C, D, E); - - const vec_uint32_t w8 = sched_00_15(vw + 2, data + 32, k); - BODY_00_19(8, E, T, A, B, C, D); - BODY_00_19(9, D, E, T, A, B, C); - BODY_00_19(10, C, D, E, T, A, B); - BODY_00_19(11, B, C, D, E, T, A); - - const vec_uint32_t w12 = sched_00_15(vw + 3, data + 48, k); - BODY_00_19(12, A, B, C, D, E, T); - BODY_00_19(13, T, A, B, C, D, E); - BODY_00_19(14, E, T, A, B, C, D); - BODY_00_19(15, D, E, T, A, B, C); - - const vec_uint32_t w16 = sched_16_31(vw + 4, w12, w8, w4, w0, k); - BODY_00_19(16, C, D, E, T, A, B); - BODY_00_19(17, B, C, D, E, T, A); - BODY_00_19(18, A, B, C, D, E, T); - BODY_00_19(19, T, A, B, C, D, E); - - k = K_20_39_x_4; - const vec_uint32_t w20 = sched_16_31(vw + 5, w16, w12, w8, w4, k); - BODY_20_39(20, E, T, A, B, C, D); - BODY_20_39(21, D, E, T, A, B, C); - BODY_20_39(22, C, D, E, T, A, B); - BODY_20_39(23, B, C, D, E, T, A); - - const vec_uint32_t w24 = sched_16_31(vw + 6, w20, w16, w12, w8, k); - BODY_20_39(24, A, B, C, D, E, T); - BODY_20_39(25, T, A, B, C, D, E); - BODY_20_39(26, E, T, A, B, C, D); - BODY_20_39(27, D, E, T, A, B, C); - - const vec_uint32_t w28 = sched_16_31(vw + 7, w24, w20, w16, w12, k); - BODY_20_39(28, C, D, E, T, A, B); - BODY_20_39(29, B, C, D, E, T, A); - BODY_20_39(30, A, B, C, D, E, T); - BODY_20_39(31, T, A, B, C, D, E); - - const vec_uint32_t w32 = sched_32_79(vw + 8, w28, w24, w16, w4, w0, k); - BODY_20_39(32, E, T, A, B, C, D); - BODY_20_39(33, D, E, T, A, B, C); - BODY_20_39(34, C, D, E, T, A, B); - BODY_20_39(35, B, C, D, E, T, A); - - const vec_uint32_t w36 = sched_32_79(vw + 9, w32, w28, w20, w8, w4, k); - BODY_20_39(36, A, B, C, D, E, T); - BODY_20_39(37, T, A, B, C, D, E); - BODY_20_39(38, E, T, A, B, C, D); - BODY_20_39(39, D, E, T, A, B, C); - - k = K_40_59_x_4; - const vec_uint32_t w40 = sched_32_79(vw + 10, w36, w32, w24, w12, w8, k); - BODY_40_59(40, C, D, E, T, A, B); - BODY_40_59(41, B, C, D, E, T, A); - BODY_40_59(42, A, B, C, D, E, T); - BODY_40_59(43, T, A, B, C, D, E); - - const vec_uint32_t w44 = sched_32_79(vw + 11, w40, w36, w28, w16, w12, k); - BODY_40_59(44, E, T, A, B, C, D); - BODY_40_59(45, D, E, T, A, B, C); - BODY_40_59(46, C, D, E, T, A, B); - BODY_40_59(47, B, C, D, E, T, A); - - const vec_uint32_t w48 = sched_32_79(vw + 12, w44, w40, w32, w20, w16, k); - BODY_40_59(48, A, B, C, D, E, T); - BODY_40_59(49, T, A, B, C, D, E); - BODY_40_59(50, E, T, A, B, C, D); - BODY_40_59(51, D, E, T, A, B, C); - - const vec_uint32_t w52 = sched_32_79(vw + 13, w48, w44, w36, w24, w20, k); - BODY_40_59(52, C, D, E, T, A, B); - BODY_40_59(53, B, C, D, E, T, A); - BODY_40_59(54, A, B, C, D, E, T); - BODY_40_59(55, T, A, B, C, D, E); - - const vec_uint32_t w56 = sched_32_79(vw + 14, w52, w48, w40, w28, w24, k); - BODY_40_59(56, E, T, A, B, C, D); - BODY_40_59(57, D, E, T, A, B, C); - BODY_40_59(58, C, D, E, T, A, B); - BODY_40_59(59, B, C, D, E, T, A); - - k = K_60_79_x_4; - const vec_uint32_t w60 = sched_32_79(vw + 15, w56, w52, w44, w32, w28, k); - BODY_60_79(60, A, B, C, D, E, T); - BODY_60_79(61, T, A, B, C, D, E); - BODY_60_79(62, E, T, A, B, C, D); - BODY_60_79(63, D, E, T, A, B, C); - - const vec_uint32_t w64 = sched_32_79(vw + 16, w60, w56, w48, w36, w32, k); - BODY_60_79(64, C, D, E, T, A, B); - BODY_60_79(65, B, C, D, E, T, A); - BODY_60_79(66, A, B, C, D, E, T); - BODY_60_79(67, T, A, B, C, D, E); - - const vec_uint32_t w68 = sched_32_79(vw + 17, w64, w60, w52, w40, w36, k); - BODY_60_79(68, E, T, A, B, C, D); - BODY_60_79(69, D, E, T, A, B, C); - BODY_60_79(70, C, D, E, T, A, B); - BODY_60_79(71, B, C, D, E, T, A); - - const vec_uint32_t w72 = sched_32_79(vw + 18, w68, w64, w56, w44, w40, k); - BODY_60_79(72, A, B, C, D, E, T); - BODY_60_79(73, T, A, B, C, D, E); - BODY_60_79(74, E, T, A, B, C, D); - BODY_60_79(75, D, E, T, A, B, C); - - // We don't use the last value - (void)sched_32_79(vw + 19, w72, w68, w60, w48, w44, k); - BODY_60_79(76, C, D, E, T, A, B); - BODY_60_79(77, B, C, D, E, T, A); - BODY_60_79(78, A, B, C, D, E, T); - BODY_60_79(79, T, A, B, C, D, E); - - const uint32_t mask = 0xffffffffUL; - state[0] = (state[0] + E) & mask; - state[1] = (state[1] + T) & mask; - state[2] = (state[2] + A) & mask; - state[3] = (state[3] + B) & mask; - state[4] = (state[4] + C) & mask; - - data += 64; - if (--num == 0) { - break; - } - - A = state[0]; - B = state[1]; - C = state[2]; - D = state[3]; - E = state[4]; - } -} - -#endif // OPENSSL_PPC64LE - -#undef K_00_19 -#undef K_20_39 -#undef K_40_59 -#undef K_60_79 -#undef F_00_19 -#undef F_20_39 -#undef F_40_59 -#undef F_60_79 -#undef BODY_00_19 -#undef BODY_20_39 -#undef BODY_40_59 -#undef BODY_60_79
diff --git a/crypto/internal.h b/crypto/internal.h index 63e6a66..f9a243e 100644 --- a/crypto/internal.h +++ b/crypto/internal.h
@@ -166,7 +166,7 @@ #if defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || defined(OPENSSL_ARM) || \ - defined(OPENSSL_AARCH64) || defined(OPENSSL_PPC64LE) + defined(OPENSSL_AARCH64) // OPENSSL_cpuid_setup initializes the platform-specific feature cache. void OPENSSL_cpuid_setup(void); #endif @@ -1251,16 +1251,6 @@ #endif // OPENSSL_ARM || OPENSSL_AARCH64 -#if defined(OPENSSL_PPC64LE) - -// CRYPTO_is_PPC64LE_vcrypto_capable returns true iff the current CPU supports -// the Vector.AES category of instructions. -int CRYPTO_is_PPC64LE_vcrypto_capable(void); - -extern unsigned long OPENSSL_ppc64le_hwcap2; - -#endif // OPENSSL_PPC64LE - #if defined(BORINGSSL_DISPATCH_TEST) // Runtime CPU dispatch testing support
diff --git a/crypto/perlasm/ppc-xlate.pl b/crypto/perlasm/ppc-xlate.pl deleted file mode 100644 index 1c51577..0000000 --- a/crypto/perlasm/ppc-xlate.pl +++ /dev/null
@@ -1,320 +0,0 @@ -#! /usr/bin/env perl -# Copyright 2006-2016 The OpenSSL Project Authors. All Rights Reserved. -# -# Licensed under the OpenSSL license (the "License"). You may not use -# this file except in compliance with the License. You can obtain a copy -# in the file LICENSE in the source distribution or at -# https://www.openssl.org/source/license.html - -my $flavour = shift; -my $output = shift; -open STDOUT,">$output" || die "can't open $output: $!"; - -my %GLOBALS; -my %TYPES; -my $dotinlocallabels=($flavour=~/linux/)?1:0; - -################################################################ -# directives which need special treatment on different platforms -################################################################ -my $type = sub { - my ($dir,$name,$type) = @_; - - $TYPES{$name} = $type; - if ($flavour =~ /linux/) { - $name =~ s|^\.||; - ".type $name,$type"; - } else { - ""; - } -}; -my $globl = sub { - my $junk = shift; - my $name = shift; - my $global = \$GLOBALS{$name}; - my $type = \$TYPES{$name}; - my $ret; - - $name =~ s|^\.||; - - SWITCH: for ($flavour) { - /aix/ && do { if (!$$type) { - $$type = "\@function"; - } - if ($$type =~ /function/) { - $name = ".$name"; - } - last; - }; - /osx/ && do { $name = "_$name"; - last; - }; - /linux.*(32|64le)/ - && do { $ret .= ".globl $name"; - if (!$$type) { - $ret .= "\n.type $name,\@function"; - $$type = "\@function"; - } - last; - }; - /linux.*64/ && do { $ret .= ".globl $name"; - if (!$$type) { - $ret .= "\n.type $name,\@function"; - $$type = "\@function"; - } - if ($$type =~ /function/) { - $ret .= "\n.section \".opd\",\"aw\""; - $ret .= "\n.align 3"; - $ret .= "\n$name:"; - $ret .= "\n.quad .$name,.TOC.\@tocbase,0"; - $ret .= "\n.previous"; - $name = ".$name"; - } - last; - }; - } - - $ret = ".globl $name" if (!$ret); - $$global = $name; - $ret; -}; -my $text = sub { - my $ret = ($flavour =~ /aix/) ? ".csect\t.text[PR],7" : ".text"; - $ret = ".abiversion 2\n".$ret if ($flavour =~ /linux.*64le/); - $ret; -}; -my $machine = sub { - my $junk = shift; - my $arch = shift; - if ($flavour =~ /osx/) - { $arch =~ s/\"//g; - $arch = ($flavour=~/64/) ? "ppc970-64" : "ppc970" if ($arch eq "any"); - } - ".machine $arch"; -}; -my $size = sub { - if ($flavour =~ /linux/) - { shift; - my $name = shift; - my $real = $GLOBALS{$name} ? \$GLOBALS{$name} : \$name; - my $ret = ".size $$real,.-$$real"; - $name =~ s|^\.||; - if ($$real ne $name) { - $ret .= "\n.size $name,.-$$real"; - } - $ret; - } - else - { ""; } -}; -my $asciz = sub { - shift; - my $line = join(",",@_); - if ($line =~ /^"(.*)"$/) - { ".byte " . join(",",unpack("C*",$1),0) . "\n.align 2"; } - else - { ""; } -}; -my $quad = sub { - shift; - my @ret; - my ($hi,$lo); - for (@_) { - if (/^0x([0-9a-f]*?)([0-9a-f]{1,8})$/io) - { $hi=$1?"0x$1":"0"; $lo="0x$2"; } - elsif (/^([0-9]+)$/o) - { $hi=$1>>32; $lo=$1&0xffffffff; } # error-prone with 32-bit perl - else - { $hi=undef; $lo=$_; } - - if (defined($hi)) - { push(@ret,$flavour=~/le$/o?".long\t$lo,$hi":".long\t$hi,$lo"); } - else - { push(@ret,".quad $lo"); } - } - join("\n",@ret); -}; - -################################################################ -# simplified mnemonics not handled by at least one assembler -################################################################ -my $cmplw = sub { - my $f = shift; - my $cr = 0; $cr = shift if ($#_>1); - # Some out-of-date 32-bit GNU assembler just can't handle cmplw... - ($flavour =~ /linux.*32/) ? - " .long ".sprintf "0x%x",31<<26|$cr<<23|$_[0]<<16|$_[1]<<11|64 : - " cmplw ".join(',',$cr,@_); -}; -my $bdnz = sub { - my $f = shift; - my $bo = $f=~/[\+\-]/ ? 16+9 : 16; # optional "to be taken" hint - " bc $bo,0,".shift; -} if ($flavour!~/linux/); -my $bltlr = sub { - my $f = shift; - my $bo = $f=~/\-/ ? 12+2 : 12; # optional "not to be taken" hint - ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints - " .long ".sprintf "0x%x",19<<26|$bo<<21|16<<1 : - " bclr $bo,0"; -}; -my $bnelr = sub { - my $f = shift; - my $bo = $f=~/\-/ ? 4+2 : 4; # optional "not to be taken" hint - ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints - " .long ".sprintf "0x%x",19<<26|$bo<<21|2<<16|16<<1 : - " bclr $bo,2"; -}; -my $beqlr = sub { - my $f = shift; - my $bo = $f=~/-/ ? 12+2 : 12; # optional "not to be taken" hint - ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints - " .long ".sprintf "0x%X",19<<26|$bo<<21|2<<16|16<<1 : - " bclr $bo,2"; -}; -# GNU assembler can't handle extrdi rA,rS,16,48, or when sum of last two -# arguments is 64, with "operand out of range" error. -my $extrdi = sub { - my ($f,$ra,$rs,$n,$b) = @_; - $b = ($b+$n)&63; $n = 64-$n; - " rldicl $ra,$rs,$b,$n"; -}; -my $vmr = sub { - my ($f,$vx,$vy) = @_; - " vor $vx,$vy,$vy"; -}; - -# Some ABIs specify vrsave, special-purpose register #256, as reserved -# for system use. -my $no_vrsave = ($flavour =~ /aix|linux64le/); -my $mtspr = sub { - my ($f,$idx,$ra) = @_; - if ($idx == 256 && $no_vrsave) { - " or $ra,$ra,$ra"; - } else { - " mtspr $idx,$ra"; - } -}; -my $mfspr = sub { - my ($f,$rd,$idx) = @_; - if ($idx == 256 && $no_vrsave) { - " li $rd,-1"; - } else { - " mfspr $rd,$idx"; - } -}; - -# PowerISA 2.06 stuff -sub vsxmem_op { - my ($f, $vrt, $ra, $rb, $op) = @_; - " .long ".sprintf "0x%X",(31<<26)|($vrt<<21)|($ra<<16)|($rb<<11)|($op*2+1); -} -# made-up unaligned memory reference AltiVec/VMX instructions -my $lvx_u = sub { vsxmem_op(@_, 844); }; # lxvd2x -my $stvx_u = sub { vsxmem_op(@_, 972); }; # stxvd2x -my $lvdx_u = sub { vsxmem_op(@_, 588); }; # lxsdx -my $stvdx_u = sub { vsxmem_op(@_, 716); }; # stxsdx -my $lvx_4w = sub { vsxmem_op(@_, 780); }; # lxvw4x -my $stvx_4w = sub { vsxmem_op(@_, 908); }; # stxvw4x - -# PowerISA 2.07 stuff -sub vcrypto_op { - my ($f, $vrt, $vra, $vrb, $op) = @_; - " .long ".sprintf "0x%X",(4<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|$op; -} -my $vcipher = sub { vcrypto_op(@_, 1288); }; -my $vcipherlast = sub { vcrypto_op(@_, 1289); }; -my $vncipher = sub { vcrypto_op(@_, 1352); }; -my $vncipherlast= sub { vcrypto_op(@_, 1353); }; -my $vsbox = sub { vcrypto_op(@_, 0, 1480); }; -my $vshasigmad = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1730); }; -my $vshasigmaw = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1666); }; -my $vpmsumb = sub { vcrypto_op(@_, 1032); }; -my $vpmsumd = sub { vcrypto_op(@_, 1224); }; -my $vpmsubh = sub { vcrypto_op(@_, 1096); }; -my $vpmsumw = sub { vcrypto_op(@_, 1160); }; -my $vaddudm = sub { vcrypto_op(@_, 192); }; - -my $mtsle = sub { - my ($f, $arg) = @_; - " .long ".sprintf "0x%X",(31<<26)|($arg<<21)|(147*2); -}; - -# PowerISA 3.0 stuff -my $maddhdu = sub { - my ($f, $rt, $ra, $rb, $rc) = @_; - " .long ".sprintf "0x%X",(4<<26)|($rt<<21)|($ra<<16)|($rb<<11)|($rc<<6)|49; -}; -my $maddld = sub { - my ($f, $rt, $ra, $rb, $rc) = @_; - " .long ".sprintf "0x%X",(4<<26)|($rt<<21)|($ra<<16)|($rb<<11)|($rc<<6)|51; -}; - -my $darn = sub { - my ($f, $rt, $l) = @_; - " .long ".sprintf "0x%X",(31<<26)|($rt<<21)|($l<<16)|(755<<1); -}; - -print <<___; -// This file is generated from a similarly-named Perl script in the BoringSSL -// source tree. Do not edit by hand. - -#if defined(__has_feature) -#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) -#define OPENSSL_NO_ASM -#endif -#endif - -#if !defined(OPENSSL_NO_ASM) && defined(__powerpc64__) && defined(__ELF__) -___ - -while($line=<>) { - - $line =~ s|[#!;].*$||; # get rid of asm-style comments... - $line =~ s|/\*.*\*/||; # ... and C-style comments... - $line =~ s|^\s+||; # ... and skip white spaces in beginning... - $line =~ s|\s+$||; # ... and at the end - - { - $line =~ s|\.L(\w+)|L$1|g; # common denominator for Locallabel - $line =~ s|\bL(\w+)|\.L$1|g if ($dotinlocallabels); - } - - { - $line =~ s|(^[\.\w]+)\:\s*||; - my $label = $1; - if ($label) { - my $xlated = ($GLOBALS{$label} or $label); - print "$xlated:"; - if ($flavour =~ /linux.*64le/) { - if ($TYPES{$label} =~ /function/) { - printf "\n.localentry %s,0\n",$xlated; - } - } - } - } - - { - $line =~ s|^\s*(\.?)(\w+)([\.\+\-]?)\s*||; - my $c = $1; $c = "\t" if ($c eq ""); - my $mnemonic = $2; - my $f = $3; - my $opcode = eval("\$$mnemonic"); - $line =~ s/\b(c?[rf]|v|vs)([0-9]+)\b/$2/g if ($c ne "." and $flavour !~ /osx/); - if (ref($opcode) eq 'CODE') { $line = &$opcode($f,split(',',$line)); } - elsif ($mnemonic) { $line = $c.$mnemonic.$f."\t".$line; } - } - - print $line if ($line); - print "\n"; -} - -print <<___; -#endif // !OPENSSL_NO_ASM && __powerpc64__ && __ELF__ -#if defined(__ELF__) -// See https://www.airs.com/blog/archives/518. -.section .note.GNU-stack,"",\%progbits -#endif -___ - -close STDOUT or die "error closing STDOUT: $!";
diff --git a/crypto/test/abi_test.h b/crypto/test/abi_test.h index 1ba82b1..24340c9 100644 --- a/crypto/test/abi_test.h +++ b/crypto/test/abi_test.h
@@ -179,78 +179,7 @@ CALLER_STATE_REGISTER(uint64_t, x28) \ CALLER_STATE_REGISTER(uint64_t, x29) -#elif defined(OPENSSL_PPC64LE) - -// CRReg only compares the CR2-CR4 bits of a CR register. -struct CRReg { - uint32_t masked() const { return value & 0x00fff000; } - bool operator==(CRReg r) const { return masked() == r.masked(); } - bool operator!=(CRReg r) const { return masked() != r.masked(); } - uint32_t value; -}; - -// References: -// ELFv2: http://openpowerfoundation.org/wp-content/uploads/resources/leabi/leabi-20170510.pdf -// -// Note vector and floating-point registers on POWER have two different names. -// Originally, there were 32 floating-point registers and 32 vector registers, -// labelled f0-f31 and v0-v31 respectively. Later, VSX (Vector Scalar Extension) -// unified them into 64 registers vs0-vs63. f0-f31 map to the lower halves of -// vs0-vs31. v0-v31 map to vs32-vs63. The ABI was defined in terms of pre-VSX -// names, so we use those names here. In particular, f14-f31 are -// callee-saved, but the upper halves of vs14-vs31 are not. -#define LOOP_CALLER_STATE_REGISTERS() \ - CALLER_STATE_REGISTER(Reg128, v20) \ - CALLER_STATE_REGISTER(Reg128, v21) \ - CALLER_STATE_REGISTER(Reg128, v22) \ - CALLER_STATE_REGISTER(Reg128, v23) \ - CALLER_STATE_REGISTER(Reg128, v24) \ - CALLER_STATE_REGISTER(Reg128, v25) \ - CALLER_STATE_REGISTER(Reg128, v26) \ - CALLER_STATE_REGISTER(Reg128, v27) \ - CALLER_STATE_REGISTER(Reg128, v28) \ - CALLER_STATE_REGISTER(Reg128, v29) \ - CALLER_STATE_REGISTER(Reg128, v30) \ - CALLER_STATE_REGISTER(Reg128, v31) \ - CALLER_STATE_REGISTER(uint64_t, r14) \ - CALLER_STATE_REGISTER(uint64_t, r15) \ - CALLER_STATE_REGISTER(uint64_t, r16) \ - CALLER_STATE_REGISTER(uint64_t, r17) \ - CALLER_STATE_REGISTER(uint64_t, r18) \ - CALLER_STATE_REGISTER(uint64_t, r19) \ - CALLER_STATE_REGISTER(uint64_t, r20) \ - CALLER_STATE_REGISTER(uint64_t, r21) \ - CALLER_STATE_REGISTER(uint64_t, r22) \ - CALLER_STATE_REGISTER(uint64_t, r23) \ - CALLER_STATE_REGISTER(uint64_t, r24) \ - CALLER_STATE_REGISTER(uint64_t, r25) \ - CALLER_STATE_REGISTER(uint64_t, r26) \ - CALLER_STATE_REGISTER(uint64_t, r27) \ - CALLER_STATE_REGISTER(uint64_t, r28) \ - CALLER_STATE_REGISTER(uint64_t, r29) \ - CALLER_STATE_REGISTER(uint64_t, r30) \ - CALLER_STATE_REGISTER(uint64_t, r31) \ - CALLER_STATE_REGISTER(uint64_t, f14) \ - CALLER_STATE_REGISTER(uint64_t, f15) \ - CALLER_STATE_REGISTER(uint64_t, f16) \ - CALLER_STATE_REGISTER(uint64_t, f17) \ - CALLER_STATE_REGISTER(uint64_t, f18) \ - CALLER_STATE_REGISTER(uint64_t, f19) \ - CALLER_STATE_REGISTER(uint64_t, f20) \ - CALLER_STATE_REGISTER(uint64_t, f21) \ - CALLER_STATE_REGISTER(uint64_t, f22) \ - CALLER_STATE_REGISTER(uint64_t, f23) \ - CALLER_STATE_REGISTER(uint64_t, f24) \ - CALLER_STATE_REGISTER(uint64_t, f25) \ - CALLER_STATE_REGISTER(uint64_t, f26) \ - CALLER_STATE_REGISTER(uint64_t, f27) \ - CALLER_STATE_REGISTER(uint64_t, f28) \ - CALLER_STATE_REGISTER(uint64_t, f29) \ - CALLER_STATE_REGISTER(uint64_t, f30) \ - CALLER_STATE_REGISTER(uint64_t, f31) \ - CALLER_STATE_REGISTER(CRReg, cr) - -#endif // X86_64 || X86 || ARM || AARCH64 || PPC64LE +#endif // X86_64 || X86 || ARM || AARCH64 // Enable ABI testing if all of the following are true. // @@ -302,12 +231,6 @@ // on 32-bit architectures for simplicity. static_assert(sizeof(T) == 4, "parameter types must be word-sized"); return (crypto_word_t)t; -#elif defined(OPENSSL_PPC64LE) - // ELFv2, section 2.2.2.3 says the parameter save area sign- or zero-extends - // parameters passed in memory. Section 2.2.3 is unclear on how to handle - // register parameters, but section 2.2.2.3 additionally says that the memory - // copy of a parameter is identical to the register one. - return (crypto_word_t)t; #elif defined(OPENSSL_X86_64) || defined(OPENSSL_AARCH64) // AAPCS64, section 5.4.2, clauses C.7 and C.14 says any remaining bits in // aarch are unspecified. iOS64 contradicts this and says the callee extends @@ -362,9 +285,9 @@ template <typename R, typename... Args> inline crypto_word_t CheckImpl(Result *out, bool unwind, R (*func)(Args...), typename DeductionGuard<Args>::Type... args) { - // We only support up to 8 arguments, so all arguments on aarch64 and ppc64le - // are passed in registers. This is simpler and avoids the iOS discrepancy - // around packing small arguments on the stack. (See the iOS64 reference.) + // We only support up to 8 arguments, so all arguments on aarch64 are passed + // in registers. This is simpler and avoids the iOS discrepancy around packing + // small arguments on the stack. (See the iOS64 reference.) static_assert(sizeof...(args) <= 8, "too many arguments for abi_test_trampoline");
diff --git a/crypto/test/asm/trampoline-ppc.pl b/crypto/test/asm/trampoline-ppc.pl deleted file mode 100755 index b29c361..0000000 --- a/crypto/test/asm/trampoline-ppc.pl +++ /dev/null
@@ -1,262 +0,0 @@ -#!/usr/bin/env perl -# Copyright (c) 2019, Google Inc. -# -# Permission to use, copy, modify, and/or distribute this software for any -# purpose with or without fee is hereby granted, provided that the above -# copyright notice and this permission notice appear in all copies. -# -# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY -# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION -# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN -# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - -# This file defines helper functions for crypto/test/abi_test.h on ppc64le. See -# that header for details on how to use this. -# -# For convenience, this file is linked into libcrypto, where consuming builds -# already support architecture-specific sources. The static linker should drop -# this code in non-test binaries. This includes a shared library build of -# libcrypto, provided --gc-sections or equivalent is used. -# -# References: -# -# ELFv2: http://openpowerfoundation.org/wp-content/uploads/resources/leabi/leabi-20170510.pdf - -use strict; - -my $flavour = shift; -my $output = shift; -if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; -my $dir = $1; -my $xlate; -( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or -( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or -die "can't locate ppc-xlate.pl"; - -open OUT, "| \"$^X\" \"$xlate\" $flavour \"$output\""; -*STDOUT = *OUT; - -unless ($flavour =~ /linux.*64le/) { - die "This file only supports the ELFv2 ABI, used by ppc64le"; -} - -my $code = ""; - -sub load_or_store_regs { - # $op is "l" or "st". - my ($op, $base_reg, $base_offset) = @_; - # Vector registers. - foreach (20..31) { - my $offset = $base_offset + ($_ - 20) * 16; - # Vector registers only support indexed register addressing. - $code .= "\tli\tr11, $offset\n"; - $code .= "\t${op}vx\tv$_, r11, $base_reg\n"; - } - # Save general registers. - foreach (14..31) { - my $offset = $base_offset + 192 + ($_ - 14) * 8; - $code .= "\t${op}d\tr$_, $offset($base_reg)\n"; - } - # Save floating point registers. - foreach (14..31) { - my $offset = $base_offset + 336 + ($_ - 14) * 8; - $code .= "\t${op}fd\tf$_, $offset($base_reg)\n"; - } -} - -sub load_regs { - my ($base_reg, $base_offset) = @_; - load_or_store_regs("l", $base_reg, $base_offset); -} - -sub store_regs { - my ($base_reg, $base_offset) = @_; - load_or_store_regs("st", $base_reg, $base_offset); -} - -my ($func, $state, $argv, $argc) = ("r3", "r4", "r5", "r6"); -$code .= <<____; -.machine "any" -.text - -# abi_test_trampoline loads callee-saved registers from |state|, calls |func| -# with |argv|, then saves the callee-saved registers into |state|. It returns -# the result of |func|. The |unwind| argument is unused. -# uint64_t abi_test_trampoline(void (*func)(...), CallerState *state, -# const uint64_t *argv, size_t argc, -# uint64_t unwind); -.globl abi_test_trampoline -.align 5 -abi_test_trampoline: - # LR is saved into the caller's stack frame. - mflr r0 - std r0, 16(r1) - - # Allocate 66*8 = 528 bytes of stack frame. From the top of the stack - # to the bottom, the stack frame is: - # - # 0(r1) - Back chain pointer - # 8(r1) - CR save area - # 16(r1) - LR save area (for |func|) - # 24(r1) - TOC pointer save area - # 32(r1) - Saved copy of |state| - # 40(r1) - Padding - # 48(r1) - Vector register save area (v20-v31, 12 registers) - # 240(r1) - General register save area (r14-r31, 18 registers) - # 384(r1) - Floating point register save area (f14-f31, 18 registers) - # - # Note the layouts of the register save areas and CallerState match. - # - # In the ELFv2 ABI, the parameter save area is optional if the function - # is non-variadic and all parameters fit in registers. We only support - # such functions, so we omit it to test that |func| does not rely on it. - stdu r1, -528(r1) - - mfcr r0 - std r0, 8(r1) # Save CR - std r2, 24(r1) # Save TOC - std $state, 32(r1) # Save |state| -____ -# Save registers to the stack. -store_regs("r1", 48); -# Load registers from the caller. -load_regs($state, 0); -$code .= <<____; - # Load CR from |state|. - ld r0, 480($state) - mtcr r0 - - # Move parameters into temporary registers so they are not clobbered. - addi r11, $argv, -8 # Adjust for ldu below - mr r12, $func - - # Load parameters into registers. - cmpdi $argc, 0 - beq .Largs_done - mtctr $argc - ldu r3, 8(r11) - bdz .Largs_done - ldu r4, 8(r11) - bdz .Largs_done - ldu r5, 8(r11) - bdz .Largs_done - ldu r6, 8(r11) - bdz .Largs_done - ldu r7, 8(r11) - bdz .Largs_done - ldu r8, 8(r11) - bdz .Largs_done - ldu r9, 8(r11) - bdz .Largs_done - ldu r10, 8(r11) - -.Largs_done: - li r2, 0 # Clear TOC to test |func|'s global entry point - mtctr r12 - bctrl - ld r2, 24(r1) # Restore TOC - - ld $state, 32(r1) # Reload |state| -____ -# Output resulting registers to the caller. -store_regs($state, 0); -# Restore registers from the stack. -load_regs("r1", 48); -$code .= <<____; - mfcr r0 - std r0, 480($state) # Output CR to caller - ld r0, 8(r1) - mtcrf 0b00111000, r0 # Restore CR2-CR4 - addi r1, r1, 528 - ld r0, 16(r1) # Restore LR - mtlr r0 - blr -.size abi_test_trampoline,.-abi_test_trampoline -____ - -# abi_test_clobber_* clobbers the corresponding register. These are used to test -# the ABI-testing framework. -foreach (0..31) { - # r1 is the stack pointer. r13 is the thread pointer. - next if ($_ == 1 || $_ == 13); - $code .= <<____; -.globl abi_test_clobber_r$_ -.align 5 -abi_test_clobber_r$_: - li r$_, 0 - blr -.size abi_test_clobber_r$_,.-abi_test_clobber_r$_ -____ -} - -foreach (0..31) { - $code .= <<____; -.globl abi_test_clobber_f$_ -.align 4 -abi_test_clobber_f$_: - li r0, 0 - # Use the red zone. - std r0, -8(r1) - lfd f$_, -8(r1) - blr -.size abi_test_clobber_f$_,.-abi_test_clobber_f$_ -____ -} - -foreach (0..31) { - $code .= <<____; -.globl abi_test_clobber_v$_ -.align 4 -abi_test_clobber_v$_: - vxor v$_, v$_, v$_ - blr -.size abi_test_clobber_v$_,.-abi_test_clobber_v$_ -____ -} - -foreach (0..7) { - # PPC orders CR fields in big-endian, so the mask is reversed from what one - # would expect. - my $mask = 1 << (7 - $_); - $code .= <<____; -.globl abi_test_clobber_cr$_ -.align 4 -abi_test_clobber_cr$_: - # Flip the bits on cr$_ rather than setting to zero. With a four-bit - # register, zeroing it will do nothing 1 in 16 times. - mfcr r0 - not r0, r0 - mtcrf $mask, r0 - blr -.size abi_test_clobber_cr$_,.-abi_test_clobber_cr$_ -____ -} - -$code .= <<____; -.globl abi_test_clobber_ctr -.align 4 -abi_test_clobber_ctr: - li r0, 0 - mtctr r0 - blr -.size abi_test_clobber_ctr,.-abi_test_clobber_ctr - -.globl abi_test_clobber_lr -.align 4 -abi_test_clobber_lr: - mflr r0 - mtctr r0 - li r0, 0 - mtlr r0 - bctr -.size abi_test_clobber_lr,.-abi_test_clobber_lr - -____ - -print $code; -close STDOUT or die "error closing STDOUT: $!";
diff --git a/util/BUILD.toplevel b/util/BUILD.toplevel index e0d3148..c314389 100644 --- a/util/BUILD.toplevel +++ b/util/BUILD.toplevel
@@ -24,7 +24,6 @@ "crypto_sources_apple_x86_64", "crypto_sources_linux_aarch64", "crypto_sources_linux_arm", - "crypto_sources_linux_ppc64le", "crypto_sources_linux_x86", "crypto_sources_linux_x86_64", "fips_fragments", @@ -65,14 +64,6 @@ ] ] -config_setting( - name = "linux_ppc64le", - constraint_values = [ - "@platforms//os:linux", - "@platforms//cpu:ppc", - ], -) - posix_copts = [ # Assembler option --noexecstack adds .note.GNU-stack to each object to # ensure that binaries can be built with non-executable stack. @@ -110,7 +101,6 @@ # These selects must be kept in sync. crypto_sources_asm = select({ - ":linux_ppc64le": crypto_sources_linux_ppc64le, ":linux_armv7": crypto_sources_linux_arm, ":linux_arm64": crypto_sources_linux_aarch64, ":linux_x86_32": crypto_sources_linux_x86, @@ -138,7 +128,6 @@ "//conditions:default": [], }) boringssl_copts += select({ - ":linux_ppc64le": [], ":linux_armv7": [], ":linux_arm64": [], ":linux_x86_32": [],
diff --git a/util/fipstools/delocate/delocate.peg b/util/fipstools/delocate/delocate.peg index 0ffecea..6ec5f7a 100644 --- a/util/fipstools/delocate/delocate.peg +++ b/util/fipstools/delocate/delocate.peg
@@ -12,7 +12,7 @@ # OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN # CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -# This is a rough parser for x86-64 and ppc64le assembly designed to work with +# This is a rough parser for x86-64 and aarch64 assembly designed to work with # https://github.com/pointlander/peg. delocate.go has a go:generate line for # rebuilding delocate.peg.go from this file.
diff --git a/util/generate_build_files.py b/util/generate_build_files.py index 4a93a7f..c319a55 100644 --- a/util/generate_build_files.py +++ b/util/generate_build_files.py
@@ -35,7 +35,6 @@ ('apple', 'x86_64', 'macosx', [], 'S'), ('linux', 'arm', 'linux32', [], 'S'), ('linux', 'aarch64', 'linux64', [], 'S'), - ('linux', 'ppc64le', 'linux64le', [], 'S'), ('linux', 'x86', 'elf', ['-fPIC', '-DOPENSSL_IA32_SSE2'], 'S'), ('linux', 'x86_64', 'elf', [], 'S'), ('win', 'x86', 'win32n', ['-DOPENSSL_IA32_SSE2'], 'asm'), @@ -142,7 +141,7 @@ if asm_outputs: blueprint.write(' target: {\n') for ((osname, arch), asm_files) in asm_outputs: - if osname != 'linux' or arch == 'ppc64le': + if osname != 'linux': continue if arch == 'aarch64': arch = 'arm64' @@ -480,8 +479,6 @@ elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "mips") # Just to avoid the “unknown processor” error. set(ARCH "generic") -elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "ppc64le") - set(ARCH "ppc64le") else() message(FATAL_ERROR "Unknown processor:" ${CMAKE_SYSTEM_PROCESSOR}) endif()