Remove ppc64le assembly.
We no longer have a need to support ppc64le, nor do we have any testing
story for the assembly we previously had. Remove all ppc64le-specific
assembly.
This CL stops short of removing it from base.h. That'll be done in a
follow-up CL, just to separate which removals are for the assembly and
which removals remove all support.
Update-Note: After this change, ppc64le builds drop assembly
optimizations and will fall back to a generic C-based AES implementation.
Change-Id: Ic8075638085761d66cebc276eb16c4770ce03920
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/56388
Commit-Queue: David Benjamin <davidben@google.com>
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/cmake/perlasm.cmake b/cmake/perlasm.cmake
index b9530cf..9828799 100644
--- a/cmake/perlasm.cmake
+++ b/cmake/perlasm.cmake
@@ -17,7 +17,6 @@
DEPENDS
${src}
${PROJECT_SOURCE_DIR}/crypto/perlasm/arm-xlate.pl
- ${PROJECT_SOURCE_DIR}/crypto/perlasm/ppc-xlate.pl
${PROJECT_SOURCE_DIR}/crypto/perlasm/x86_64-xlate.pl
${PROJECT_SOURCE_DIR}/crypto/perlasm/x86asm.pl
${PROJECT_SOURCE_DIR}/crypto/perlasm/x86gas.pl
@@ -41,9 +40,6 @@
add_perlasm_target("${dest}-apple.S" ${src} ios32)
add_perlasm_target("${dest}-linux.S" ${src} linux32)
append_to_parent_scope("${var}_ASM" "${dest}-apple.S" "${dest}-linux.S")
- elseif(arch STREQUAL "ppc64le")
- add_perlasm_target("${dest}-linux.S" ${src} linux64le)
- append_to_parent_scope("${var}_ASM" "${dest}-linux.S")
elseif(arch STREQUAL "x86")
add_perlasm_target("${dest}-apple.S" ${src} macosx -fPIC -DOPENSSL_IA32_SSE2)
add_perlasm_target("${dest}-linux.S" ${src} elf -fPIC -DOPENSSL_IA32_SSE2)
diff --git a/crypto/CMakeLists.txt b/crypto/CMakeLists.txt
index e1634a4..ec6d2ee 100644
--- a/crypto/CMakeLists.txt
+++ b/crypto/CMakeLists.txt
@@ -24,7 +24,6 @@
perlasm(CRYPTO_SOURCES aarch64 test/trampoline-armv8 test/asm/trampoline-armv8.pl)
perlasm(CRYPTO_SOURCES arm chacha/chacha-armv4 chacha/asm/chacha-armv4.pl)
perlasm(CRYPTO_SOURCES arm test/trampoline-armv4 test/asm/trampoline-armv4.pl)
-perlasm(CRYPTO_SOURCES ppc64le test/trampoline-ppc test/asm/trampoline-ppc.pl)
perlasm(CRYPTO_SOURCES x86 chacha/chacha-x86 chacha/asm/chacha-x86.pl)
perlasm(CRYPTO_SOURCES x86 test/trampoline-x86 test/asm/trampoline-x86.pl)
perlasm(CRYPTO_SOURCES x86_64 chacha/chacha-x86_64 chacha/asm/chacha-x86_64.pl)
@@ -135,7 +134,6 @@
cpu_arm_linux.c
cpu_arm.c
cpu_intel.c
- cpu_ppc64le.c
crypto.c
curve25519/curve25519.c
curve25519/spake25519.c
diff --git a/crypto/abi_self_test.cc b/crypto/abi_self_test.cc
index 9681498..a42bd1d 100644
--- a/crypto/abi_self_test.cc
+++ b/crypto/abi_self_test.cc
@@ -521,289 +521,3 @@
CHECK_ABI_NO_UNWIND(abi_test_clobber_v15_upper);
}
#endif // OPENSSL_AARCH64 && SUPPORTS_ABI_TEST
-
-#if defined(OPENSSL_PPC64LE) && defined(SUPPORTS_ABI_TEST)
-extern "C" {
-void abi_test_clobber_r0(void);
-// r1 is the stack pointer.
-void abi_test_clobber_r2(void);
-void abi_test_clobber_r3(void);
-void abi_test_clobber_r4(void);
-void abi_test_clobber_r5(void);
-void abi_test_clobber_r6(void);
-void abi_test_clobber_r7(void);
-void abi_test_clobber_r8(void);
-void abi_test_clobber_r9(void);
-void abi_test_clobber_r10(void);
-void abi_test_clobber_r11(void);
-void abi_test_clobber_r12(void);
-// r13 is the thread pointer.
-void abi_test_clobber_r14(void);
-void abi_test_clobber_r15(void);
-void abi_test_clobber_r16(void);
-void abi_test_clobber_r17(void);
-void abi_test_clobber_r18(void);
-void abi_test_clobber_r19(void);
-void abi_test_clobber_r20(void);
-void abi_test_clobber_r21(void);
-void abi_test_clobber_r22(void);
-void abi_test_clobber_r23(void);
-void abi_test_clobber_r24(void);
-void abi_test_clobber_r25(void);
-void abi_test_clobber_r26(void);
-void abi_test_clobber_r27(void);
-void abi_test_clobber_r28(void);
-void abi_test_clobber_r29(void);
-void abi_test_clobber_r30(void);
-void abi_test_clobber_r31(void);
-
-void abi_test_clobber_f0(void);
-void abi_test_clobber_f1(void);
-void abi_test_clobber_f2(void);
-void abi_test_clobber_f3(void);
-void abi_test_clobber_f4(void);
-void abi_test_clobber_f5(void);
-void abi_test_clobber_f6(void);
-void abi_test_clobber_f7(void);
-void abi_test_clobber_f8(void);
-void abi_test_clobber_f9(void);
-void abi_test_clobber_f10(void);
-void abi_test_clobber_f11(void);
-void abi_test_clobber_f12(void);
-void abi_test_clobber_f13(void);
-void abi_test_clobber_f14(void);
-void abi_test_clobber_f15(void);
-void abi_test_clobber_f16(void);
-void abi_test_clobber_f17(void);
-void abi_test_clobber_f18(void);
-void abi_test_clobber_f19(void);
-void abi_test_clobber_f20(void);
-void abi_test_clobber_f21(void);
-void abi_test_clobber_f22(void);
-void abi_test_clobber_f23(void);
-void abi_test_clobber_f24(void);
-void abi_test_clobber_f25(void);
-void abi_test_clobber_f26(void);
-void abi_test_clobber_f27(void);
-void abi_test_clobber_f28(void);
-void abi_test_clobber_f29(void);
-void abi_test_clobber_f30(void);
-void abi_test_clobber_f31(void);
-
-void abi_test_clobber_v0(void);
-void abi_test_clobber_v1(void);
-void abi_test_clobber_v2(void);
-void abi_test_clobber_v3(void);
-void abi_test_clobber_v4(void);
-void abi_test_clobber_v5(void);
-void abi_test_clobber_v6(void);
-void abi_test_clobber_v7(void);
-void abi_test_clobber_v8(void);
-void abi_test_clobber_v9(void);
-void abi_test_clobber_v10(void);
-void abi_test_clobber_v11(void);
-void abi_test_clobber_v12(void);
-void abi_test_clobber_v13(void);
-void abi_test_clobber_v14(void);
-void abi_test_clobber_v15(void);
-void abi_test_clobber_v16(void);
-void abi_test_clobber_v17(void);
-void abi_test_clobber_v18(void);
-void abi_test_clobber_v19(void);
-void abi_test_clobber_v20(void);
-void abi_test_clobber_v21(void);
-void abi_test_clobber_v22(void);
-void abi_test_clobber_v23(void);
-void abi_test_clobber_v24(void);
-void abi_test_clobber_v25(void);
-void abi_test_clobber_v26(void);
-void abi_test_clobber_v27(void);
-void abi_test_clobber_v28(void);
-void abi_test_clobber_v29(void);
-void abi_test_clobber_v30(void);
-void abi_test_clobber_v31(void);
-
-void abi_test_clobber_cr0(void);
-void abi_test_clobber_cr1(void);
-void abi_test_clobber_cr2(void);
-void abi_test_clobber_cr3(void);
-void abi_test_clobber_cr4(void);
-void abi_test_clobber_cr5(void);
-void abi_test_clobber_cr6(void);
-void abi_test_clobber_cr7(void);
-
-void abi_test_clobber_ctr(void);
-void abi_test_clobber_lr(void);
-
-} // extern "C"
-
-TEST(ABITest, PPC64LE) {
- // abi_test_trampoline hides unsaved registers from the caller, so we can
- // safely call the abi_test_clobber_* functions below.
- abi_test::internal::CallerState state;
- RAND_bytes(reinterpret_cast<uint8_t *>(&state), sizeof(state));
- CHECK_ABI_NO_UNWIND(abi_test_trampoline,
- reinterpret_cast<crypto_word_t>(abi_test_clobber_r14),
- &state, nullptr, 0, 0 /* no breakpoint */);
-
- CHECK_ABI_NO_UNWIND(abi_test_clobber_r0);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_r2);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_r3);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_r4);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_r5);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_r6);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_r7);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_r8);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_r9);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_r10);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_r11);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_r12);
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r14),
- "r14 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r15),
- "r15 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r16),
- "r16 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r17),
- "r17 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r18),
- "r18 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r19),
- "r19 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r20),
- "r20 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r21),
- "r21 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r22),
- "r22 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r23),
- "r23 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r24),
- "r24 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r25),
- "r25 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r26),
- "r26 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r27),
- "r27 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r28),
- "r28 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r29),
- "r29 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r30),
- "r30 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r31),
- "r31 was not restored after return");
-
- CHECK_ABI_NO_UNWIND(abi_test_clobber_f0);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_f1);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_f2);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_f3);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_f4);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_f5);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_f6);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_f7);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_f8);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_f9);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_f10);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_f11);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_f12);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_f13);
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f14),
- "f14 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f15),
- "f15 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f16),
- "f16 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f17),
- "f17 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f18),
- "f18 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f19),
- "f19 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f20),
- "f20 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f21),
- "f21 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f22),
- "f22 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f23),
- "f23 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f24),
- "f24 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f25),
- "f25 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f26),
- "f26 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f27),
- "f27 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f28),
- "f28 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f29),
- "f29 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f30),
- "f30 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f31),
- "f31 was not restored after return");
-
- CHECK_ABI_NO_UNWIND(abi_test_clobber_v0);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_v1);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_v2);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_v3);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_v4);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_v5);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_v6);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_v7);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_v8);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_v9);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_v10);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_v11);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_v12);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_v13);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_v14);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_v15);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_v16);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_v17);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_v18);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_v19);
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v20),
- "v20 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v21),
- "v21 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v22),
- "v22 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v23),
- "v23 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v24),
- "v24 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v25),
- "v25 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v26),
- "v26 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v27),
- "v27 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v28),
- "v28 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v29),
- "v29 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v30),
- "v30 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v31),
- "v31 was not restored after return");
-
- CHECK_ABI_NO_UNWIND(abi_test_clobber_cr0);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_cr1);
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_cr2),
- "cr was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_cr3),
- "cr was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_cr4),
- "cr was not restored after return");
- CHECK_ABI_NO_UNWIND(abi_test_clobber_cr5);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_cr6);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_cr7);
-
- CHECK_ABI_NO_UNWIND(abi_test_clobber_ctr);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_lr);
-}
-#endif // OPENSSL_PPC64LE && SUPPORTS_ABI_TEST
diff --git a/crypto/cpu_ppc64le.c b/crypto/cpu_ppc64le.c
deleted file mode 100644
index a802e37..0000000
--- a/crypto/cpu_ppc64le.c
+++ /dev/null
@@ -1,38 +0,0 @@
-/* Copyright (c) 2016, Google Inc.
- *
- * Permission to use, copy, modify, and/or distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
- * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
-
-#include <openssl/base.h>
-
-#if defined(OPENSSL_PPC64LE)
-
-#include <sys/auxv.h>
-
-#include "internal.h"
-
-
-#if !defined(PPC_FEATURE2_HAS_VCRYPTO)
-// PPC_FEATURE2_HAS_VCRYPTO was taken from section 4.1.2.3 of the “OpenPOWER
-// ABI for Linux Supplement”.
-#define PPC_FEATURE2_HAS_VCRYPTO 0x02000000
-#endif
-
-void OPENSSL_cpuid_setup(void) {
- OPENSSL_ppc64le_hwcap2 = getauxval(AT_HWCAP2);
-}
-
-int CRYPTO_is_PPC64LE_vcrypto_capable(void) {
- return (OPENSSL_ppc64le_hwcap2 & PPC_FEATURE2_HAS_VCRYPTO) != 0;
-}
-
-#endif // OPENSSL_PPC64LE
diff --git a/crypto/crypto.c b/crypto/crypto.c
index 12cbb888..beaae0f 100644
--- a/crypto/crypto.c
+++ b/crypto/crypto.c
@@ -25,12 +25,10 @@
"ossl_ssize_t should be the same size as size_t");
#if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_STATIC_ARMCAP) && \
- (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || \
- defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64) || \
- defined(OPENSSL_PPC64LE))
-// x86, x86_64, the ARMs and ppc64le need to record the result of a
-// cpuid/getauxval call for the asm to work correctly, unless compiled without
-// asm code.
+ (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || \
+ defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64))
+// x86, x86_64, and the ARMs need to record the result of a cpuid/getauxval call
+// for the asm to work correctly, unless compiled without asm code.
#define NEED_CPUID
#else
@@ -41,8 +39,7 @@
#define BORINGSSL_NO_STATIC_INITIALIZER
#endif
-#endif // !NO_ASM && !STATIC_ARMCAP &&
- // (X86 || X86_64 || ARM || AARCH64 || PPC64LE)
+#endif // !NO_ASM && !STATIC_ARMCAP && (X86 || X86_64 || ARM || AARCH64)
// Our assembly does not use the GOT to reference symbols, which means
@@ -81,10 +78,6 @@
// This value must be explicitly initialized to zero. See similar comment above.
HIDDEN uint32_t OPENSSL_ia32cap_P[4] = {0};
-#elif defined(OPENSSL_PPC64LE)
-
-HIDDEN unsigned long OPENSSL_ppc64le_hwcap2 = 0;
-
#elif defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)
#include <openssl/arm_arch.h>
diff --git a/crypto/fipsmodule/CMakeLists.txt b/crypto/fipsmodule/CMakeLists.txt
index 82505b7..2bfadab 100644
--- a/crypto/fipsmodule/CMakeLists.txt
+++ b/crypto/fipsmodule/CMakeLists.txt
@@ -20,8 +20,6 @@
perlasm(BCM_SOURCES arm sha256-armv4 sha/asm/sha256-armv4.pl)
perlasm(BCM_SOURCES arm sha512-armv4 sha/asm/sha512-armv4.pl)
perlasm(BCM_SOURCES arm vpaes-armv7 aes/asm/vpaes-armv7.pl)
-perlasm(BCM_SOURCES ppc64le aesp8-ppc aes/asm/aesp8-ppc.pl)
-perlasm(BCM_SOURCES ppc64le ghashp8-ppc modes/asm/ghashp8-ppc.pl)
perlasm(BCM_SOURCES x86 aesni-x86 aes/asm/aesni-x86.pl)
perlasm(BCM_SOURCES x86 bn-586 bn/asm/bn-586.pl)
perlasm(BCM_SOURCES x86 co-586 bn/asm/co-586.pl)
diff --git a/crypto/fipsmodule/aes/asm/aesp8-ppc.pl b/crypto/fipsmodule/aes/asm/aesp8-ppc.pl
deleted file mode 100644
index 061f6b7..0000000
--- a/crypto/fipsmodule/aes/asm/aesp8-ppc.pl
+++ /dev/null
@@ -1,3809 +0,0 @@
-#! /usr/bin/env perl
-# Copyright 2014-2018 The OpenSSL Project Authors. All Rights Reserved.
-#
-# Licensed under the OpenSSL license (the "License"). You may not use
-# this file except in compliance with the License. You can obtain a copy
-# in the file LICENSE in the source distribution or at
-# https://www.openssl.org/source/license.html
-
-#
-# ====================================================================
-# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
-# project. The module is, however, dual licensed under OpenSSL and
-# CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see http://www.openssl.org/~appro/cryptogams/.
-# ====================================================================
-#
-# This module implements support for AES instructions as per PowerISA
-# specification version 2.07, first implemented by POWER8 processor.
-# The module is endian-agnostic in sense that it supports both big-
-# and little-endian cases. Data alignment in parallelizable modes is
-# handled with VSX loads and stores, which implies MSR.VSX flag being
-# set. It should also be noted that ISA specification doesn't prohibit
-# alignment exceptions for these instructions on page boundaries.
-# Initially alignment was handled in pure AltiVec/VMX way [when data
-# is aligned programmatically, which in turn guarantees exception-
-# free execution], but it turned to hamper performance when vcipher
-# instructions are interleaved. It's reckoned that eventual
-# misalignment penalties at page boundaries are in average lower
-# than additional overhead in pure AltiVec approach.
-#
-# May 2016
-#
-# Add XTS subroutine, 9x on little- and 12x improvement on big-endian
-# systems were measured.
-#
-######################################################################
-# Current large-block performance in cycles per byte processed with
-# 128-bit key (less is better).
-#
-# CBC en-/decrypt CTR XTS
-# POWER8[le] 3.96/0.72 0.74 1.1
-# POWER8[be] 3.75/0.65 0.66 1.0
-# POWER9[le] 4.02/0.86 0.84 1.05
-# POWER9[be] 3.99/0.78 0.79 0.97
-
-$flavour = shift;
-$output = shift;
-
-if ($flavour =~ /64/) {
- $SIZE_T =8;
- $LRSAVE =2*$SIZE_T;
- $STU ="stdu";
- $POP ="ld";
- $PUSH ="std";
- $UCMP ="cmpld";
- $SHL ="sldi";
-} elsif ($flavour =~ /32/) {
- $SIZE_T =4;
- $LRSAVE =$SIZE_T;
- $STU ="stwu";
- $POP ="lwz";
- $PUSH ="stw";
- $UCMP ="cmplw";
- $SHL ="slwi";
-} else { die "nonsense $flavour"; }
-
-$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
-
-$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
-( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
-( $xlate="${dir}../../../perlasm/ppc-xlate.pl" and -f $xlate) or
-die "can't locate ppc-xlate.pl";
-
-open OUT,"| $^X \"$xlate\" $flavour \"$output\"" || die "can't call $xlate: $!";
-*STDOUT=*OUT;
-
-$FRAME=8*$SIZE_T;
-$prefix="aes_hw";
-
-$sp="r1";
-$vrsave="r12";
-
-#########################################################################
-{{{ # Key setup procedures #
-my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
-my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
-my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
-
-$code.=<<___;
-.machine "any"
-
-.text
-
-.align 7
-Lrcon:
-.long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev
-.long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev
-.long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev
-.long 0,0,0,0 ?asis
-Lconsts:
- mflr r0
- bcl 20,31,\$+4
- mflr $ptr #vvvvv "distance between . and rcon
- addi $ptr,$ptr,-0x48
- mtlr r0
- blr
- .long 0
- .byte 0,12,0x14,0,0,0,0,0
-.asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
-
-.globl .${prefix}_set_encrypt_key
-.align 5
-.${prefix}_set_encrypt_key:
-Lset_encrypt_key:
- mflr r11
- $PUSH r11,$LRSAVE($sp)
-
- li $ptr,-1
- ${UCMP}i $inp,0
- beq- Lenc_key_abort # if ($inp==0) return -1;
- ${UCMP}i $out,0
- beq- Lenc_key_abort # if ($out==0) return -1;
- li $ptr,-2
- cmpwi $bits,128
- blt- Lenc_key_abort
- cmpwi $bits,256
- bgt- Lenc_key_abort
- andi. r0,$bits,0x3f
- bne- Lenc_key_abort
-
- lis r0,0xfff0
- mfspr $vrsave,256
- mtspr 256,r0
-
- bl Lconsts
- mtlr r11
-
- neg r9,$inp
- lvx $in0,0,$inp
- addi $inp,$inp,15 # 15 is not typo
- lvsr $key,0,r9 # borrow $key
- li r8,0x20
- cmpwi $bits,192
- lvx $in1,0,$inp
- le?vspltisb $mask,0x0f # borrow $mask
- lvx $rcon,0,$ptr
- le?vxor $key,$key,$mask # adjust for byte swap
- lvx $mask,r8,$ptr
- addi $ptr,$ptr,0x10
- vperm $in0,$in0,$in1,$key # align [and byte swap in LE]
- li $cnt,8
- vxor $zero,$zero,$zero
- mtctr $cnt
-
- ?lvsr $outperm,0,$out
- vspltisb $outmask,-1
- lvx $outhead,0,$out
- ?vperm $outmask,$zero,$outmask,$outperm
-
- blt Loop128
- addi $inp,$inp,8
- beq L192
- addi $inp,$inp,8
- b L256
-
-.align 4
-Loop128:
- vperm $key,$in0,$in0,$mask # rotate-n-splat
- vsldoi $tmp,$zero,$in0,12 # >>32
- vperm $outtail,$in0,$in0,$outperm # rotate
- vsel $stage,$outhead,$outtail,$outmask
- vmr $outhead,$outtail
- vcipherlast $key,$key,$rcon
- stvx $stage,0,$out
- addi $out,$out,16
-
- vxor $in0,$in0,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in0,$in0,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in0,$in0,$tmp
- vadduwm $rcon,$rcon,$rcon
- vxor $in0,$in0,$key
- bdnz Loop128
-
- lvx $rcon,0,$ptr # last two round keys
-
- vperm $key,$in0,$in0,$mask # rotate-n-splat
- vsldoi $tmp,$zero,$in0,12 # >>32
- vperm $outtail,$in0,$in0,$outperm # rotate
- vsel $stage,$outhead,$outtail,$outmask
- vmr $outhead,$outtail
- vcipherlast $key,$key,$rcon
- stvx $stage,0,$out
- addi $out,$out,16
-
- vxor $in0,$in0,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in0,$in0,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in0,$in0,$tmp
- vadduwm $rcon,$rcon,$rcon
- vxor $in0,$in0,$key
-
- vperm $key,$in0,$in0,$mask # rotate-n-splat
- vsldoi $tmp,$zero,$in0,12 # >>32
- vperm $outtail,$in0,$in0,$outperm # rotate
- vsel $stage,$outhead,$outtail,$outmask
- vmr $outhead,$outtail
- vcipherlast $key,$key,$rcon
- stvx $stage,0,$out
- addi $out,$out,16
-
- vxor $in0,$in0,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in0,$in0,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in0,$in0,$tmp
- vxor $in0,$in0,$key
- vperm $outtail,$in0,$in0,$outperm # rotate
- vsel $stage,$outhead,$outtail,$outmask
- vmr $outhead,$outtail
- stvx $stage,0,$out
-
- addi $inp,$out,15 # 15 is not typo
- addi $out,$out,0x50
-
- li $rounds,10
- b Ldone
-
-.align 4
-L192:
- lvx $tmp,0,$inp
- li $cnt,4
- vperm $outtail,$in0,$in0,$outperm # rotate
- vsel $stage,$outhead,$outtail,$outmask
- vmr $outhead,$outtail
- stvx $stage,0,$out
- addi $out,$out,16
- vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
- vspltisb $key,8 # borrow $key
- mtctr $cnt
- vsububm $mask,$mask,$key # adjust the mask
-
-Loop192:
- vperm $key,$in1,$in1,$mask # roate-n-splat
- vsldoi $tmp,$zero,$in0,12 # >>32
- vcipherlast $key,$key,$rcon
-
- vxor $in0,$in0,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in0,$in0,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in0,$in0,$tmp
-
- vsldoi $stage,$zero,$in1,8
- vspltw $tmp,$in0,3
- vxor $tmp,$tmp,$in1
- vsldoi $in1,$zero,$in1,12 # >>32
- vadduwm $rcon,$rcon,$rcon
- vxor $in1,$in1,$tmp
- vxor $in0,$in0,$key
- vxor $in1,$in1,$key
- vsldoi $stage,$stage,$in0,8
-
- vperm $key,$in1,$in1,$mask # rotate-n-splat
- vsldoi $tmp,$zero,$in0,12 # >>32
- vperm $outtail,$stage,$stage,$outperm # rotate
- vsel $stage,$outhead,$outtail,$outmask
- vmr $outhead,$outtail
- vcipherlast $key,$key,$rcon
- stvx $stage,0,$out
- addi $out,$out,16
-
- vsldoi $stage,$in0,$in1,8
- vxor $in0,$in0,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vperm $outtail,$stage,$stage,$outperm # rotate
- vsel $stage,$outhead,$outtail,$outmask
- vmr $outhead,$outtail
- vxor $in0,$in0,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in0,$in0,$tmp
- stvx $stage,0,$out
- addi $out,$out,16
-
- vspltw $tmp,$in0,3
- vxor $tmp,$tmp,$in1
- vsldoi $in1,$zero,$in1,12 # >>32
- vadduwm $rcon,$rcon,$rcon
- vxor $in1,$in1,$tmp
- vxor $in0,$in0,$key
- vxor $in1,$in1,$key
- vperm $outtail,$in0,$in0,$outperm # rotate
- vsel $stage,$outhead,$outtail,$outmask
- vmr $outhead,$outtail
- stvx $stage,0,$out
- addi $inp,$out,15 # 15 is not typo
- addi $out,$out,16
- bdnz Loop192
-
- li $rounds,12
- addi $out,$out,0x20
- b Ldone
-
-.align 4
-L256:
- lvx $tmp,0,$inp
- li $cnt,7
- li $rounds,14
- vperm $outtail,$in0,$in0,$outperm # rotate
- vsel $stage,$outhead,$outtail,$outmask
- vmr $outhead,$outtail
- stvx $stage,0,$out
- addi $out,$out,16
- vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
- mtctr $cnt
-
-Loop256:
- vperm $key,$in1,$in1,$mask # rotate-n-splat
- vsldoi $tmp,$zero,$in0,12 # >>32
- vperm $outtail,$in1,$in1,$outperm # rotate
- vsel $stage,$outhead,$outtail,$outmask
- vmr $outhead,$outtail
- vcipherlast $key,$key,$rcon
- stvx $stage,0,$out
- addi $out,$out,16
-
- vxor $in0,$in0,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in0,$in0,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in0,$in0,$tmp
- vadduwm $rcon,$rcon,$rcon
- vxor $in0,$in0,$key
- vperm $outtail,$in0,$in0,$outperm # rotate
- vsel $stage,$outhead,$outtail,$outmask
- vmr $outhead,$outtail
- stvx $stage,0,$out
- addi $inp,$out,15 # 15 is not typo
- addi $out,$out,16
- bdz Ldone
-
- vspltw $key,$in0,3 # just splat
- vsldoi $tmp,$zero,$in1,12 # >>32
- vsbox $key,$key
-
- vxor $in1,$in1,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in1,$in1,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in1,$in1,$tmp
-
- vxor $in1,$in1,$key
- b Loop256
-
-.align 4
-Ldone:
- lvx $in1,0,$inp # redundant in aligned case
- vsel $in1,$outhead,$in1,$outmask
- stvx $in1,0,$inp
- li $ptr,0
- mtspr 256,$vrsave
- stw $rounds,0($out)
-
-Lenc_key_abort:
- mr r3,$ptr
- blr
- .long 0
- .byte 0,12,0x14,1,0,0,3,0
- .long 0
-.size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key
-
-.globl .${prefix}_set_decrypt_key
-.align 5
-.${prefix}_set_decrypt_key:
- $STU $sp,-$FRAME($sp)
- mflr r10
- $PUSH r10,`$FRAME+$LRSAVE`($sp)
- bl Lset_encrypt_key
- mtlr r10
-
- cmpwi r3,0
- bne- Ldec_key_abort
-
- slwi $cnt,$rounds,4
- subi $inp,$out,240 # first round key
- srwi $rounds,$rounds,1
- add $out,$inp,$cnt # last round key
- mtctr $rounds
-
-Ldeckey:
- lwz r0, 0($inp)
- lwz r6, 4($inp)
- lwz r7, 8($inp)
- lwz r8, 12($inp)
- addi $inp,$inp,16
- lwz r9, 0($out)
- lwz r10,4($out)
- lwz r11,8($out)
- lwz r12,12($out)
- stw r0, 0($out)
- stw r6, 4($out)
- stw r7, 8($out)
- stw r8, 12($out)
- subi $out,$out,16
- stw r9, -16($inp)
- stw r10,-12($inp)
- stw r11,-8($inp)
- stw r12,-4($inp)
- bdnz Ldeckey
-
- xor r3,r3,r3 # return value
-Ldec_key_abort:
- addi $sp,$sp,$FRAME
- blr
- .long 0
- .byte 0,12,4,1,0x80,0,3,0
- .long 0
-.size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
-___
-}}}
-#########################################################################
-{{{ # Single block en- and decrypt procedures #
-sub gen_block () {
-my $dir = shift;
-my $n = $dir eq "de" ? "n" : "";
-my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
-
-$code.=<<___;
-.globl .${prefix}_${dir}crypt
-.align 5
-.${prefix}_${dir}crypt:
- lwz $rounds,240($key)
- lis r0,0xfc00
- mfspr $vrsave,256
- li $idx,15 # 15 is not typo
- mtspr 256,r0
-
- lvx v0,0,$inp
- neg r11,$out
- lvx v1,$idx,$inp
- lvsl v2,0,$inp # inpperm
- le?vspltisb v4,0x0f
- ?lvsl v3,0,r11 # outperm
- le?vxor v2,v2,v4
- li $idx,16
- vperm v0,v0,v1,v2 # align [and byte swap in LE]
- lvx v1,0,$key
- ?lvsl v5,0,$key # keyperm
- srwi $rounds,$rounds,1
- lvx v2,$idx,$key
- addi $idx,$idx,16
- subi $rounds,$rounds,1
- ?vperm v1,v1,v2,v5 # align round key
-
- vxor v0,v0,v1
- lvx v1,$idx,$key
- addi $idx,$idx,16
- mtctr $rounds
-
-Loop_${dir}c:
- ?vperm v2,v2,v1,v5
- v${n}cipher v0,v0,v2
- lvx v2,$idx,$key
- addi $idx,$idx,16
- ?vperm v1,v1,v2,v5
- v${n}cipher v0,v0,v1
- lvx v1,$idx,$key
- addi $idx,$idx,16
- bdnz Loop_${dir}c
-
- ?vperm v2,v2,v1,v5
- v${n}cipher v0,v0,v2
- lvx v2,$idx,$key
- ?vperm v1,v1,v2,v5
- v${n}cipherlast v0,v0,v1
-
- vspltisb v2,-1
- vxor v1,v1,v1
- li $idx,15 # 15 is not typo
- ?vperm v2,v1,v2,v3 # outmask
- le?vxor v3,v3,v4
- lvx v1,0,$out # outhead
- vperm v0,v0,v0,v3 # rotate [and byte swap in LE]
- vsel v1,v1,v0,v2
- lvx v4,$idx,$out
- stvx v1,0,$out
- vsel v0,v0,v4,v2
- stvx v0,$idx,$out
-
- mtspr 256,$vrsave
- blr
- .long 0
- .byte 0,12,0x14,0,0,0,3,0
- .long 0
-.size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
-___
-}
-&gen_block("en");
-&gen_block("de");
-}}}
-#########################################################################
-{{{ # CBC en- and decrypt procedures #
-my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
-my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
-my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
- map("v$_",(4..10));
-$code.=<<___;
-.globl .${prefix}_cbc_encrypt
-.align 5
-.${prefix}_cbc_encrypt:
- ${UCMP}i $len,16
- bltlr-
-
- cmpwi $enc,0 # test direction
- lis r0,0xffe0
- mfspr $vrsave,256
- mtspr 256,r0
-
- li $idx,15
- vxor $rndkey0,$rndkey0,$rndkey0
- le?vspltisb $tmp,0x0f
-
- lvx $ivec,0,$ivp # load [unaligned] iv
- lvsl $inpperm,0,$ivp
- lvx $inptail,$idx,$ivp
- le?vxor $inpperm,$inpperm,$tmp
- vperm $ivec,$ivec,$inptail,$inpperm
-
- neg r11,$inp
- ?lvsl $keyperm,0,$key # prepare for unaligned key
- lwz $rounds,240($key)
-
- lvsr $inpperm,0,r11 # prepare for unaligned load
- lvx $inptail,0,$inp
- addi $inp,$inp,15 # 15 is not typo
- le?vxor $inpperm,$inpperm,$tmp
-
- ?lvsr $outperm,0,$out # prepare for unaligned store
- vspltisb $outmask,-1
- lvx $outhead,0,$out
- ?vperm $outmask,$rndkey0,$outmask,$outperm
- le?vxor $outperm,$outperm,$tmp
-
- srwi $rounds,$rounds,1
- li $idx,16
- subi $rounds,$rounds,1
- beq Lcbc_dec
-
-Lcbc_enc:
- vmr $inout,$inptail
- lvx $inptail,0,$inp
- addi $inp,$inp,16
- mtctr $rounds
- subi $len,$len,16 # len-=16
-
- lvx $rndkey0,0,$key
- vperm $inout,$inout,$inptail,$inpperm
- lvx $rndkey1,$idx,$key
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vxor $inout,$inout,$rndkey0
- lvx $rndkey0,$idx,$key
- addi $idx,$idx,16
- vxor $inout,$inout,$ivec
-
-Loop_cbc_enc:
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vcipher $inout,$inout,$rndkey1
- lvx $rndkey1,$idx,$key
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vcipher $inout,$inout,$rndkey0
- lvx $rndkey0,$idx,$key
- addi $idx,$idx,16
- bdnz Loop_cbc_enc
-
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vcipher $inout,$inout,$rndkey1
- lvx $rndkey1,$idx,$key
- li $idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vcipherlast $ivec,$inout,$rndkey0
- ${UCMP}i $len,16
-
- vperm $tmp,$ivec,$ivec,$outperm
- vsel $inout,$outhead,$tmp,$outmask
- vmr $outhead,$tmp
- stvx $inout,0,$out
- addi $out,$out,16
- bge Lcbc_enc
-
- b Lcbc_done
-
-.align 4
-Lcbc_dec:
- ${UCMP}i $len,128
- bge _aesp8_cbc_decrypt8x
- vmr $tmp,$inptail
- lvx $inptail,0,$inp
- addi $inp,$inp,16
- mtctr $rounds
- subi $len,$len,16 # len-=16
-
- lvx $rndkey0,0,$key
- vperm $tmp,$tmp,$inptail,$inpperm
- lvx $rndkey1,$idx,$key
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vxor $inout,$tmp,$rndkey0
- lvx $rndkey0,$idx,$key
- addi $idx,$idx,16
-
-Loop_cbc_dec:
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vncipher $inout,$inout,$rndkey1
- lvx $rndkey1,$idx,$key
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vncipher $inout,$inout,$rndkey0
- lvx $rndkey0,$idx,$key
- addi $idx,$idx,16
- bdnz Loop_cbc_dec
-
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vncipher $inout,$inout,$rndkey1
- lvx $rndkey1,$idx,$key
- li $idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vncipherlast $inout,$inout,$rndkey0
- ${UCMP}i $len,16
-
- vxor $inout,$inout,$ivec
- vmr $ivec,$tmp
- vperm $tmp,$inout,$inout,$outperm
- vsel $inout,$outhead,$tmp,$outmask
- vmr $outhead,$tmp
- stvx $inout,0,$out
- addi $out,$out,16
- bge Lcbc_dec
-
-Lcbc_done:
- addi $out,$out,-1
- lvx $inout,0,$out # redundant in aligned case
- vsel $inout,$outhead,$inout,$outmask
- stvx $inout,0,$out
-
- neg $enc,$ivp # write [unaligned] iv
- li $idx,15 # 15 is not typo
- vxor $rndkey0,$rndkey0,$rndkey0
- vspltisb $outmask,-1
- le?vspltisb $tmp,0x0f
- ?lvsl $outperm,0,$enc
- ?vperm $outmask,$rndkey0,$outmask,$outperm
- le?vxor $outperm,$outperm,$tmp
- lvx $outhead,0,$ivp
- vperm $ivec,$ivec,$ivec,$outperm
- vsel $inout,$outhead,$ivec,$outmask
- lvx $inptail,$idx,$ivp
- stvx $inout,0,$ivp
- vsel $inout,$ivec,$inptail,$outmask
- stvx $inout,$idx,$ivp
-
- mtspr 256,$vrsave
- blr
- .long 0
- .byte 0,12,0x14,0,0,0,6,0
- .long 0
-___
-#########################################################################
-{{ # Optimized CBC decrypt procedure #
-my $key_="r11";
-my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
- $x00=0 if ($flavour =~ /osx/);
-my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
-my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
-my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
- # v26-v31 last 6 round keys
-my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
-
-$code.=<<___;
-.align 5
-_aesp8_cbc_decrypt8x:
- $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
- li r10,`$FRAME+8*16+15`
- li r11,`$FRAME+8*16+31`
- stvx v20,r10,$sp # ABI says so
- addi r10,r10,32
- stvx v21,r11,$sp
- addi r11,r11,32
- stvx v22,r10,$sp
- addi r10,r10,32
- stvx v23,r11,$sp
- addi r11,r11,32
- stvx v24,r10,$sp
- addi r10,r10,32
- stvx v25,r11,$sp
- addi r11,r11,32
- stvx v26,r10,$sp
- addi r10,r10,32
- stvx v27,r11,$sp
- addi r11,r11,32
- stvx v28,r10,$sp
- addi r10,r10,32
- stvx v29,r11,$sp
- addi r11,r11,32
- stvx v30,r10,$sp
- stvx v31,r11,$sp
- li r0,-1
- stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
- li $x10,0x10
- $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
- li $x20,0x20
- $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
- li $x30,0x30
- $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
- li $x40,0x40
- $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
- li $x50,0x50
- $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
- li $x60,0x60
- $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
- li $x70,0x70
- mtspr 256,r0
-
- subi $rounds,$rounds,3 # -4 in total
- subi $len,$len,128 # bias
-
- lvx $rndkey0,$x00,$key # load key schedule
- lvx v30,$x10,$key
- addi $key,$key,0x20
- lvx v31,$x00,$key
- ?vperm $rndkey0,$rndkey0,v30,$keyperm
- addi $key_,$sp,`$FRAME+15`
- mtctr $rounds
-
-Load_cbc_dec_key:
- ?vperm v24,v30,v31,$keyperm
- lvx v30,$x10,$key
- addi $key,$key,0x20
- stvx v24,$x00,$key_ # off-load round[1]
- ?vperm v25,v31,v30,$keyperm
- lvx v31,$x00,$key
- stvx v25,$x10,$key_ # off-load round[2]
- addi $key_,$key_,0x20
- bdnz Load_cbc_dec_key
-
- lvx v26,$x10,$key
- ?vperm v24,v30,v31,$keyperm
- lvx v27,$x20,$key
- stvx v24,$x00,$key_ # off-load round[3]
- ?vperm v25,v31,v26,$keyperm
- lvx v28,$x30,$key
- stvx v25,$x10,$key_ # off-load round[4]
- addi $key_,$sp,`$FRAME+15` # rewind $key_
- ?vperm v26,v26,v27,$keyperm
- lvx v29,$x40,$key
- ?vperm v27,v27,v28,$keyperm
- lvx v30,$x50,$key
- ?vperm v28,v28,v29,$keyperm
- lvx v31,$x60,$key
- ?vperm v29,v29,v30,$keyperm
- lvx $out0,$x70,$key # borrow $out0
- ?vperm v30,v30,v31,$keyperm
- lvx v24,$x00,$key_ # pre-load round[1]
- ?vperm v31,v31,$out0,$keyperm
- lvx v25,$x10,$key_ # pre-load round[2]
-
- #lvx $inptail,0,$inp # "caller" already did this
- #addi $inp,$inp,15 # 15 is not typo
- subi $inp,$inp,15 # undo "caller"
-
- le?li $idx,8
- lvx_u $in0,$x00,$inp # load first 8 "words"
- le?lvsl $inpperm,0,$idx
- le?vspltisb $tmp,0x0f
- lvx_u $in1,$x10,$inp
- le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
- lvx_u $in2,$x20,$inp
- le?vperm $in0,$in0,$in0,$inpperm
- lvx_u $in3,$x30,$inp
- le?vperm $in1,$in1,$in1,$inpperm
- lvx_u $in4,$x40,$inp
- le?vperm $in2,$in2,$in2,$inpperm
- vxor $out0,$in0,$rndkey0
- lvx_u $in5,$x50,$inp
- le?vperm $in3,$in3,$in3,$inpperm
- vxor $out1,$in1,$rndkey0
- lvx_u $in6,$x60,$inp
- le?vperm $in4,$in4,$in4,$inpperm
- vxor $out2,$in2,$rndkey0
- lvx_u $in7,$x70,$inp
- addi $inp,$inp,0x80
- le?vperm $in5,$in5,$in5,$inpperm
- vxor $out3,$in3,$rndkey0
- le?vperm $in6,$in6,$in6,$inpperm
- vxor $out4,$in4,$rndkey0
- le?vperm $in7,$in7,$in7,$inpperm
- vxor $out5,$in5,$rndkey0
- vxor $out6,$in6,$rndkey0
- vxor $out7,$in7,$rndkey0
-
- mtctr $rounds
- b Loop_cbc_dec8x
-.align 5
-Loop_cbc_dec8x:
- vncipher $out0,$out0,v24
- vncipher $out1,$out1,v24
- vncipher $out2,$out2,v24
- vncipher $out3,$out3,v24
- vncipher $out4,$out4,v24
- vncipher $out5,$out5,v24
- vncipher $out6,$out6,v24
- vncipher $out7,$out7,v24
- lvx v24,$x20,$key_ # round[3]
- addi $key_,$key_,0x20
-
- vncipher $out0,$out0,v25
- vncipher $out1,$out1,v25
- vncipher $out2,$out2,v25
- vncipher $out3,$out3,v25
- vncipher $out4,$out4,v25
- vncipher $out5,$out5,v25
- vncipher $out6,$out6,v25
- vncipher $out7,$out7,v25
- lvx v25,$x10,$key_ # round[4]
- bdnz Loop_cbc_dec8x
-
- subic $len,$len,128 # $len-=128
- vncipher $out0,$out0,v24
- vncipher $out1,$out1,v24
- vncipher $out2,$out2,v24
- vncipher $out3,$out3,v24
- vncipher $out4,$out4,v24
- vncipher $out5,$out5,v24
- vncipher $out6,$out6,v24
- vncipher $out7,$out7,v24
-
- subfe. r0,r0,r0 # borrow?-1:0
- vncipher $out0,$out0,v25
- vncipher $out1,$out1,v25
- vncipher $out2,$out2,v25
- vncipher $out3,$out3,v25
- vncipher $out4,$out4,v25
- vncipher $out5,$out5,v25
- vncipher $out6,$out6,v25
- vncipher $out7,$out7,v25
-
- and r0,r0,$len
- vncipher $out0,$out0,v26
- vncipher $out1,$out1,v26
- vncipher $out2,$out2,v26
- vncipher $out3,$out3,v26
- vncipher $out4,$out4,v26
- vncipher $out5,$out5,v26
- vncipher $out6,$out6,v26
- vncipher $out7,$out7,v26
-
- add $inp,$inp,r0 # $inp is adjusted in such
- # way that at exit from the
- # loop inX-in7 are loaded
- # with last "words"
- vncipher $out0,$out0,v27
- vncipher $out1,$out1,v27
- vncipher $out2,$out2,v27
- vncipher $out3,$out3,v27
- vncipher $out4,$out4,v27
- vncipher $out5,$out5,v27
- vncipher $out6,$out6,v27
- vncipher $out7,$out7,v27
-
- addi $key_,$sp,`$FRAME+15` # rewind $key_
- vncipher $out0,$out0,v28
- vncipher $out1,$out1,v28
- vncipher $out2,$out2,v28
- vncipher $out3,$out3,v28
- vncipher $out4,$out4,v28
- vncipher $out5,$out5,v28
- vncipher $out6,$out6,v28
- vncipher $out7,$out7,v28
- lvx v24,$x00,$key_ # re-pre-load round[1]
-
- vncipher $out0,$out0,v29
- vncipher $out1,$out1,v29
- vncipher $out2,$out2,v29
- vncipher $out3,$out3,v29
- vncipher $out4,$out4,v29
- vncipher $out5,$out5,v29
- vncipher $out6,$out6,v29
- vncipher $out7,$out7,v29
- lvx v25,$x10,$key_ # re-pre-load round[2]
-
- vncipher $out0,$out0,v30
- vxor $ivec,$ivec,v31 # xor with last round key
- vncipher $out1,$out1,v30
- vxor $in0,$in0,v31
- vncipher $out2,$out2,v30
- vxor $in1,$in1,v31
- vncipher $out3,$out3,v30
- vxor $in2,$in2,v31
- vncipher $out4,$out4,v30
- vxor $in3,$in3,v31
- vncipher $out5,$out5,v30
- vxor $in4,$in4,v31
- vncipher $out6,$out6,v30
- vxor $in5,$in5,v31
- vncipher $out7,$out7,v30
- vxor $in6,$in6,v31
-
- vncipherlast $out0,$out0,$ivec
- vncipherlast $out1,$out1,$in0
- lvx_u $in0,$x00,$inp # load next input block
- vncipherlast $out2,$out2,$in1
- lvx_u $in1,$x10,$inp
- vncipherlast $out3,$out3,$in2
- le?vperm $in0,$in0,$in0,$inpperm
- lvx_u $in2,$x20,$inp
- vncipherlast $out4,$out4,$in3
- le?vperm $in1,$in1,$in1,$inpperm
- lvx_u $in3,$x30,$inp
- vncipherlast $out5,$out5,$in4
- le?vperm $in2,$in2,$in2,$inpperm
- lvx_u $in4,$x40,$inp
- vncipherlast $out6,$out6,$in5
- le?vperm $in3,$in3,$in3,$inpperm
- lvx_u $in5,$x50,$inp
- vncipherlast $out7,$out7,$in6
- le?vperm $in4,$in4,$in4,$inpperm
- lvx_u $in6,$x60,$inp
- vmr $ivec,$in7
- le?vperm $in5,$in5,$in5,$inpperm
- lvx_u $in7,$x70,$inp
- addi $inp,$inp,0x80
-
- le?vperm $out0,$out0,$out0,$inpperm
- le?vperm $out1,$out1,$out1,$inpperm
- stvx_u $out0,$x00,$out
- le?vperm $in6,$in6,$in6,$inpperm
- vxor $out0,$in0,$rndkey0
- le?vperm $out2,$out2,$out2,$inpperm
- stvx_u $out1,$x10,$out
- le?vperm $in7,$in7,$in7,$inpperm
- vxor $out1,$in1,$rndkey0
- le?vperm $out3,$out3,$out3,$inpperm
- stvx_u $out2,$x20,$out
- vxor $out2,$in2,$rndkey0
- le?vperm $out4,$out4,$out4,$inpperm
- stvx_u $out3,$x30,$out
- vxor $out3,$in3,$rndkey0
- le?vperm $out5,$out5,$out5,$inpperm
- stvx_u $out4,$x40,$out
- vxor $out4,$in4,$rndkey0
- le?vperm $out6,$out6,$out6,$inpperm
- stvx_u $out5,$x50,$out
- vxor $out5,$in5,$rndkey0
- le?vperm $out7,$out7,$out7,$inpperm
- stvx_u $out6,$x60,$out
- vxor $out6,$in6,$rndkey0
- stvx_u $out7,$x70,$out
- addi $out,$out,0x80
- vxor $out7,$in7,$rndkey0
-
- mtctr $rounds
- beq Loop_cbc_dec8x # did $len-=128 borrow?
-
- addic. $len,$len,128
- beq Lcbc_dec8x_done
- nop
- nop
-
-Loop_cbc_dec8x_tail: # up to 7 "words" tail...
- vncipher $out1,$out1,v24
- vncipher $out2,$out2,v24
- vncipher $out3,$out3,v24
- vncipher $out4,$out4,v24
- vncipher $out5,$out5,v24
- vncipher $out6,$out6,v24
- vncipher $out7,$out7,v24
- lvx v24,$x20,$key_ # round[3]
- addi $key_,$key_,0x20
-
- vncipher $out1,$out1,v25
- vncipher $out2,$out2,v25
- vncipher $out3,$out3,v25
- vncipher $out4,$out4,v25
- vncipher $out5,$out5,v25
- vncipher $out6,$out6,v25
- vncipher $out7,$out7,v25
- lvx v25,$x10,$key_ # round[4]
- bdnz Loop_cbc_dec8x_tail
-
- vncipher $out1,$out1,v24
- vncipher $out2,$out2,v24
- vncipher $out3,$out3,v24
- vncipher $out4,$out4,v24
- vncipher $out5,$out5,v24
- vncipher $out6,$out6,v24
- vncipher $out7,$out7,v24
-
- vncipher $out1,$out1,v25
- vncipher $out2,$out2,v25
- vncipher $out3,$out3,v25
- vncipher $out4,$out4,v25
- vncipher $out5,$out5,v25
- vncipher $out6,$out6,v25
- vncipher $out7,$out7,v25
-
- vncipher $out1,$out1,v26
- vncipher $out2,$out2,v26
- vncipher $out3,$out3,v26
- vncipher $out4,$out4,v26
- vncipher $out5,$out5,v26
- vncipher $out6,$out6,v26
- vncipher $out7,$out7,v26
-
- vncipher $out1,$out1,v27
- vncipher $out2,$out2,v27
- vncipher $out3,$out3,v27
- vncipher $out4,$out4,v27
- vncipher $out5,$out5,v27
- vncipher $out6,$out6,v27
- vncipher $out7,$out7,v27
-
- vncipher $out1,$out1,v28
- vncipher $out2,$out2,v28
- vncipher $out3,$out3,v28
- vncipher $out4,$out4,v28
- vncipher $out5,$out5,v28
- vncipher $out6,$out6,v28
- vncipher $out7,$out7,v28
-
- vncipher $out1,$out1,v29
- vncipher $out2,$out2,v29
- vncipher $out3,$out3,v29
- vncipher $out4,$out4,v29
- vncipher $out5,$out5,v29
- vncipher $out6,$out6,v29
- vncipher $out7,$out7,v29
-
- vncipher $out1,$out1,v30
- vxor $ivec,$ivec,v31 # last round key
- vncipher $out2,$out2,v30
- vxor $in1,$in1,v31
- vncipher $out3,$out3,v30
- vxor $in2,$in2,v31
- vncipher $out4,$out4,v30
- vxor $in3,$in3,v31
- vncipher $out5,$out5,v30
- vxor $in4,$in4,v31
- vncipher $out6,$out6,v30
- vxor $in5,$in5,v31
- vncipher $out7,$out7,v30
- vxor $in6,$in6,v31
-
- cmplwi $len,32 # switch($len)
- blt Lcbc_dec8x_one
- nop
- beq Lcbc_dec8x_two
- cmplwi $len,64
- blt Lcbc_dec8x_three
- nop
- beq Lcbc_dec8x_four
- cmplwi $len,96
- blt Lcbc_dec8x_five
- nop
- beq Lcbc_dec8x_six
-
-Lcbc_dec8x_seven:
- vncipherlast $out1,$out1,$ivec
- vncipherlast $out2,$out2,$in1
- vncipherlast $out3,$out3,$in2
- vncipherlast $out4,$out4,$in3
- vncipherlast $out5,$out5,$in4
- vncipherlast $out6,$out6,$in5
- vncipherlast $out7,$out7,$in6
- vmr $ivec,$in7
-
- le?vperm $out1,$out1,$out1,$inpperm
- le?vperm $out2,$out2,$out2,$inpperm
- stvx_u $out1,$x00,$out
- le?vperm $out3,$out3,$out3,$inpperm
- stvx_u $out2,$x10,$out
- le?vperm $out4,$out4,$out4,$inpperm
- stvx_u $out3,$x20,$out
- le?vperm $out5,$out5,$out5,$inpperm
- stvx_u $out4,$x30,$out
- le?vperm $out6,$out6,$out6,$inpperm
- stvx_u $out5,$x40,$out
- le?vperm $out7,$out7,$out7,$inpperm
- stvx_u $out6,$x50,$out
- stvx_u $out7,$x60,$out
- addi $out,$out,0x70
- b Lcbc_dec8x_done
-
-.align 5
-Lcbc_dec8x_six:
- vncipherlast $out2,$out2,$ivec
- vncipherlast $out3,$out3,$in2
- vncipherlast $out4,$out4,$in3
- vncipherlast $out5,$out5,$in4
- vncipherlast $out6,$out6,$in5
- vncipherlast $out7,$out7,$in6
- vmr $ivec,$in7
-
- le?vperm $out2,$out2,$out2,$inpperm
- le?vperm $out3,$out3,$out3,$inpperm
- stvx_u $out2,$x00,$out
- le?vperm $out4,$out4,$out4,$inpperm
- stvx_u $out3,$x10,$out
- le?vperm $out5,$out5,$out5,$inpperm
- stvx_u $out4,$x20,$out
- le?vperm $out6,$out6,$out6,$inpperm
- stvx_u $out5,$x30,$out
- le?vperm $out7,$out7,$out7,$inpperm
- stvx_u $out6,$x40,$out
- stvx_u $out7,$x50,$out
- addi $out,$out,0x60
- b Lcbc_dec8x_done
-
-.align 5
-Lcbc_dec8x_five:
- vncipherlast $out3,$out3,$ivec
- vncipherlast $out4,$out4,$in3
- vncipherlast $out5,$out5,$in4
- vncipherlast $out6,$out6,$in5
- vncipherlast $out7,$out7,$in6
- vmr $ivec,$in7
-
- le?vperm $out3,$out3,$out3,$inpperm
- le?vperm $out4,$out4,$out4,$inpperm
- stvx_u $out3,$x00,$out
- le?vperm $out5,$out5,$out5,$inpperm
- stvx_u $out4,$x10,$out
- le?vperm $out6,$out6,$out6,$inpperm
- stvx_u $out5,$x20,$out
- le?vperm $out7,$out7,$out7,$inpperm
- stvx_u $out6,$x30,$out
- stvx_u $out7,$x40,$out
- addi $out,$out,0x50
- b Lcbc_dec8x_done
-
-.align 5
-Lcbc_dec8x_four:
- vncipherlast $out4,$out4,$ivec
- vncipherlast $out5,$out5,$in4
- vncipherlast $out6,$out6,$in5
- vncipherlast $out7,$out7,$in6
- vmr $ivec,$in7
-
- le?vperm $out4,$out4,$out4,$inpperm
- le?vperm $out5,$out5,$out5,$inpperm
- stvx_u $out4,$x00,$out
- le?vperm $out6,$out6,$out6,$inpperm
- stvx_u $out5,$x10,$out
- le?vperm $out7,$out7,$out7,$inpperm
- stvx_u $out6,$x20,$out
- stvx_u $out7,$x30,$out
- addi $out,$out,0x40
- b Lcbc_dec8x_done
-
-.align 5
-Lcbc_dec8x_three:
- vncipherlast $out5,$out5,$ivec
- vncipherlast $out6,$out6,$in5
- vncipherlast $out7,$out7,$in6
- vmr $ivec,$in7
-
- le?vperm $out5,$out5,$out5,$inpperm
- le?vperm $out6,$out6,$out6,$inpperm
- stvx_u $out5,$x00,$out
- le?vperm $out7,$out7,$out7,$inpperm
- stvx_u $out6,$x10,$out
- stvx_u $out7,$x20,$out
- addi $out,$out,0x30
- b Lcbc_dec8x_done
-
-.align 5
-Lcbc_dec8x_two:
- vncipherlast $out6,$out6,$ivec
- vncipherlast $out7,$out7,$in6
- vmr $ivec,$in7
-
- le?vperm $out6,$out6,$out6,$inpperm
- le?vperm $out7,$out7,$out7,$inpperm
- stvx_u $out6,$x00,$out
- stvx_u $out7,$x10,$out
- addi $out,$out,0x20
- b Lcbc_dec8x_done
-
-.align 5
-Lcbc_dec8x_one:
- vncipherlast $out7,$out7,$ivec
- vmr $ivec,$in7
-
- le?vperm $out7,$out7,$out7,$inpperm
- stvx_u $out7,0,$out
- addi $out,$out,0x10
-
-Lcbc_dec8x_done:
- le?vperm $ivec,$ivec,$ivec,$inpperm
- stvx_u $ivec,0,$ivp # write [unaligned] iv
-
- li r10,`$FRAME+15`
- li r11,`$FRAME+31`
- stvx $inpperm,r10,$sp # wipe copies of round keys
- addi r10,r10,32
- stvx $inpperm,r11,$sp
- addi r11,r11,32
- stvx $inpperm,r10,$sp
- addi r10,r10,32
- stvx $inpperm,r11,$sp
- addi r11,r11,32
- stvx $inpperm,r10,$sp
- addi r10,r10,32
- stvx $inpperm,r11,$sp
- addi r11,r11,32
- stvx $inpperm,r10,$sp
- addi r10,r10,32
- stvx $inpperm,r11,$sp
- addi r11,r11,32
-
- mtspr 256,$vrsave
- lvx v20,r10,$sp # ABI says so
- addi r10,r10,32
- lvx v21,r11,$sp
- addi r11,r11,32
- lvx v22,r10,$sp
- addi r10,r10,32
- lvx v23,r11,$sp
- addi r11,r11,32
- lvx v24,r10,$sp
- addi r10,r10,32
- lvx v25,r11,$sp
- addi r11,r11,32
- lvx v26,r10,$sp
- addi r10,r10,32
- lvx v27,r11,$sp
- addi r11,r11,32
- lvx v28,r10,$sp
- addi r10,r10,32
- lvx v29,r11,$sp
- addi r11,r11,32
- lvx v30,r10,$sp
- lvx v31,r11,$sp
- $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
- $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
- $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
- $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
- $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
- $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
- addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
- blr
- .long 0
- .byte 0,12,0x04,0,0x80,6,6,0
- .long 0
-.size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
-___
-}} }}}
-
-#########################################################################
-{{{ # CTR procedure[s] #
-my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
-my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
-my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
- map("v$_",(4..11));
-my $dat=$tmp;
-
-$code.=<<___;
-.globl .${prefix}_ctr32_encrypt_blocks
-.align 5
-.${prefix}_ctr32_encrypt_blocks:
- ${UCMP}i $len,1
- bltlr-
-
- lis r0,0xfff0
- mfspr $vrsave,256
- mtspr 256,r0
-
- li $idx,15
- vxor $rndkey0,$rndkey0,$rndkey0
- le?vspltisb $tmp,0x0f
-
- lvx $ivec,0,$ivp # load [unaligned] iv
- lvsl $inpperm,0,$ivp
- lvx $inptail,$idx,$ivp
- vspltisb $one,1
- le?vxor $inpperm,$inpperm,$tmp
- vperm $ivec,$ivec,$inptail,$inpperm
- vsldoi $one,$rndkey0,$one,1
-
- neg r11,$inp
- ?lvsl $keyperm,0,$key # prepare for unaligned key
- lwz $rounds,240($key)
-
- lvsr $inpperm,0,r11 # prepare for unaligned load
- lvx $inptail,0,$inp
- addi $inp,$inp,15 # 15 is not typo
- le?vxor $inpperm,$inpperm,$tmp
-
- srwi $rounds,$rounds,1
- li $idx,16
- subi $rounds,$rounds,1
-
- ${UCMP}i $len,8
- bge _aesp8_ctr32_encrypt8x
-
- ?lvsr $outperm,0,$out # prepare for unaligned store
- vspltisb $outmask,-1
- lvx $outhead,0,$out
- ?vperm $outmask,$rndkey0,$outmask,$outperm
- le?vxor $outperm,$outperm,$tmp
-
- lvx $rndkey0,0,$key
- mtctr $rounds
- lvx $rndkey1,$idx,$key
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vxor $inout,$ivec,$rndkey0
- lvx $rndkey0,$idx,$key
- addi $idx,$idx,16
- b Loop_ctr32_enc
-
-.align 5
-Loop_ctr32_enc:
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vcipher $inout,$inout,$rndkey1
- lvx $rndkey1,$idx,$key
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vcipher $inout,$inout,$rndkey0
- lvx $rndkey0,$idx,$key
- addi $idx,$idx,16
- bdnz Loop_ctr32_enc
-
- vadduwm $ivec,$ivec,$one
- vmr $dat,$inptail
- lvx $inptail,0,$inp
- addi $inp,$inp,16
- subic. $len,$len,1 # blocks--
-
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vcipher $inout,$inout,$rndkey1
- lvx $rndkey1,$idx,$key
- vperm $dat,$dat,$inptail,$inpperm
- li $idx,16
- ?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm
- lvx $rndkey0,0,$key
- vxor $dat,$dat,$rndkey1 # last round key
- vcipherlast $inout,$inout,$dat
-
- lvx $rndkey1,$idx,$key
- addi $idx,$idx,16
- vperm $inout,$inout,$inout,$outperm
- vsel $dat,$outhead,$inout,$outmask
- mtctr $rounds
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vmr $outhead,$inout
- vxor $inout,$ivec,$rndkey0
- lvx $rndkey0,$idx,$key
- addi $idx,$idx,16
- stvx $dat,0,$out
- addi $out,$out,16
- bne Loop_ctr32_enc
-
- addi $out,$out,-1
- lvx $inout,0,$out # redundant in aligned case
- vsel $inout,$outhead,$inout,$outmask
- stvx $inout,0,$out
-
- mtspr 256,$vrsave
- blr
- .long 0
- .byte 0,12,0x14,0,0,0,6,0
- .long 0
-___
-#########################################################################
-{{ # Optimized CTR procedure #
-my $key_="r11";
-my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
- $x00=0 if ($flavour =~ /osx/);
-my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
-my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
-my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
- # v26-v31 last 6 round keys
-my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
-my ($two,$three,$four)=($outhead,$outperm,$outmask);
-
-$code.=<<___;
-.align 5
-_aesp8_ctr32_encrypt8x:
- $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
- li r10,`$FRAME+8*16+15`
- li r11,`$FRAME+8*16+31`
- stvx v20,r10,$sp # ABI says so
- addi r10,r10,32
- stvx v21,r11,$sp
- addi r11,r11,32
- stvx v22,r10,$sp
- addi r10,r10,32
- stvx v23,r11,$sp
- addi r11,r11,32
- stvx v24,r10,$sp
- addi r10,r10,32
- stvx v25,r11,$sp
- addi r11,r11,32
- stvx v26,r10,$sp
- addi r10,r10,32
- stvx v27,r11,$sp
- addi r11,r11,32
- stvx v28,r10,$sp
- addi r10,r10,32
- stvx v29,r11,$sp
- addi r11,r11,32
- stvx v30,r10,$sp
- stvx v31,r11,$sp
- li r0,-1
- stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
- li $x10,0x10
- $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
- li $x20,0x20
- $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
- li $x30,0x30
- $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
- li $x40,0x40
- $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
- li $x50,0x50
- $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
- li $x60,0x60
- $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
- li $x70,0x70
- mtspr 256,r0
-
- subi $rounds,$rounds,3 # -4 in total
-
- lvx $rndkey0,$x00,$key # load key schedule
- lvx v30,$x10,$key
- addi $key,$key,0x20
- lvx v31,$x00,$key
- ?vperm $rndkey0,$rndkey0,v30,$keyperm
- addi $key_,$sp,`$FRAME+15`
- mtctr $rounds
-
-Load_ctr32_enc_key:
- ?vperm v24,v30,v31,$keyperm
- lvx v30,$x10,$key
- addi $key,$key,0x20
- stvx v24,$x00,$key_ # off-load round[1]
- ?vperm v25,v31,v30,$keyperm
- lvx v31,$x00,$key
- stvx v25,$x10,$key_ # off-load round[2]
- addi $key_,$key_,0x20
- bdnz Load_ctr32_enc_key
-
- lvx v26,$x10,$key
- ?vperm v24,v30,v31,$keyperm
- lvx v27,$x20,$key
- stvx v24,$x00,$key_ # off-load round[3]
- ?vperm v25,v31,v26,$keyperm
- lvx v28,$x30,$key
- stvx v25,$x10,$key_ # off-load round[4]
- addi $key_,$sp,`$FRAME+15` # rewind $key_
- ?vperm v26,v26,v27,$keyperm
- lvx v29,$x40,$key
- ?vperm v27,v27,v28,$keyperm
- lvx v30,$x50,$key
- ?vperm v28,v28,v29,$keyperm
- lvx v31,$x60,$key
- ?vperm v29,v29,v30,$keyperm
- lvx $out0,$x70,$key # borrow $out0
- ?vperm v30,v30,v31,$keyperm
- lvx v24,$x00,$key_ # pre-load round[1]
- ?vperm v31,v31,$out0,$keyperm
- lvx v25,$x10,$key_ # pre-load round[2]
-
- vadduwm $two,$one,$one
- subi $inp,$inp,15 # undo "caller"
- $SHL $len,$len,4
-
- vadduwm $out1,$ivec,$one # counter values ...
- vadduwm $out2,$ivec,$two
- vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
- le?li $idx,8
- vadduwm $out3,$out1,$two
- vxor $out1,$out1,$rndkey0
- le?lvsl $inpperm,0,$idx
- vadduwm $out4,$out2,$two
- vxor $out2,$out2,$rndkey0
- le?vspltisb $tmp,0x0f
- vadduwm $out5,$out3,$two
- vxor $out3,$out3,$rndkey0
- le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
- vadduwm $out6,$out4,$two
- vxor $out4,$out4,$rndkey0
- vadduwm $out7,$out5,$two
- vxor $out5,$out5,$rndkey0
- vadduwm $ivec,$out6,$two # next counter value
- vxor $out6,$out6,$rndkey0
- vxor $out7,$out7,$rndkey0
-
- mtctr $rounds
- b Loop_ctr32_enc8x
-.align 5
-Loop_ctr32_enc8x:
- vcipher $out0,$out0,v24
- vcipher $out1,$out1,v24
- vcipher $out2,$out2,v24
- vcipher $out3,$out3,v24
- vcipher $out4,$out4,v24
- vcipher $out5,$out5,v24
- vcipher $out6,$out6,v24
- vcipher $out7,$out7,v24
-Loop_ctr32_enc8x_middle:
- lvx v24,$x20,$key_ # round[3]
- addi $key_,$key_,0x20
-
- vcipher $out0,$out0,v25
- vcipher $out1,$out1,v25
- vcipher $out2,$out2,v25
- vcipher $out3,$out3,v25
- vcipher $out4,$out4,v25
- vcipher $out5,$out5,v25
- vcipher $out6,$out6,v25
- vcipher $out7,$out7,v25
- lvx v25,$x10,$key_ # round[4]
- bdnz Loop_ctr32_enc8x
-
- subic r11,$len,256 # $len-256, borrow $key_
- vcipher $out0,$out0,v24
- vcipher $out1,$out1,v24
- vcipher $out2,$out2,v24
- vcipher $out3,$out3,v24
- vcipher $out4,$out4,v24
- vcipher $out5,$out5,v24
- vcipher $out6,$out6,v24
- vcipher $out7,$out7,v24
-
- subfe r0,r0,r0 # borrow?-1:0
- vcipher $out0,$out0,v25
- vcipher $out1,$out1,v25
- vcipher $out2,$out2,v25
- vcipher $out3,$out3,v25
- vcipher $out4,$out4,v25
- vcipher $out5,$out5,v25
- vcipher $out6,$out6,v25
- vcipher $out7,$out7,v25
-
- and r0,r0,r11
- addi $key_,$sp,`$FRAME+15` # rewind $key_
- vcipher $out0,$out0,v26
- vcipher $out1,$out1,v26
- vcipher $out2,$out2,v26
- vcipher $out3,$out3,v26
- vcipher $out4,$out4,v26
- vcipher $out5,$out5,v26
- vcipher $out6,$out6,v26
- vcipher $out7,$out7,v26
- lvx v24,$x00,$key_ # re-pre-load round[1]
-
- subic $len,$len,129 # $len-=129
- vcipher $out0,$out0,v27
- addi $len,$len,1 # $len-=128 really
- vcipher $out1,$out1,v27
- vcipher $out2,$out2,v27
- vcipher $out3,$out3,v27
- vcipher $out4,$out4,v27
- vcipher $out5,$out5,v27
- vcipher $out6,$out6,v27
- vcipher $out7,$out7,v27
- lvx v25,$x10,$key_ # re-pre-load round[2]
-
- vcipher $out0,$out0,v28
- lvx_u $in0,$x00,$inp # load input
- vcipher $out1,$out1,v28
- lvx_u $in1,$x10,$inp
- vcipher $out2,$out2,v28
- lvx_u $in2,$x20,$inp
- vcipher $out3,$out3,v28
- lvx_u $in3,$x30,$inp
- vcipher $out4,$out4,v28
- lvx_u $in4,$x40,$inp
- vcipher $out5,$out5,v28
- lvx_u $in5,$x50,$inp
- vcipher $out6,$out6,v28
- lvx_u $in6,$x60,$inp
- vcipher $out7,$out7,v28
- lvx_u $in7,$x70,$inp
- addi $inp,$inp,0x80
-
- vcipher $out0,$out0,v29
- le?vperm $in0,$in0,$in0,$inpperm
- vcipher $out1,$out1,v29
- le?vperm $in1,$in1,$in1,$inpperm
- vcipher $out2,$out2,v29
- le?vperm $in2,$in2,$in2,$inpperm
- vcipher $out3,$out3,v29
- le?vperm $in3,$in3,$in3,$inpperm
- vcipher $out4,$out4,v29
- le?vperm $in4,$in4,$in4,$inpperm
- vcipher $out5,$out5,v29
- le?vperm $in5,$in5,$in5,$inpperm
- vcipher $out6,$out6,v29
- le?vperm $in6,$in6,$in6,$inpperm
- vcipher $out7,$out7,v29
- le?vperm $in7,$in7,$in7,$inpperm
-
- add $inp,$inp,r0 # $inp is adjusted in such
- # way that at exit from the
- # loop inX-in7 are loaded
- # with last "words"
- subfe. r0,r0,r0 # borrow?-1:0
- vcipher $out0,$out0,v30
- vxor $in0,$in0,v31 # xor with last round key
- vcipher $out1,$out1,v30
- vxor $in1,$in1,v31
- vcipher $out2,$out2,v30
- vxor $in2,$in2,v31
- vcipher $out3,$out3,v30
- vxor $in3,$in3,v31
- vcipher $out4,$out4,v30
- vxor $in4,$in4,v31
- vcipher $out5,$out5,v30
- vxor $in5,$in5,v31
- vcipher $out6,$out6,v30
- vxor $in6,$in6,v31
- vcipher $out7,$out7,v30
- vxor $in7,$in7,v31
-
- bne Lctr32_enc8x_break # did $len-129 borrow?
-
- vcipherlast $in0,$out0,$in0
- vcipherlast $in1,$out1,$in1
- vadduwm $out1,$ivec,$one # counter values ...
- vcipherlast $in2,$out2,$in2
- vadduwm $out2,$ivec,$two
- vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
- vcipherlast $in3,$out3,$in3
- vadduwm $out3,$out1,$two
- vxor $out1,$out1,$rndkey0
- vcipherlast $in4,$out4,$in4
- vadduwm $out4,$out2,$two
- vxor $out2,$out2,$rndkey0
- vcipherlast $in5,$out5,$in5
- vadduwm $out5,$out3,$two
- vxor $out3,$out3,$rndkey0
- vcipherlast $in6,$out6,$in6
- vadduwm $out6,$out4,$two
- vxor $out4,$out4,$rndkey0
- vcipherlast $in7,$out7,$in7
- vadduwm $out7,$out5,$two
- vxor $out5,$out5,$rndkey0
- le?vperm $in0,$in0,$in0,$inpperm
- vadduwm $ivec,$out6,$two # next counter value
- vxor $out6,$out6,$rndkey0
- le?vperm $in1,$in1,$in1,$inpperm
- vxor $out7,$out7,$rndkey0
- mtctr $rounds
-
- vcipher $out0,$out0,v24
- stvx_u $in0,$x00,$out
- le?vperm $in2,$in2,$in2,$inpperm
- vcipher $out1,$out1,v24
- stvx_u $in1,$x10,$out
- le?vperm $in3,$in3,$in3,$inpperm
- vcipher $out2,$out2,v24
- stvx_u $in2,$x20,$out
- le?vperm $in4,$in4,$in4,$inpperm
- vcipher $out3,$out3,v24
- stvx_u $in3,$x30,$out
- le?vperm $in5,$in5,$in5,$inpperm
- vcipher $out4,$out4,v24
- stvx_u $in4,$x40,$out
- le?vperm $in6,$in6,$in6,$inpperm
- vcipher $out5,$out5,v24
- stvx_u $in5,$x50,$out
- le?vperm $in7,$in7,$in7,$inpperm
- vcipher $out6,$out6,v24
- stvx_u $in6,$x60,$out
- vcipher $out7,$out7,v24
- stvx_u $in7,$x70,$out
- addi $out,$out,0x80
-
- b Loop_ctr32_enc8x_middle
-
-.align 5
-Lctr32_enc8x_break:
- cmpwi $len,-0x60
- blt Lctr32_enc8x_one
- nop
- beq Lctr32_enc8x_two
- cmpwi $len,-0x40
- blt Lctr32_enc8x_three
- nop
- beq Lctr32_enc8x_four
- cmpwi $len,-0x20
- blt Lctr32_enc8x_five
- nop
- beq Lctr32_enc8x_six
- cmpwi $len,0x00
- blt Lctr32_enc8x_seven
-
-Lctr32_enc8x_eight:
- vcipherlast $out0,$out0,$in0
- vcipherlast $out1,$out1,$in1
- vcipherlast $out2,$out2,$in2
- vcipherlast $out3,$out3,$in3
- vcipherlast $out4,$out4,$in4
- vcipherlast $out5,$out5,$in5
- vcipherlast $out6,$out6,$in6
- vcipherlast $out7,$out7,$in7
-
- le?vperm $out0,$out0,$out0,$inpperm
- le?vperm $out1,$out1,$out1,$inpperm
- stvx_u $out0,$x00,$out
- le?vperm $out2,$out2,$out2,$inpperm
- stvx_u $out1,$x10,$out
- le?vperm $out3,$out3,$out3,$inpperm
- stvx_u $out2,$x20,$out
- le?vperm $out4,$out4,$out4,$inpperm
- stvx_u $out3,$x30,$out
- le?vperm $out5,$out5,$out5,$inpperm
- stvx_u $out4,$x40,$out
- le?vperm $out6,$out6,$out6,$inpperm
- stvx_u $out5,$x50,$out
- le?vperm $out7,$out7,$out7,$inpperm
- stvx_u $out6,$x60,$out
- stvx_u $out7,$x70,$out
- addi $out,$out,0x80
- b Lctr32_enc8x_done
-
-.align 5
-Lctr32_enc8x_seven:
- vcipherlast $out0,$out0,$in1
- vcipherlast $out1,$out1,$in2
- vcipherlast $out2,$out2,$in3
- vcipherlast $out3,$out3,$in4
- vcipherlast $out4,$out4,$in5
- vcipherlast $out5,$out5,$in6
- vcipherlast $out6,$out6,$in7
-
- le?vperm $out0,$out0,$out0,$inpperm
- le?vperm $out1,$out1,$out1,$inpperm
- stvx_u $out0,$x00,$out
- le?vperm $out2,$out2,$out2,$inpperm
- stvx_u $out1,$x10,$out
- le?vperm $out3,$out3,$out3,$inpperm
- stvx_u $out2,$x20,$out
- le?vperm $out4,$out4,$out4,$inpperm
- stvx_u $out3,$x30,$out
- le?vperm $out5,$out5,$out5,$inpperm
- stvx_u $out4,$x40,$out
- le?vperm $out6,$out6,$out6,$inpperm
- stvx_u $out5,$x50,$out
- stvx_u $out6,$x60,$out
- addi $out,$out,0x70
- b Lctr32_enc8x_done
-
-.align 5
-Lctr32_enc8x_six:
- vcipherlast $out0,$out0,$in2
- vcipherlast $out1,$out1,$in3
- vcipherlast $out2,$out2,$in4
- vcipherlast $out3,$out3,$in5
- vcipherlast $out4,$out4,$in6
- vcipherlast $out5,$out5,$in7
-
- le?vperm $out0,$out0,$out0,$inpperm
- le?vperm $out1,$out1,$out1,$inpperm
- stvx_u $out0,$x00,$out
- le?vperm $out2,$out2,$out2,$inpperm
- stvx_u $out1,$x10,$out
- le?vperm $out3,$out3,$out3,$inpperm
- stvx_u $out2,$x20,$out
- le?vperm $out4,$out4,$out4,$inpperm
- stvx_u $out3,$x30,$out
- le?vperm $out5,$out5,$out5,$inpperm
- stvx_u $out4,$x40,$out
- stvx_u $out5,$x50,$out
- addi $out,$out,0x60
- b Lctr32_enc8x_done
-
-.align 5
-Lctr32_enc8x_five:
- vcipherlast $out0,$out0,$in3
- vcipherlast $out1,$out1,$in4
- vcipherlast $out2,$out2,$in5
- vcipherlast $out3,$out3,$in6
- vcipherlast $out4,$out4,$in7
-
- le?vperm $out0,$out0,$out0,$inpperm
- le?vperm $out1,$out1,$out1,$inpperm
- stvx_u $out0,$x00,$out
- le?vperm $out2,$out2,$out2,$inpperm
- stvx_u $out1,$x10,$out
- le?vperm $out3,$out3,$out3,$inpperm
- stvx_u $out2,$x20,$out
- le?vperm $out4,$out4,$out4,$inpperm
- stvx_u $out3,$x30,$out
- stvx_u $out4,$x40,$out
- addi $out,$out,0x50
- b Lctr32_enc8x_done
-
-.align 5
-Lctr32_enc8x_four:
- vcipherlast $out0,$out0,$in4
- vcipherlast $out1,$out1,$in5
- vcipherlast $out2,$out2,$in6
- vcipherlast $out3,$out3,$in7
-
- le?vperm $out0,$out0,$out0,$inpperm
- le?vperm $out1,$out1,$out1,$inpperm
- stvx_u $out0,$x00,$out
- le?vperm $out2,$out2,$out2,$inpperm
- stvx_u $out1,$x10,$out
- le?vperm $out3,$out3,$out3,$inpperm
- stvx_u $out2,$x20,$out
- stvx_u $out3,$x30,$out
- addi $out,$out,0x40
- b Lctr32_enc8x_done
-
-.align 5
-Lctr32_enc8x_three:
- vcipherlast $out0,$out0,$in5
- vcipherlast $out1,$out1,$in6
- vcipherlast $out2,$out2,$in7
-
- le?vperm $out0,$out0,$out0,$inpperm
- le?vperm $out1,$out1,$out1,$inpperm
- stvx_u $out0,$x00,$out
- le?vperm $out2,$out2,$out2,$inpperm
- stvx_u $out1,$x10,$out
- stvx_u $out2,$x20,$out
- addi $out,$out,0x30
- b Lctr32_enc8x_done
-
-.align 5
-Lctr32_enc8x_two:
- vcipherlast $out0,$out0,$in6
- vcipherlast $out1,$out1,$in7
-
- le?vperm $out0,$out0,$out0,$inpperm
- le?vperm $out1,$out1,$out1,$inpperm
- stvx_u $out0,$x00,$out
- stvx_u $out1,$x10,$out
- addi $out,$out,0x20
- b Lctr32_enc8x_done
-
-.align 5
-Lctr32_enc8x_one:
- vcipherlast $out0,$out0,$in7
-
- le?vperm $out0,$out0,$out0,$inpperm
- stvx_u $out0,0,$out
- addi $out,$out,0x10
-
-Lctr32_enc8x_done:
- li r10,`$FRAME+15`
- li r11,`$FRAME+31`
- stvx $inpperm,r10,$sp # wipe copies of round keys
- addi r10,r10,32
- stvx $inpperm,r11,$sp
- addi r11,r11,32
- stvx $inpperm,r10,$sp
- addi r10,r10,32
- stvx $inpperm,r11,$sp
- addi r11,r11,32
- stvx $inpperm,r10,$sp
- addi r10,r10,32
- stvx $inpperm,r11,$sp
- addi r11,r11,32
- stvx $inpperm,r10,$sp
- addi r10,r10,32
- stvx $inpperm,r11,$sp
- addi r11,r11,32
-
- mtspr 256,$vrsave
- lvx v20,r10,$sp # ABI says so
- addi r10,r10,32
- lvx v21,r11,$sp
- addi r11,r11,32
- lvx v22,r10,$sp
- addi r10,r10,32
- lvx v23,r11,$sp
- addi r11,r11,32
- lvx v24,r10,$sp
- addi r10,r10,32
- lvx v25,r11,$sp
- addi r11,r11,32
- lvx v26,r10,$sp
- addi r10,r10,32
- lvx v27,r11,$sp
- addi r11,r11,32
- lvx v28,r10,$sp
- addi r10,r10,32
- lvx v29,r11,$sp
- addi r11,r11,32
- lvx v30,r10,$sp
- lvx v31,r11,$sp
- $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
- $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
- $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
- $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
- $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
- $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
- addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
- blr
- .long 0
- .byte 0,12,0x04,0,0x80,6,6,0
- .long 0
-.size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
-___
-}} }}}
-
-#########################################################################
-{{{ # XTS procedures #
-# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len, #
-# const AES_KEY *key1, const AES_KEY *key2, #
-# [const] unsigned char iv[16]); #
-# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which #
-# input tweak value is assumed to be encrypted already, and last tweak #
-# value, one suitable for consecutive call on same chunk of data, is #
-# written back to original buffer. In addition, in "tweak chaining" #
-# mode only complete input blocks are processed. #
-
-my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10));
-my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2));
-my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7));
-my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12));
-my $taillen = $key2;
-
- ($inp,$idx) = ($idx,$inp); # reassign
-
-$code.=<<___;
-.globl .${prefix}_xts_encrypt
-.align 5
-.${prefix}_xts_encrypt:
- mr $inp,r3 # reassign
- li r3,-1
- ${UCMP}i $len,16
- bltlr-
-
- lis r0,0xfff0
- mfspr r12,256 # save vrsave
- li r11,0
- mtspr 256,r0
-
- vspltisb $seven,0x07 # 0x070707..07
- le?lvsl $leperm,r11,r11
- le?vspltisb $tmp,0x0f
- le?vxor $leperm,$leperm,$seven
-
- li $idx,15
- lvx $tweak,0,$ivp # load [unaligned] iv
- lvsl $inpperm,0,$ivp
- lvx $inptail,$idx,$ivp
- le?vxor $inpperm,$inpperm,$tmp
- vperm $tweak,$tweak,$inptail,$inpperm
-
- neg r11,$inp
- lvsr $inpperm,0,r11 # prepare for unaligned load
- lvx $inout,0,$inp
- addi $inp,$inp,15 # 15 is not typo
- le?vxor $inpperm,$inpperm,$tmp
-
- ${UCMP}i $key2,0 # key2==NULL?
- beq Lxts_enc_no_key2
-
- ?lvsl $keyperm,0,$key2 # prepare for unaligned key
- lwz $rounds,240($key2)
- srwi $rounds,$rounds,1
- subi $rounds,$rounds,1
- li $idx,16
-
- lvx $rndkey0,0,$key2
- lvx $rndkey1,$idx,$key2
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vxor $tweak,$tweak,$rndkey0
- lvx $rndkey0,$idx,$key2
- addi $idx,$idx,16
- mtctr $rounds
-
-Ltweak_xts_enc:
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vcipher $tweak,$tweak,$rndkey1
- lvx $rndkey1,$idx,$key2
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vcipher $tweak,$tweak,$rndkey0
- lvx $rndkey0,$idx,$key2
- addi $idx,$idx,16
- bdnz Ltweak_xts_enc
-
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vcipher $tweak,$tweak,$rndkey1
- lvx $rndkey1,$idx,$key2
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vcipherlast $tweak,$tweak,$rndkey0
-
- li $ivp,0 # don't chain the tweak
- b Lxts_enc
-
-Lxts_enc_no_key2:
- li $idx,-16
- and $len,$len,$idx # in "tweak chaining"
- # mode only complete
- # blocks are processed
-Lxts_enc:
- lvx $inptail,0,$inp
- addi $inp,$inp,16
-
- ?lvsl $keyperm,0,$key1 # prepare for unaligned key
- lwz $rounds,240($key1)
- srwi $rounds,$rounds,1
- subi $rounds,$rounds,1
- li $idx,16
-
- vslb $eighty7,$seven,$seven # 0x808080..80
- vor $eighty7,$eighty7,$seven # 0x878787..87
- vspltisb $tmp,1 # 0x010101..01
- vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
-
- ${UCMP}i $len,96
- bge _aesp8_xts_encrypt6x
-
- andi. $taillen,$len,15
- subic r0,$len,32
- subi $taillen,$taillen,16
- subfe r0,r0,r0
- and r0,r0,$taillen
- add $inp,$inp,r0
-
- lvx $rndkey0,0,$key1
- lvx $rndkey1,$idx,$key1
- addi $idx,$idx,16
- vperm $inout,$inout,$inptail,$inpperm
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vxor $inout,$inout,$tweak
- vxor $inout,$inout,$rndkey0
- lvx $rndkey0,$idx,$key1
- addi $idx,$idx,16
- mtctr $rounds
- b Loop_xts_enc
-
-.align 5
-Loop_xts_enc:
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vcipher $inout,$inout,$rndkey1
- lvx $rndkey1,$idx,$key1
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vcipher $inout,$inout,$rndkey0
- lvx $rndkey0,$idx,$key1
- addi $idx,$idx,16
- bdnz Loop_xts_enc
-
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vcipher $inout,$inout,$rndkey1
- lvx $rndkey1,$idx,$key1
- li $idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vxor $rndkey0,$rndkey0,$tweak
- vcipherlast $output,$inout,$rndkey0
-
- le?vperm $tmp,$output,$output,$leperm
- be?nop
- le?stvx_u $tmp,0,$out
- be?stvx_u $output,0,$out
- addi $out,$out,16
-
- subic. $len,$len,16
- beq Lxts_enc_done
-
- vmr $inout,$inptail
- lvx $inptail,0,$inp
- addi $inp,$inp,16
- lvx $rndkey0,0,$key1
- lvx $rndkey1,$idx,$key1
- addi $idx,$idx,16
-
- subic r0,$len,32
- subfe r0,r0,r0
- and r0,r0,$taillen
- add $inp,$inp,r0
-
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- vand $tmp,$tmp,$eighty7
- vxor $tweak,$tweak,$tmp
-
- vperm $inout,$inout,$inptail,$inpperm
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vxor $inout,$inout,$tweak
- vxor $output,$output,$rndkey0 # just in case $len<16
- vxor $inout,$inout,$rndkey0
- lvx $rndkey0,$idx,$key1
- addi $idx,$idx,16
-
- mtctr $rounds
- ${UCMP}i $len,16
- bge Loop_xts_enc
-
- vxor $output,$output,$tweak
- lvsr $inpperm,0,$len # $inpperm is no longer needed
- vxor $inptail,$inptail,$inptail # $inptail is no longer needed
- vspltisb $tmp,-1
- vperm $inptail,$inptail,$tmp,$inpperm
- vsel $inout,$inout,$output,$inptail
-
- subi r11,$out,17
- subi $out,$out,16
- mtctr $len
- li $len,16
-Loop_xts_enc_steal:
- lbzu r0,1(r11)
- stb r0,16(r11)
- bdnz Loop_xts_enc_steal
-
- mtctr $rounds
- b Loop_xts_enc # one more time...
-
-Lxts_enc_done:
- ${UCMP}i $ivp,0
- beq Lxts_enc_ret
-
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- vand $tmp,$tmp,$eighty7
- vxor $tweak,$tweak,$tmp
-
- le?vperm $tweak,$tweak,$tweak,$leperm
- stvx_u $tweak,0,$ivp
-
-Lxts_enc_ret:
- mtspr 256,r12 # restore vrsave
- li r3,0
- blr
- .long 0
- .byte 0,12,0x04,0,0x80,6,6,0
- .long 0
-.size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt
-
-.globl .${prefix}_xts_decrypt
-.align 5
-.${prefix}_xts_decrypt:
- mr $inp,r3 # reassign
- li r3,-1
- ${UCMP}i $len,16
- bltlr-
-
- lis r0,0xfff8
- mfspr r12,256 # save vrsave
- li r11,0
- mtspr 256,r0
-
- andi. r0,$len,15
- neg r0,r0
- andi. r0,r0,16
- sub $len,$len,r0
-
- vspltisb $seven,0x07 # 0x070707..07
- le?lvsl $leperm,r11,r11
- le?vspltisb $tmp,0x0f
- le?vxor $leperm,$leperm,$seven
-
- li $idx,15
- lvx $tweak,0,$ivp # load [unaligned] iv
- lvsl $inpperm,0,$ivp
- lvx $inptail,$idx,$ivp
- le?vxor $inpperm,$inpperm,$tmp
- vperm $tweak,$tweak,$inptail,$inpperm
-
- neg r11,$inp
- lvsr $inpperm,0,r11 # prepare for unaligned load
- lvx $inout,0,$inp
- addi $inp,$inp,15 # 15 is not typo
- le?vxor $inpperm,$inpperm,$tmp
-
- ${UCMP}i $key2,0 # key2==NULL?
- beq Lxts_dec_no_key2
-
- ?lvsl $keyperm,0,$key2 # prepare for unaligned key
- lwz $rounds,240($key2)
- srwi $rounds,$rounds,1
- subi $rounds,$rounds,1
- li $idx,16
-
- lvx $rndkey0,0,$key2
- lvx $rndkey1,$idx,$key2
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vxor $tweak,$tweak,$rndkey0
- lvx $rndkey0,$idx,$key2
- addi $idx,$idx,16
- mtctr $rounds
-
-Ltweak_xts_dec:
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vcipher $tweak,$tweak,$rndkey1
- lvx $rndkey1,$idx,$key2
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vcipher $tweak,$tweak,$rndkey0
- lvx $rndkey0,$idx,$key2
- addi $idx,$idx,16
- bdnz Ltweak_xts_dec
-
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vcipher $tweak,$tweak,$rndkey1
- lvx $rndkey1,$idx,$key2
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vcipherlast $tweak,$tweak,$rndkey0
-
- li $ivp,0 # don't chain the tweak
- b Lxts_dec
-
-Lxts_dec_no_key2:
- neg $idx,$len
- andi. $idx,$idx,15
- add $len,$len,$idx # in "tweak chaining"
- # mode only complete
- # blocks are processed
-Lxts_dec:
- lvx $inptail,0,$inp
- addi $inp,$inp,16
-
- ?lvsl $keyperm,0,$key1 # prepare for unaligned key
- lwz $rounds,240($key1)
- srwi $rounds,$rounds,1
- subi $rounds,$rounds,1
- li $idx,16
-
- vslb $eighty7,$seven,$seven # 0x808080..80
- vor $eighty7,$eighty7,$seven # 0x878787..87
- vspltisb $tmp,1 # 0x010101..01
- vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
-
- ${UCMP}i $len,96
- bge _aesp8_xts_decrypt6x
-
- lvx $rndkey0,0,$key1
- lvx $rndkey1,$idx,$key1
- addi $idx,$idx,16
- vperm $inout,$inout,$inptail,$inpperm
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vxor $inout,$inout,$tweak
- vxor $inout,$inout,$rndkey0
- lvx $rndkey0,$idx,$key1
- addi $idx,$idx,16
- mtctr $rounds
-
- ${UCMP}i $len,16
- blt Ltail_xts_dec
- be?b Loop_xts_dec
-
-.align 5
-Loop_xts_dec:
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vncipher $inout,$inout,$rndkey1
- lvx $rndkey1,$idx,$key1
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vncipher $inout,$inout,$rndkey0
- lvx $rndkey0,$idx,$key1
- addi $idx,$idx,16
- bdnz Loop_xts_dec
-
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vncipher $inout,$inout,$rndkey1
- lvx $rndkey1,$idx,$key1
- li $idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vxor $rndkey0,$rndkey0,$tweak
- vncipherlast $output,$inout,$rndkey0
-
- le?vperm $tmp,$output,$output,$leperm
- be?nop
- le?stvx_u $tmp,0,$out
- be?stvx_u $output,0,$out
- addi $out,$out,16
-
- subic. $len,$len,16
- beq Lxts_dec_done
-
- vmr $inout,$inptail
- lvx $inptail,0,$inp
- addi $inp,$inp,16
- lvx $rndkey0,0,$key1
- lvx $rndkey1,$idx,$key1
- addi $idx,$idx,16
-
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- vand $tmp,$tmp,$eighty7
- vxor $tweak,$tweak,$tmp
-
- vperm $inout,$inout,$inptail,$inpperm
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vxor $inout,$inout,$tweak
- vxor $inout,$inout,$rndkey0
- lvx $rndkey0,$idx,$key1
- addi $idx,$idx,16
-
- mtctr $rounds
- ${UCMP}i $len,16
- bge Loop_xts_dec
-
-Ltail_xts_dec:
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak1,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- vand $tmp,$tmp,$eighty7
- vxor $tweak1,$tweak1,$tmp
-
- subi $inp,$inp,16
- add $inp,$inp,$len
-
- vxor $inout,$inout,$tweak # :-(
- vxor $inout,$inout,$tweak1 # :-)
-
-Loop_xts_dec_short:
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vncipher $inout,$inout,$rndkey1
- lvx $rndkey1,$idx,$key1
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vncipher $inout,$inout,$rndkey0
- lvx $rndkey0,$idx,$key1
- addi $idx,$idx,16
- bdnz Loop_xts_dec_short
-
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vncipher $inout,$inout,$rndkey1
- lvx $rndkey1,$idx,$key1
- li $idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vxor $rndkey0,$rndkey0,$tweak1
- vncipherlast $output,$inout,$rndkey0
-
- le?vperm $tmp,$output,$output,$leperm
- be?nop
- le?stvx_u $tmp,0,$out
- be?stvx_u $output,0,$out
-
- vmr $inout,$inptail
- lvx $inptail,0,$inp
- #addi $inp,$inp,16
- lvx $rndkey0,0,$key1
- lvx $rndkey1,$idx,$key1
- addi $idx,$idx,16
- vperm $inout,$inout,$inptail,$inpperm
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
-
- lvsr $inpperm,0,$len # $inpperm is no longer needed
- vxor $inptail,$inptail,$inptail # $inptail is no longer needed
- vspltisb $tmp,-1
- vperm $inptail,$inptail,$tmp,$inpperm
- vsel $inout,$inout,$output,$inptail
-
- vxor $rndkey0,$rndkey0,$tweak
- vxor $inout,$inout,$rndkey0
- lvx $rndkey0,$idx,$key1
- addi $idx,$idx,16
-
- subi r11,$out,1
- mtctr $len
- li $len,16
-Loop_xts_dec_steal:
- lbzu r0,1(r11)
- stb r0,16(r11)
- bdnz Loop_xts_dec_steal
-
- mtctr $rounds
- b Loop_xts_dec # one more time...
-
-Lxts_dec_done:
- ${UCMP}i $ivp,0
- beq Lxts_dec_ret
-
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- vand $tmp,$tmp,$eighty7
- vxor $tweak,$tweak,$tmp
-
- le?vperm $tweak,$tweak,$tweak,$leperm
- stvx_u $tweak,0,$ivp
-
-Lxts_dec_ret:
- mtspr 256,r12 # restore vrsave
- li r3,0
- blr
- .long 0
- .byte 0,12,0x04,0,0x80,6,6,0
- .long 0
-.size .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
-___
-#########################################################################
-{{ # Optimized XTS procedures #
-my $key_=$key2;
-my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
- $x00=0 if ($flavour =~ /osx/);
-my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5));
-my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
-my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
-my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
- # v26-v31 last 6 round keys
-my ($keyperm)=($out0); # aliases with "caller", redundant assignment
-my $taillen=$x70;
-
-$code.=<<___;
-.align 5
-_aesp8_xts_encrypt6x:
- $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
- mflr r11
- li r7,`$FRAME+8*16+15`
- li r3,`$FRAME+8*16+31`
- $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
- stvx v20,r7,$sp # ABI says so
- addi r7,r7,32
- stvx v21,r3,$sp
- addi r3,r3,32
- stvx v22,r7,$sp
- addi r7,r7,32
- stvx v23,r3,$sp
- addi r3,r3,32
- stvx v24,r7,$sp
- addi r7,r7,32
- stvx v25,r3,$sp
- addi r3,r3,32
- stvx v26,r7,$sp
- addi r7,r7,32
- stvx v27,r3,$sp
- addi r3,r3,32
- stvx v28,r7,$sp
- addi r7,r7,32
- stvx v29,r3,$sp
- addi r3,r3,32
- stvx v30,r7,$sp
- stvx v31,r3,$sp
- li r0,-1
- stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
- li $x10,0x10
- $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
- li $x20,0x20
- $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
- li $x30,0x30
- $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
- li $x40,0x40
- $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
- li $x50,0x50
- $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
- li $x60,0x60
- $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
- li $x70,0x70
- mtspr 256,r0
-
- subi $rounds,$rounds,3 # -4 in total
-
- lvx $rndkey0,$x00,$key1 # load key schedule
- lvx v30,$x10,$key1
- addi $key1,$key1,0x20
- lvx v31,$x00,$key1
- ?vperm $rndkey0,$rndkey0,v30,$keyperm
- addi $key_,$sp,`$FRAME+15`
- mtctr $rounds
-
-Load_xts_enc_key:
- ?vperm v24,v30,v31,$keyperm
- lvx v30,$x10,$key1
- addi $key1,$key1,0x20
- stvx v24,$x00,$key_ # off-load round[1]
- ?vperm v25,v31,v30,$keyperm
- lvx v31,$x00,$key1
- stvx v25,$x10,$key_ # off-load round[2]
- addi $key_,$key_,0x20
- bdnz Load_xts_enc_key
-
- lvx v26,$x10,$key1
- ?vperm v24,v30,v31,$keyperm
- lvx v27,$x20,$key1
- stvx v24,$x00,$key_ # off-load round[3]
- ?vperm v25,v31,v26,$keyperm
- lvx v28,$x30,$key1
- stvx v25,$x10,$key_ # off-load round[4]
- addi $key_,$sp,`$FRAME+15` # rewind $key_
- ?vperm v26,v26,v27,$keyperm
- lvx v29,$x40,$key1
- ?vperm v27,v27,v28,$keyperm
- lvx v30,$x50,$key1
- ?vperm v28,v28,v29,$keyperm
- lvx v31,$x60,$key1
- ?vperm v29,v29,v30,$keyperm
- lvx $twk5,$x70,$key1 # borrow $twk5
- ?vperm v30,v30,v31,$keyperm
- lvx v24,$x00,$key_ # pre-load round[1]
- ?vperm v31,v31,$twk5,$keyperm
- lvx v25,$x10,$key_ # pre-load round[2]
-
- vperm $in0,$inout,$inptail,$inpperm
- subi $inp,$inp,31 # undo "caller"
- vxor $twk0,$tweak,$rndkey0
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- vand $tmp,$tmp,$eighty7
- vxor $out0,$in0,$twk0
- vxor $tweak,$tweak,$tmp
-
- lvx_u $in1,$x10,$inp
- vxor $twk1,$tweak,$rndkey0
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- le?vperm $in1,$in1,$in1,$leperm
- vand $tmp,$tmp,$eighty7
- vxor $out1,$in1,$twk1
- vxor $tweak,$tweak,$tmp
-
- lvx_u $in2,$x20,$inp
- andi. $taillen,$len,15
- vxor $twk2,$tweak,$rndkey0
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- le?vperm $in2,$in2,$in2,$leperm
- vand $tmp,$tmp,$eighty7
- vxor $out2,$in2,$twk2
- vxor $tweak,$tweak,$tmp
-
- lvx_u $in3,$x30,$inp
- sub $len,$len,$taillen
- vxor $twk3,$tweak,$rndkey0
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- le?vperm $in3,$in3,$in3,$leperm
- vand $tmp,$tmp,$eighty7
- vxor $out3,$in3,$twk3
- vxor $tweak,$tweak,$tmp
-
- lvx_u $in4,$x40,$inp
- subi $len,$len,0x60
- vxor $twk4,$tweak,$rndkey0
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- le?vperm $in4,$in4,$in4,$leperm
- vand $tmp,$tmp,$eighty7
- vxor $out4,$in4,$twk4
- vxor $tweak,$tweak,$tmp
-
- lvx_u $in5,$x50,$inp
- addi $inp,$inp,0x60
- vxor $twk5,$tweak,$rndkey0
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- le?vperm $in5,$in5,$in5,$leperm
- vand $tmp,$tmp,$eighty7
- vxor $out5,$in5,$twk5
- vxor $tweak,$tweak,$tmp
-
- vxor v31,v31,$rndkey0
- mtctr $rounds
- b Loop_xts_enc6x
-
-.align 5
-Loop_xts_enc6x:
- vcipher $out0,$out0,v24
- vcipher $out1,$out1,v24
- vcipher $out2,$out2,v24
- vcipher $out3,$out3,v24
- vcipher $out4,$out4,v24
- vcipher $out5,$out5,v24
- lvx v24,$x20,$key_ # round[3]
- addi $key_,$key_,0x20
-
- vcipher $out0,$out0,v25
- vcipher $out1,$out1,v25
- vcipher $out2,$out2,v25
- vcipher $out3,$out3,v25
- vcipher $out4,$out4,v25
- vcipher $out5,$out5,v25
- lvx v25,$x10,$key_ # round[4]
- bdnz Loop_xts_enc6x
-
- subic $len,$len,96 # $len-=96
- vxor $in0,$twk0,v31 # xor with last round key
- vcipher $out0,$out0,v24
- vcipher $out1,$out1,v24
- vsrab $tmp,$tweak,$seven # next tweak value
- vxor $twk0,$tweak,$rndkey0
- vaddubm $tweak,$tweak,$tweak
- vcipher $out2,$out2,v24
- vcipher $out3,$out3,v24
- vsldoi $tmp,$tmp,$tmp,15
- vcipher $out4,$out4,v24
- vcipher $out5,$out5,v24
-
- subfe. r0,r0,r0 # borrow?-1:0
- vand $tmp,$tmp,$eighty7
- vcipher $out0,$out0,v25
- vcipher $out1,$out1,v25
- vxor $tweak,$tweak,$tmp
- vcipher $out2,$out2,v25
- vcipher $out3,$out3,v25
- vxor $in1,$twk1,v31
- vsrab $tmp,$tweak,$seven # next tweak value
- vxor $twk1,$tweak,$rndkey0
- vcipher $out4,$out4,v25
- vcipher $out5,$out5,v25
-
- and r0,r0,$len
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- vcipher $out0,$out0,v26
- vcipher $out1,$out1,v26
- vand $tmp,$tmp,$eighty7
- vcipher $out2,$out2,v26
- vcipher $out3,$out3,v26
- vxor $tweak,$tweak,$tmp
- vcipher $out4,$out4,v26
- vcipher $out5,$out5,v26
-
- add $inp,$inp,r0 # $inp is adjusted in such
- # way that at exit from the
- # loop inX-in5 are loaded
- # with last "words"
- vxor $in2,$twk2,v31
- vsrab $tmp,$tweak,$seven # next tweak value
- vxor $twk2,$tweak,$rndkey0
- vaddubm $tweak,$tweak,$tweak
- vcipher $out0,$out0,v27
- vcipher $out1,$out1,v27
- vsldoi $tmp,$tmp,$tmp,15
- vcipher $out2,$out2,v27
- vcipher $out3,$out3,v27
- vand $tmp,$tmp,$eighty7
- vcipher $out4,$out4,v27
- vcipher $out5,$out5,v27
-
- addi $key_,$sp,`$FRAME+15` # rewind $key_
- vxor $tweak,$tweak,$tmp
- vcipher $out0,$out0,v28
- vcipher $out1,$out1,v28
- vxor $in3,$twk3,v31
- vsrab $tmp,$tweak,$seven # next tweak value
- vxor $twk3,$tweak,$rndkey0
- vcipher $out2,$out2,v28
- vcipher $out3,$out3,v28
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- vcipher $out4,$out4,v28
- vcipher $out5,$out5,v28
- lvx v24,$x00,$key_ # re-pre-load round[1]
- vand $tmp,$tmp,$eighty7
-
- vcipher $out0,$out0,v29
- vcipher $out1,$out1,v29
- vxor $tweak,$tweak,$tmp
- vcipher $out2,$out2,v29
- vcipher $out3,$out3,v29
- vxor $in4,$twk4,v31
- vsrab $tmp,$tweak,$seven # next tweak value
- vxor $twk4,$tweak,$rndkey0
- vcipher $out4,$out4,v29
- vcipher $out5,$out5,v29
- lvx v25,$x10,$key_ # re-pre-load round[2]
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
-
- vcipher $out0,$out0,v30
- vcipher $out1,$out1,v30
- vand $tmp,$tmp,$eighty7
- vcipher $out2,$out2,v30
- vcipher $out3,$out3,v30
- vxor $tweak,$tweak,$tmp
- vcipher $out4,$out4,v30
- vcipher $out5,$out5,v30
- vxor $in5,$twk5,v31
- vsrab $tmp,$tweak,$seven # next tweak value
- vxor $twk5,$tweak,$rndkey0
-
- vcipherlast $out0,$out0,$in0
- lvx_u $in0,$x00,$inp # load next input block
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- vcipherlast $out1,$out1,$in1
- lvx_u $in1,$x10,$inp
- vcipherlast $out2,$out2,$in2
- le?vperm $in0,$in0,$in0,$leperm
- lvx_u $in2,$x20,$inp
- vand $tmp,$tmp,$eighty7
- vcipherlast $out3,$out3,$in3
- le?vperm $in1,$in1,$in1,$leperm
- lvx_u $in3,$x30,$inp
- vcipherlast $out4,$out4,$in4
- le?vperm $in2,$in2,$in2,$leperm
- lvx_u $in4,$x40,$inp
- vxor $tweak,$tweak,$tmp
- vcipherlast $tmp,$out5,$in5 # last block might be needed
- # in stealing mode
- le?vperm $in3,$in3,$in3,$leperm
- lvx_u $in5,$x50,$inp
- addi $inp,$inp,0x60
- le?vperm $in4,$in4,$in4,$leperm
- le?vperm $in5,$in5,$in5,$leperm
-
- le?vperm $out0,$out0,$out0,$leperm
- le?vperm $out1,$out1,$out1,$leperm
- stvx_u $out0,$x00,$out # store output
- vxor $out0,$in0,$twk0
- le?vperm $out2,$out2,$out2,$leperm
- stvx_u $out1,$x10,$out
- vxor $out1,$in1,$twk1
- le?vperm $out3,$out3,$out3,$leperm
- stvx_u $out2,$x20,$out
- vxor $out2,$in2,$twk2
- le?vperm $out4,$out4,$out4,$leperm
- stvx_u $out3,$x30,$out
- vxor $out3,$in3,$twk3
- le?vperm $out5,$tmp,$tmp,$leperm
- stvx_u $out4,$x40,$out
- vxor $out4,$in4,$twk4
- le?stvx_u $out5,$x50,$out
- be?stvx_u $tmp, $x50,$out
- vxor $out5,$in5,$twk5
- addi $out,$out,0x60
-
- mtctr $rounds
- beq Loop_xts_enc6x # did $len-=96 borrow?
-
- addic. $len,$len,0x60
- beq Lxts_enc6x_zero
- cmpwi $len,0x20
- blt Lxts_enc6x_one
- nop
- beq Lxts_enc6x_two
- cmpwi $len,0x40
- blt Lxts_enc6x_three
- nop
- beq Lxts_enc6x_four
-
-Lxts_enc6x_five:
- vxor $out0,$in1,$twk0
- vxor $out1,$in2,$twk1
- vxor $out2,$in3,$twk2
- vxor $out3,$in4,$twk3
- vxor $out4,$in5,$twk4
-
- bl _aesp8_xts_enc5x
-
- le?vperm $out0,$out0,$out0,$leperm
- vmr $twk0,$twk5 # unused tweak
- le?vperm $out1,$out1,$out1,$leperm
- stvx_u $out0,$x00,$out # store output
- le?vperm $out2,$out2,$out2,$leperm
- stvx_u $out1,$x10,$out
- le?vperm $out3,$out3,$out3,$leperm
- stvx_u $out2,$x20,$out
- vxor $tmp,$out4,$twk5 # last block prep for stealing
- le?vperm $out4,$out4,$out4,$leperm
- stvx_u $out3,$x30,$out
- stvx_u $out4,$x40,$out
- addi $out,$out,0x50
- bne Lxts_enc6x_steal
- b Lxts_enc6x_done
-
-.align 4
-Lxts_enc6x_four:
- vxor $out0,$in2,$twk0
- vxor $out1,$in3,$twk1
- vxor $out2,$in4,$twk2
- vxor $out3,$in5,$twk3
- vxor $out4,$out4,$out4
-
- bl _aesp8_xts_enc5x
-
- le?vperm $out0,$out0,$out0,$leperm
- vmr $twk0,$twk4 # unused tweak
- le?vperm $out1,$out1,$out1,$leperm
- stvx_u $out0,$x00,$out # store output
- le?vperm $out2,$out2,$out2,$leperm
- stvx_u $out1,$x10,$out
- vxor $tmp,$out3,$twk4 # last block prep for stealing
- le?vperm $out3,$out3,$out3,$leperm
- stvx_u $out2,$x20,$out
- stvx_u $out3,$x30,$out
- addi $out,$out,0x40
- bne Lxts_enc6x_steal
- b Lxts_enc6x_done
-
-.align 4
-Lxts_enc6x_three:
- vxor $out0,$in3,$twk0
- vxor $out1,$in4,$twk1
- vxor $out2,$in5,$twk2
- vxor $out3,$out3,$out3
- vxor $out4,$out4,$out4
-
- bl _aesp8_xts_enc5x
-
- le?vperm $out0,$out0,$out0,$leperm
- vmr $twk0,$twk3 # unused tweak
- le?vperm $out1,$out1,$out1,$leperm
- stvx_u $out0,$x00,$out # store output
- vxor $tmp,$out2,$twk3 # last block prep for stealing
- le?vperm $out2,$out2,$out2,$leperm
- stvx_u $out1,$x10,$out
- stvx_u $out2,$x20,$out
- addi $out,$out,0x30
- bne Lxts_enc6x_steal
- b Lxts_enc6x_done
-
-.align 4
-Lxts_enc6x_two:
- vxor $out0,$in4,$twk0
- vxor $out1,$in5,$twk1
- vxor $out2,$out2,$out2
- vxor $out3,$out3,$out3
- vxor $out4,$out4,$out4
-
- bl _aesp8_xts_enc5x
-
- le?vperm $out0,$out0,$out0,$leperm
- vmr $twk0,$twk2 # unused tweak
- vxor $tmp,$out1,$twk2 # last block prep for stealing
- le?vperm $out1,$out1,$out1,$leperm
- stvx_u $out0,$x00,$out # store output
- stvx_u $out1,$x10,$out
- addi $out,$out,0x20
- bne Lxts_enc6x_steal
- b Lxts_enc6x_done
-
-.align 4
-Lxts_enc6x_one:
- vxor $out0,$in5,$twk0
- nop
-Loop_xts_enc1x:
- vcipher $out0,$out0,v24
- lvx v24,$x20,$key_ # round[3]
- addi $key_,$key_,0x20
-
- vcipher $out0,$out0,v25
- lvx v25,$x10,$key_ # round[4]
- bdnz Loop_xts_enc1x
-
- add $inp,$inp,$taillen
- cmpwi $taillen,0
- vcipher $out0,$out0,v24
-
- subi $inp,$inp,16
- vcipher $out0,$out0,v25
-
- lvsr $inpperm,0,$taillen
- vcipher $out0,$out0,v26
-
- lvx_u $in0,0,$inp
- vcipher $out0,$out0,v27
-
- addi $key_,$sp,`$FRAME+15` # rewind $key_
- vcipher $out0,$out0,v28
- lvx v24,$x00,$key_ # re-pre-load round[1]
-
- vcipher $out0,$out0,v29
- lvx v25,$x10,$key_ # re-pre-load round[2]
- vxor $twk0,$twk0,v31
-
- le?vperm $in0,$in0,$in0,$leperm
- vcipher $out0,$out0,v30
-
- vperm $in0,$in0,$in0,$inpperm
- vcipherlast $out0,$out0,$twk0
-
- vmr $twk0,$twk1 # unused tweak
- vxor $tmp,$out0,$twk1 # last block prep for stealing
- le?vperm $out0,$out0,$out0,$leperm
- stvx_u $out0,$x00,$out # store output
- addi $out,$out,0x10
- bne Lxts_enc6x_steal
- b Lxts_enc6x_done
-
-.align 4
-Lxts_enc6x_zero:
- cmpwi $taillen,0
- beq Lxts_enc6x_done
-
- add $inp,$inp,$taillen
- subi $inp,$inp,16
- lvx_u $in0,0,$inp
- lvsr $inpperm,0,$taillen # $in5 is no more
- le?vperm $in0,$in0,$in0,$leperm
- vperm $in0,$in0,$in0,$inpperm
- vxor $tmp,$tmp,$twk0
-Lxts_enc6x_steal:
- vxor $in0,$in0,$twk0
- vxor $out0,$out0,$out0
- vspltisb $out1,-1
- vperm $out0,$out0,$out1,$inpperm
- vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember?
-
- subi r30,$out,17
- subi $out,$out,16
- mtctr $taillen
-Loop_xts_enc6x_steal:
- lbzu r0,1(r30)
- stb r0,16(r30)
- bdnz Loop_xts_enc6x_steal
-
- li $taillen,0
- mtctr $rounds
- b Loop_xts_enc1x # one more time...
-
-.align 4
-Lxts_enc6x_done:
- ${UCMP}i $ivp,0
- beq Lxts_enc6x_ret
-
- vxor $tweak,$twk0,$rndkey0
- le?vperm $tweak,$tweak,$tweak,$leperm
- stvx_u $tweak,0,$ivp
-
-Lxts_enc6x_ret:
- mtlr r11
- li r10,`$FRAME+15`
- li r11,`$FRAME+31`
- stvx $seven,r10,$sp # wipe copies of round keys
- addi r10,r10,32
- stvx $seven,r11,$sp
- addi r11,r11,32
- stvx $seven,r10,$sp
- addi r10,r10,32
- stvx $seven,r11,$sp
- addi r11,r11,32
- stvx $seven,r10,$sp
- addi r10,r10,32
- stvx $seven,r11,$sp
- addi r11,r11,32
- stvx $seven,r10,$sp
- addi r10,r10,32
- stvx $seven,r11,$sp
- addi r11,r11,32
-
- mtspr 256,$vrsave
- lvx v20,r10,$sp # ABI says so
- addi r10,r10,32
- lvx v21,r11,$sp
- addi r11,r11,32
- lvx v22,r10,$sp
- addi r10,r10,32
- lvx v23,r11,$sp
- addi r11,r11,32
- lvx v24,r10,$sp
- addi r10,r10,32
- lvx v25,r11,$sp
- addi r11,r11,32
- lvx v26,r10,$sp
- addi r10,r10,32
- lvx v27,r11,$sp
- addi r11,r11,32
- lvx v28,r10,$sp
- addi r10,r10,32
- lvx v29,r11,$sp
- addi r11,r11,32
- lvx v30,r10,$sp
- lvx v31,r11,$sp
- $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
- $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
- $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
- $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
- $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
- $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
- addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
- blr
- .long 0
- .byte 0,12,0x04,1,0x80,6,6,0
- .long 0
-
-.align 5
-_aesp8_xts_enc5x:
- vcipher $out0,$out0,v24
- vcipher $out1,$out1,v24
- vcipher $out2,$out2,v24
- vcipher $out3,$out3,v24
- vcipher $out4,$out4,v24
- lvx v24,$x20,$key_ # round[3]
- addi $key_,$key_,0x20
-
- vcipher $out0,$out0,v25
- vcipher $out1,$out1,v25
- vcipher $out2,$out2,v25
- vcipher $out3,$out3,v25
- vcipher $out4,$out4,v25
- lvx v25,$x10,$key_ # round[4]
- bdnz _aesp8_xts_enc5x
-
- add $inp,$inp,$taillen
- cmpwi $taillen,0
- vcipher $out0,$out0,v24
- vcipher $out1,$out1,v24
- vcipher $out2,$out2,v24
- vcipher $out3,$out3,v24
- vcipher $out4,$out4,v24
-
- subi $inp,$inp,16
- vcipher $out0,$out0,v25
- vcipher $out1,$out1,v25
- vcipher $out2,$out2,v25
- vcipher $out3,$out3,v25
- vcipher $out4,$out4,v25
- vxor $twk0,$twk0,v31
-
- vcipher $out0,$out0,v26
- lvsr $inpperm,0,$taillen # $in5 is no more
- vcipher $out1,$out1,v26
- vcipher $out2,$out2,v26
- vcipher $out3,$out3,v26
- vcipher $out4,$out4,v26
- vxor $in1,$twk1,v31
-
- vcipher $out0,$out0,v27
- lvx_u $in0,0,$inp
- vcipher $out1,$out1,v27
- vcipher $out2,$out2,v27
- vcipher $out3,$out3,v27
- vcipher $out4,$out4,v27
- vxor $in2,$twk2,v31
-
- addi $key_,$sp,`$FRAME+15` # rewind $key_
- vcipher $out0,$out0,v28
- vcipher $out1,$out1,v28
- vcipher $out2,$out2,v28
- vcipher $out3,$out3,v28
- vcipher $out4,$out4,v28
- lvx v24,$x00,$key_ # re-pre-load round[1]
- vxor $in3,$twk3,v31
-
- vcipher $out0,$out0,v29
- le?vperm $in0,$in0,$in0,$leperm
- vcipher $out1,$out1,v29
- vcipher $out2,$out2,v29
- vcipher $out3,$out3,v29
- vcipher $out4,$out4,v29
- lvx v25,$x10,$key_ # re-pre-load round[2]
- vxor $in4,$twk4,v31
-
- vcipher $out0,$out0,v30
- vperm $in0,$in0,$in0,$inpperm
- vcipher $out1,$out1,v30
- vcipher $out2,$out2,v30
- vcipher $out3,$out3,v30
- vcipher $out4,$out4,v30
-
- vcipherlast $out0,$out0,$twk0
- vcipherlast $out1,$out1,$in1
- vcipherlast $out2,$out2,$in2
- vcipherlast $out3,$out3,$in3
- vcipherlast $out4,$out4,$in4
- blr
- .long 0
- .byte 0,12,0x14,0,0,0,0,0
-
-.align 5
-_aesp8_xts_decrypt6x:
- $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
- mflr r11
- li r7,`$FRAME+8*16+15`
- li r3,`$FRAME+8*16+31`
- $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
- stvx v20,r7,$sp # ABI says so
- addi r7,r7,32
- stvx v21,r3,$sp
- addi r3,r3,32
- stvx v22,r7,$sp
- addi r7,r7,32
- stvx v23,r3,$sp
- addi r3,r3,32
- stvx v24,r7,$sp
- addi r7,r7,32
- stvx v25,r3,$sp
- addi r3,r3,32
- stvx v26,r7,$sp
- addi r7,r7,32
- stvx v27,r3,$sp
- addi r3,r3,32
- stvx v28,r7,$sp
- addi r7,r7,32
- stvx v29,r3,$sp
- addi r3,r3,32
- stvx v30,r7,$sp
- stvx v31,r3,$sp
- li r0,-1
- stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
- li $x10,0x10
- $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
- li $x20,0x20
- $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
- li $x30,0x30
- $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
- li $x40,0x40
- $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
- li $x50,0x50
- $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
- li $x60,0x60
- $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
- li $x70,0x70
- mtspr 256,r0
-
- subi $rounds,$rounds,3 # -4 in total
-
- lvx $rndkey0,$x00,$key1 # load key schedule
- lvx v30,$x10,$key1
- addi $key1,$key1,0x20
- lvx v31,$x00,$key1
- ?vperm $rndkey0,$rndkey0,v30,$keyperm
- addi $key_,$sp,`$FRAME+15`
- mtctr $rounds
-
-Load_xts_dec_key:
- ?vperm v24,v30,v31,$keyperm
- lvx v30,$x10,$key1
- addi $key1,$key1,0x20
- stvx v24,$x00,$key_ # off-load round[1]
- ?vperm v25,v31,v30,$keyperm
- lvx v31,$x00,$key1
- stvx v25,$x10,$key_ # off-load round[2]
- addi $key_,$key_,0x20
- bdnz Load_xts_dec_key
-
- lvx v26,$x10,$key1
- ?vperm v24,v30,v31,$keyperm
- lvx v27,$x20,$key1
- stvx v24,$x00,$key_ # off-load round[3]
- ?vperm v25,v31,v26,$keyperm
- lvx v28,$x30,$key1
- stvx v25,$x10,$key_ # off-load round[4]
- addi $key_,$sp,`$FRAME+15` # rewind $key_
- ?vperm v26,v26,v27,$keyperm
- lvx v29,$x40,$key1
- ?vperm v27,v27,v28,$keyperm
- lvx v30,$x50,$key1
- ?vperm v28,v28,v29,$keyperm
- lvx v31,$x60,$key1
- ?vperm v29,v29,v30,$keyperm
- lvx $twk5,$x70,$key1 # borrow $twk5
- ?vperm v30,v30,v31,$keyperm
- lvx v24,$x00,$key_ # pre-load round[1]
- ?vperm v31,v31,$twk5,$keyperm
- lvx v25,$x10,$key_ # pre-load round[2]
-
- vperm $in0,$inout,$inptail,$inpperm
- subi $inp,$inp,31 # undo "caller"
- vxor $twk0,$tweak,$rndkey0
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- vand $tmp,$tmp,$eighty7
- vxor $out0,$in0,$twk0
- vxor $tweak,$tweak,$tmp
-
- lvx_u $in1,$x10,$inp
- vxor $twk1,$tweak,$rndkey0
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- le?vperm $in1,$in1,$in1,$leperm
- vand $tmp,$tmp,$eighty7
- vxor $out1,$in1,$twk1
- vxor $tweak,$tweak,$tmp
-
- lvx_u $in2,$x20,$inp
- andi. $taillen,$len,15
- vxor $twk2,$tweak,$rndkey0
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- le?vperm $in2,$in2,$in2,$leperm
- vand $tmp,$tmp,$eighty7
- vxor $out2,$in2,$twk2
- vxor $tweak,$tweak,$tmp
-
- lvx_u $in3,$x30,$inp
- sub $len,$len,$taillen
- vxor $twk3,$tweak,$rndkey0
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- le?vperm $in3,$in3,$in3,$leperm
- vand $tmp,$tmp,$eighty7
- vxor $out3,$in3,$twk3
- vxor $tweak,$tweak,$tmp
-
- lvx_u $in4,$x40,$inp
- subi $len,$len,0x60
- vxor $twk4,$tweak,$rndkey0
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- le?vperm $in4,$in4,$in4,$leperm
- vand $tmp,$tmp,$eighty7
- vxor $out4,$in4,$twk4
- vxor $tweak,$tweak,$tmp
-
- lvx_u $in5,$x50,$inp
- addi $inp,$inp,0x60
- vxor $twk5,$tweak,$rndkey0
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- le?vperm $in5,$in5,$in5,$leperm
- vand $tmp,$tmp,$eighty7
- vxor $out5,$in5,$twk5
- vxor $tweak,$tweak,$tmp
-
- vxor v31,v31,$rndkey0
- mtctr $rounds
- b Loop_xts_dec6x
-
-.align 5
-Loop_xts_dec6x:
- vncipher $out0,$out0,v24
- vncipher $out1,$out1,v24
- vncipher $out2,$out2,v24
- vncipher $out3,$out3,v24
- vncipher $out4,$out4,v24
- vncipher $out5,$out5,v24
- lvx v24,$x20,$key_ # round[3]
- addi $key_,$key_,0x20
-
- vncipher $out0,$out0,v25
- vncipher $out1,$out1,v25
- vncipher $out2,$out2,v25
- vncipher $out3,$out3,v25
- vncipher $out4,$out4,v25
- vncipher $out5,$out5,v25
- lvx v25,$x10,$key_ # round[4]
- bdnz Loop_xts_dec6x
-
- subic $len,$len,96 # $len-=96
- vxor $in0,$twk0,v31 # xor with last round key
- vncipher $out0,$out0,v24
- vncipher $out1,$out1,v24
- vsrab $tmp,$tweak,$seven # next tweak value
- vxor $twk0,$tweak,$rndkey0
- vaddubm $tweak,$tweak,$tweak
- vncipher $out2,$out2,v24
- vncipher $out3,$out3,v24
- vsldoi $tmp,$tmp,$tmp,15
- vncipher $out4,$out4,v24
- vncipher $out5,$out5,v24
-
- subfe. r0,r0,r0 # borrow?-1:0
- vand $tmp,$tmp,$eighty7
- vncipher $out0,$out0,v25
- vncipher $out1,$out1,v25
- vxor $tweak,$tweak,$tmp
- vncipher $out2,$out2,v25
- vncipher $out3,$out3,v25
- vxor $in1,$twk1,v31
- vsrab $tmp,$tweak,$seven # next tweak value
- vxor $twk1,$tweak,$rndkey0
- vncipher $out4,$out4,v25
- vncipher $out5,$out5,v25
-
- and r0,r0,$len
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- vncipher $out0,$out0,v26
- vncipher $out1,$out1,v26
- vand $tmp,$tmp,$eighty7
- vncipher $out2,$out2,v26
- vncipher $out3,$out3,v26
- vxor $tweak,$tweak,$tmp
- vncipher $out4,$out4,v26
- vncipher $out5,$out5,v26
-
- add $inp,$inp,r0 # $inp is adjusted in such
- # way that at exit from the
- # loop inX-in5 are loaded
- # with last "words"
- vxor $in2,$twk2,v31
- vsrab $tmp,$tweak,$seven # next tweak value
- vxor $twk2,$tweak,$rndkey0
- vaddubm $tweak,$tweak,$tweak
- vncipher $out0,$out0,v27
- vncipher $out1,$out1,v27
- vsldoi $tmp,$tmp,$tmp,15
- vncipher $out2,$out2,v27
- vncipher $out3,$out3,v27
- vand $tmp,$tmp,$eighty7
- vncipher $out4,$out4,v27
- vncipher $out5,$out5,v27
-
- addi $key_,$sp,`$FRAME+15` # rewind $key_
- vxor $tweak,$tweak,$tmp
- vncipher $out0,$out0,v28
- vncipher $out1,$out1,v28
- vxor $in3,$twk3,v31
- vsrab $tmp,$tweak,$seven # next tweak value
- vxor $twk3,$tweak,$rndkey0
- vncipher $out2,$out2,v28
- vncipher $out3,$out3,v28
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- vncipher $out4,$out4,v28
- vncipher $out5,$out5,v28
- lvx v24,$x00,$key_ # re-pre-load round[1]
- vand $tmp,$tmp,$eighty7
-
- vncipher $out0,$out0,v29
- vncipher $out1,$out1,v29
- vxor $tweak,$tweak,$tmp
- vncipher $out2,$out2,v29
- vncipher $out3,$out3,v29
- vxor $in4,$twk4,v31
- vsrab $tmp,$tweak,$seven # next tweak value
- vxor $twk4,$tweak,$rndkey0
- vncipher $out4,$out4,v29
- vncipher $out5,$out5,v29
- lvx v25,$x10,$key_ # re-pre-load round[2]
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
-
- vncipher $out0,$out0,v30
- vncipher $out1,$out1,v30
- vand $tmp,$tmp,$eighty7
- vncipher $out2,$out2,v30
- vncipher $out3,$out3,v30
- vxor $tweak,$tweak,$tmp
- vncipher $out4,$out4,v30
- vncipher $out5,$out5,v30
- vxor $in5,$twk5,v31
- vsrab $tmp,$tweak,$seven # next tweak value
- vxor $twk5,$tweak,$rndkey0
-
- vncipherlast $out0,$out0,$in0
- lvx_u $in0,$x00,$inp # load next input block
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- vncipherlast $out1,$out1,$in1
- lvx_u $in1,$x10,$inp
- vncipherlast $out2,$out2,$in2
- le?vperm $in0,$in0,$in0,$leperm
- lvx_u $in2,$x20,$inp
- vand $tmp,$tmp,$eighty7
- vncipherlast $out3,$out3,$in3
- le?vperm $in1,$in1,$in1,$leperm
- lvx_u $in3,$x30,$inp
- vncipherlast $out4,$out4,$in4
- le?vperm $in2,$in2,$in2,$leperm
- lvx_u $in4,$x40,$inp
- vxor $tweak,$tweak,$tmp
- vncipherlast $out5,$out5,$in5
- le?vperm $in3,$in3,$in3,$leperm
- lvx_u $in5,$x50,$inp
- addi $inp,$inp,0x60
- le?vperm $in4,$in4,$in4,$leperm
- le?vperm $in5,$in5,$in5,$leperm
-
- le?vperm $out0,$out0,$out0,$leperm
- le?vperm $out1,$out1,$out1,$leperm
- stvx_u $out0,$x00,$out # store output
- vxor $out0,$in0,$twk0
- le?vperm $out2,$out2,$out2,$leperm
- stvx_u $out1,$x10,$out
- vxor $out1,$in1,$twk1
- le?vperm $out3,$out3,$out3,$leperm
- stvx_u $out2,$x20,$out
- vxor $out2,$in2,$twk2
- le?vperm $out4,$out4,$out4,$leperm
- stvx_u $out3,$x30,$out
- vxor $out3,$in3,$twk3
- le?vperm $out5,$out5,$out5,$leperm
- stvx_u $out4,$x40,$out
- vxor $out4,$in4,$twk4
- stvx_u $out5,$x50,$out
- vxor $out5,$in5,$twk5
- addi $out,$out,0x60
-
- mtctr $rounds
- beq Loop_xts_dec6x # did $len-=96 borrow?
-
- addic. $len,$len,0x60
- beq Lxts_dec6x_zero
- cmpwi $len,0x20
- blt Lxts_dec6x_one
- nop
- beq Lxts_dec6x_two
- cmpwi $len,0x40
- blt Lxts_dec6x_three
- nop
- beq Lxts_dec6x_four
-
-Lxts_dec6x_five:
- vxor $out0,$in1,$twk0
- vxor $out1,$in2,$twk1
- vxor $out2,$in3,$twk2
- vxor $out3,$in4,$twk3
- vxor $out4,$in5,$twk4
-
- bl _aesp8_xts_dec5x
-
- le?vperm $out0,$out0,$out0,$leperm
- vmr $twk0,$twk5 # unused tweak
- vxor $twk1,$tweak,$rndkey0
- le?vperm $out1,$out1,$out1,$leperm
- stvx_u $out0,$x00,$out # store output
- vxor $out0,$in0,$twk1
- le?vperm $out2,$out2,$out2,$leperm
- stvx_u $out1,$x10,$out
- le?vperm $out3,$out3,$out3,$leperm
- stvx_u $out2,$x20,$out
- le?vperm $out4,$out4,$out4,$leperm
- stvx_u $out3,$x30,$out
- stvx_u $out4,$x40,$out
- addi $out,$out,0x50
- bne Lxts_dec6x_steal
- b Lxts_dec6x_done
-
-.align 4
-Lxts_dec6x_four:
- vxor $out0,$in2,$twk0
- vxor $out1,$in3,$twk1
- vxor $out2,$in4,$twk2
- vxor $out3,$in5,$twk3
- vxor $out4,$out4,$out4
-
- bl _aesp8_xts_dec5x
-
- le?vperm $out0,$out0,$out0,$leperm
- vmr $twk0,$twk4 # unused tweak
- vmr $twk1,$twk5
- le?vperm $out1,$out1,$out1,$leperm
- stvx_u $out0,$x00,$out # store output
- vxor $out0,$in0,$twk5
- le?vperm $out2,$out2,$out2,$leperm
- stvx_u $out1,$x10,$out
- le?vperm $out3,$out3,$out3,$leperm
- stvx_u $out2,$x20,$out
- stvx_u $out3,$x30,$out
- addi $out,$out,0x40
- bne Lxts_dec6x_steal
- b Lxts_dec6x_done
-
-.align 4
-Lxts_dec6x_three:
- vxor $out0,$in3,$twk0
- vxor $out1,$in4,$twk1
- vxor $out2,$in5,$twk2
- vxor $out3,$out3,$out3
- vxor $out4,$out4,$out4
-
- bl _aesp8_xts_dec5x
-
- le?vperm $out0,$out0,$out0,$leperm
- vmr $twk0,$twk3 # unused tweak
- vmr $twk1,$twk4
- le?vperm $out1,$out1,$out1,$leperm
- stvx_u $out0,$x00,$out # store output
- vxor $out0,$in0,$twk4
- le?vperm $out2,$out2,$out2,$leperm
- stvx_u $out1,$x10,$out
- stvx_u $out2,$x20,$out
- addi $out,$out,0x30
- bne Lxts_dec6x_steal
- b Lxts_dec6x_done
-
-.align 4
-Lxts_dec6x_two:
- vxor $out0,$in4,$twk0
- vxor $out1,$in5,$twk1
- vxor $out2,$out2,$out2
- vxor $out3,$out3,$out3
- vxor $out4,$out4,$out4
-
- bl _aesp8_xts_dec5x
-
- le?vperm $out0,$out0,$out0,$leperm
- vmr $twk0,$twk2 # unused tweak
- vmr $twk1,$twk3
- le?vperm $out1,$out1,$out1,$leperm
- stvx_u $out0,$x00,$out # store output
- vxor $out0,$in0,$twk3
- stvx_u $out1,$x10,$out
- addi $out,$out,0x20
- bne Lxts_dec6x_steal
- b Lxts_dec6x_done
-
-.align 4
-Lxts_dec6x_one:
- vxor $out0,$in5,$twk0
- nop
-Loop_xts_dec1x:
- vncipher $out0,$out0,v24
- lvx v24,$x20,$key_ # round[3]
- addi $key_,$key_,0x20
-
- vncipher $out0,$out0,v25
- lvx v25,$x10,$key_ # round[4]
- bdnz Loop_xts_dec1x
-
- subi r0,$taillen,1
- vncipher $out0,$out0,v24
-
- andi. r0,r0,16
- cmpwi $taillen,0
- vncipher $out0,$out0,v25
-
- sub $inp,$inp,r0
- vncipher $out0,$out0,v26
-
- lvx_u $in0,0,$inp
- vncipher $out0,$out0,v27
-
- addi $key_,$sp,`$FRAME+15` # rewind $key_
- vncipher $out0,$out0,v28
- lvx v24,$x00,$key_ # re-pre-load round[1]
-
- vncipher $out0,$out0,v29
- lvx v25,$x10,$key_ # re-pre-load round[2]
- vxor $twk0,$twk0,v31
-
- le?vperm $in0,$in0,$in0,$leperm
- vncipher $out0,$out0,v30
-
- mtctr $rounds
- vncipherlast $out0,$out0,$twk0
-
- vmr $twk0,$twk1 # unused tweak
- vmr $twk1,$twk2
- le?vperm $out0,$out0,$out0,$leperm
- stvx_u $out0,$x00,$out # store output
- addi $out,$out,0x10
- vxor $out0,$in0,$twk2
- bne Lxts_dec6x_steal
- b Lxts_dec6x_done
-
-.align 4
-Lxts_dec6x_zero:
- cmpwi $taillen,0
- beq Lxts_dec6x_done
-
- lvx_u $in0,0,$inp
- le?vperm $in0,$in0,$in0,$leperm
- vxor $out0,$in0,$twk1
-Lxts_dec6x_steal:
- vncipher $out0,$out0,v24
- lvx v24,$x20,$key_ # round[3]
- addi $key_,$key_,0x20
-
- vncipher $out0,$out0,v25
- lvx v25,$x10,$key_ # round[4]
- bdnz Lxts_dec6x_steal
-
- add $inp,$inp,$taillen
- vncipher $out0,$out0,v24
-
- cmpwi $taillen,0
- vncipher $out0,$out0,v25
-
- lvx_u $in0,0,$inp
- vncipher $out0,$out0,v26
-
- lvsr $inpperm,0,$taillen # $in5 is no more
- vncipher $out0,$out0,v27
-
- addi $key_,$sp,`$FRAME+15` # rewind $key_
- vncipher $out0,$out0,v28
- lvx v24,$x00,$key_ # re-pre-load round[1]
-
- vncipher $out0,$out0,v29
- lvx v25,$x10,$key_ # re-pre-load round[2]
- vxor $twk1,$twk1,v31
-
- le?vperm $in0,$in0,$in0,$leperm
- vncipher $out0,$out0,v30
-
- vperm $in0,$in0,$in0,$inpperm
- vncipherlast $tmp,$out0,$twk1
-
- le?vperm $out0,$tmp,$tmp,$leperm
- le?stvx_u $out0,0,$out
- be?stvx_u $tmp,0,$out
-
- vxor $out0,$out0,$out0
- vspltisb $out1,-1
- vperm $out0,$out0,$out1,$inpperm
- vsel $out0,$in0,$tmp,$out0
- vxor $out0,$out0,$twk0
-
- subi r30,$out,1
- mtctr $taillen
-Loop_xts_dec6x_steal:
- lbzu r0,1(r30)
- stb r0,16(r30)
- bdnz Loop_xts_dec6x_steal
-
- li $taillen,0
- mtctr $rounds
- b Loop_xts_dec1x # one more time...
-
-.align 4
-Lxts_dec6x_done:
- ${UCMP}i $ivp,0
- beq Lxts_dec6x_ret
-
- vxor $tweak,$twk0,$rndkey0
- le?vperm $tweak,$tweak,$tweak,$leperm
- stvx_u $tweak,0,$ivp
-
-Lxts_dec6x_ret:
- mtlr r11
- li r10,`$FRAME+15`
- li r11,`$FRAME+31`
- stvx $seven,r10,$sp # wipe copies of round keys
- addi r10,r10,32
- stvx $seven,r11,$sp
- addi r11,r11,32
- stvx $seven,r10,$sp
- addi r10,r10,32
- stvx $seven,r11,$sp
- addi r11,r11,32
- stvx $seven,r10,$sp
- addi r10,r10,32
- stvx $seven,r11,$sp
- addi r11,r11,32
- stvx $seven,r10,$sp
- addi r10,r10,32
- stvx $seven,r11,$sp
- addi r11,r11,32
-
- mtspr 256,$vrsave
- lvx v20,r10,$sp # ABI says so
- addi r10,r10,32
- lvx v21,r11,$sp
- addi r11,r11,32
- lvx v22,r10,$sp
- addi r10,r10,32
- lvx v23,r11,$sp
- addi r11,r11,32
- lvx v24,r10,$sp
- addi r10,r10,32
- lvx v25,r11,$sp
- addi r11,r11,32
- lvx v26,r10,$sp
- addi r10,r10,32
- lvx v27,r11,$sp
- addi r11,r11,32
- lvx v28,r10,$sp
- addi r10,r10,32
- lvx v29,r11,$sp
- addi r11,r11,32
- lvx v30,r10,$sp
- lvx v31,r11,$sp
- $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
- $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
- $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
- $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
- $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
- $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
- addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
- blr
- .long 0
- .byte 0,12,0x04,1,0x80,6,6,0
- .long 0
-
-.align 5
-_aesp8_xts_dec5x:
- vncipher $out0,$out0,v24
- vncipher $out1,$out1,v24
- vncipher $out2,$out2,v24
- vncipher $out3,$out3,v24
- vncipher $out4,$out4,v24
- lvx v24,$x20,$key_ # round[3]
- addi $key_,$key_,0x20
-
- vncipher $out0,$out0,v25
- vncipher $out1,$out1,v25
- vncipher $out2,$out2,v25
- vncipher $out3,$out3,v25
- vncipher $out4,$out4,v25
- lvx v25,$x10,$key_ # round[4]
- bdnz _aesp8_xts_dec5x
-
- subi r0,$taillen,1
- vncipher $out0,$out0,v24
- vncipher $out1,$out1,v24
- vncipher $out2,$out2,v24
- vncipher $out3,$out3,v24
- vncipher $out4,$out4,v24
-
- andi. r0,r0,16
- cmpwi $taillen,0
- vncipher $out0,$out0,v25
- vncipher $out1,$out1,v25
- vncipher $out2,$out2,v25
- vncipher $out3,$out3,v25
- vncipher $out4,$out4,v25
- vxor $twk0,$twk0,v31
-
- sub $inp,$inp,r0
- vncipher $out0,$out0,v26
- vncipher $out1,$out1,v26
- vncipher $out2,$out2,v26
- vncipher $out3,$out3,v26
- vncipher $out4,$out4,v26
- vxor $in1,$twk1,v31
-
- vncipher $out0,$out0,v27
- lvx_u $in0,0,$inp
- vncipher $out1,$out1,v27
- vncipher $out2,$out2,v27
- vncipher $out3,$out3,v27
- vncipher $out4,$out4,v27
- vxor $in2,$twk2,v31
-
- addi $key_,$sp,`$FRAME+15` # rewind $key_
- vncipher $out0,$out0,v28
- vncipher $out1,$out1,v28
- vncipher $out2,$out2,v28
- vncipher $out3,$out3,v28
- vncipher $out4,$out4,v28
- lvx v24,$x00,$key_ # re-pre-load round[1]
- vxor $in3,$twk3,v31
-
- vncipher $out0,$out0,v29
- le?vperm $in0,$in0,$in0,$leperm
- vncipher $out1,$out1,v29
- vncipher $out2,$out2,v29
- vncipher $out3,$out3,v29
- vncipher $out4,$out4,v29
- lvx v25,$x10,$key_ # re-pre-load round[2]
- vxor $in4,$twk4,v31
-
- vncipher $out0,$out0,v30
- vncipher $out1,$out1,v30
- vncipher $out2,$out2,v30
- vncipher $out3,$out3,v30
- vncipher $out4,$out4,v30
-
- vncipherlast $out0,$out0,$twk0
- vncipherlast $out1,$out1,$in1
- vncipherlast $out2,$out2,$in2
- vncipherlast $out3,$out3,$in3
- vncipherlast $out4,$out4,$in4
- mtctr $rounds
- blr
- .long 0
- .byte 0,12,0x14,0,0,0,0,0
-___
-}} }}}
-
-my $consts=1;
-foreach(split("\n",$code)) {
- s/\`([^\`]*)\`/eval($1)/geo;
-
- # constants table endian-specific conversion
- if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
- my $conv=$3;
- my @bytes=();
-
- # convert to endian-agnostic format
- if ($1 eq "long") {
- foreach (split(/,\s*/,$2)) {
- my $l = /^0/?oct:int;
- push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
- }
- } else {
- @bytes = map(/^0/?oct:int,split(/,\s*/,$2));
- }
-
- # little-endian conversion
- if ($flavour =~ /le$/o) {
- SWITCH: for($conv) {
- /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
- /\?rev/ && do { @bytes=reverse(@bytes); last; };
- }
- }
-
- #emit
- print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
- next;
- }
- $consts=0 if (m/Lconsts:/o); # end of table
-
- # instructions prefixed with '?' are endian-specific and need
- # to be adjusted accordingly...
- if ($flavour =~ /le$/o) { # little-endian
- s/le\?//o or
- s/be\?/#be#/o or
- s/\?lvsr/lvsl/o or
- s/\?lvsl/lvsr/o or
- s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
- s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
- s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
- } else { # big-endian
- s/le\?/#le#/o or
- s/be\?//o or
- s/\?([a-z]+)/$1/o;
- }
-
- print $_,"\n";
-}
-
-close STDOUT or die "error closing STDOUT: $!";
diff --git a/crypto/fipsmodule/aes/internal.h b/crypto/fipsmodule/aes/internal.h
index 0685bc4..98b2a14d 100644
--- a/crypto/fipsmodule/aes/internal.h
+++ b/crypto/fipsmodule/aes/internal.h
@@ -59,12 +59,6 @@
OPENSSL_INLINE int vpaes_capable(void) { return CRYPTO_is_NEON_capable(); }
#endif
-#elif defined(OPENSSL_PPC64LE)
-#define HWAES
-
-OPENSSL_INLINE int hwaes_capable(void) {
- return CRYPTO_is_PPC64LE_vcrypto_capable();
-}
#endif
#endif // !NO_ASM
diff --git a/crypto/fipsmodule/bcm.c b/crypto/fipsmodule/bcm.c
index 1792134..e2e4d90 100644
--- a/crypto/fipsmodule/bcm.c
+++ b/crypto/fipsmodule/bcm.c
@@ -101,7 +101,6 @@
#include "self_check/fips.c"
#include "self_check/self_check.c"
#include "service_indicator/service_indicator.c"
-#include "sha/sha1-altivec.c"
#include "sha/sha1.c"
#include "sha/sha256.c"
#include "sha/sha512.c"
diff --git a/crypto/fipsmodule/bn/bn.c b/crypto/fipsmodule/bn/bn.c
index f3fbb7a..93fae56 100644
--- a/crypto/fipsmodule/bn/bn.c
+++ b/crypto/fipsmodule/bn/bn.c
@@ -386,23 +386,6 @@
}
int bn_resize_words(BIGNUM *bn, size_t words) {
-#if defined(OPENSSL_PPC64LE)
- // This is a workaround for a miscompilation bug in Clang 7.0.1 on POWER.
- // The unittests catch the miscompilation, if it occurs, and it manifests
- // as a crash in |bn_fits_in_words|.
- //
- // The bug only triggers if building in FIPS mode and with -O3. Clang 8.0.1
- // has the same bug but this workaround is not effective there---I've not
- // been able to find a workaround for 8.0.1.
- //
- // At the time of writing (2019-08-08), Clang git does *not* have this bug
- // and does not need this workaroud. The current git version should go on to
- // be Clang 10 thus, once we can depend on that, this can be removed.
- if (value_barrier_w((size_t)bn->width == words)) {
- return 1;
- }
-#endif
-
if ((size_t)bn->width <= words) {
if (!bn_wexpand(bn, words)) {
return 0;
diff --git a/crypto/fipsmodule/cipher/e_aes.c b/crypto/fipsmodule/cipher/e_aes.c
index e8e03fe..0db77b8 100644
--- a/crypto/fipsmodule/cipher/e_aes.c
+++ b/crypto/fipsmodule/cipher/e_aes.c
@@ -1468,8 +1468,6 @@
return hwaes_capable() && crypto_gcm_clmul_enabled();
#elif defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)
return hwaes_capable() && CRYPTO_is_ARMv8_PMULL_capable();
-#elif defined(OPENSSL_PPC64LE)
- return CRYPTO_is_PPC64LE_vcrypto_capable();
#else
return 0;
#endif
diff --git a/crypto/fipsmodule/modes/asm/ghashp8-ppc.pl b/crypto/fipsmodule/modes/asm/ghashp8-ppc.pl
deleted file mode 100644
index 0d12a77..0000000
--- a/crypto/fipsmodule/modes/asm/ghashp8-ppc.pl
+++ /dev/null
@@ -1,671 +0,0 @@
-#! /usr/bin/env perl
-# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
-#
-# Licensed under the OpenSSL license (the "License"). You may not use
-# this file except in compliance with the License. You can obtain a copy
-# in the file LICENSE in the source distribution or at
-# https://www.openssl.org/source/license.html
-
-#
-# ====================================================================
-# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
-# project. The module is, however, dual licensed under OpenSSL and
-# CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see http://www.openssl.org/~appro/cryptogams/.
-# ====================================================================
-#
-# GHASH for for PowerISA v2.07.
-#
-# July 2014
-#
-# Accurate performance measurements are problematic, because it's
-# always virtualized setup with possibly throttled processor.
-# Relative comparison is therefore more informative. This initial
-# version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x
-# faster than "4-bit" integer-only compiler-generated 64-bit code.
-# "Initial version" means that there is room for futher improvement.
-
-# May 2016
-#
-# 2x aggregated reduction improves performance by 50% (resulting
-# performance on POWER8 is 1 cycle per processed byte), and 4x
-# aggregated reduction - by 170% or 2.7x (resulting in 0.55 cpb).
-
-$flavour=shift;
-$output =shift;
-
-if ($flavour =~ /64/) {
- $SIZE_T=8;
- $LRSAVE=2*$SIZE_T;
- $STU="stdu";
- $POP="ld";
- $PUSH="std";
- $UCMP="cmpld";
- $SHRI="srdi";
-} elsif ($flavour =~ /32/) {
- $SIZE_T=4;
- $LRSAVE=$SIZE_T;
- $STU="stwu";
- $POP="lwz";
- $PUSH="stw";
- $UCMP="cmplw";
- $SHRI="srwi";
-} else { die "nonsense $flavour"; }
-
-$sp="r1";
-$FRAME=6*$SIZE_T+13*16; # 13*16 is for v20-v31 offload
-
-$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
-( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
-( $xlate="${dir}../../../perlasm/ppc-xlate.pl" and -f $xlate) or
-die "can't locate ppc-xlate.pl";
-
-open OUT,"| $^X \"$xlate\" $flavour \"$output\"" || die "can't call $xlate: $!";
-*STDOUT=*OUT;
-
-my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6)); # argument block
-
-my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3));
-my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12));
-my ($Xl1,$Xm1,$Xh1,$IN1,$H2,$H2h,$H2l)=map("v$_",(13..19));
-my $vrsave="r12";
-
-$code=<<___;
-.machine "any"
-
-.text
-
-.globl .gcm_init_p8
-.align 5
-.gcm_init_p8:
- li r0,-4096
- li r8,0x10
- mfspr $vrsave,256
- li r9,0x20
- mtspr 256,r0
- li r10,0x30
- lvx_u $H,0,r4 # load H
-
- vspltisb $xC2,-16 # 0xf0
- vspltisb $t0,1 # one
- vaddubm $xC2,$xC2,$xC2 # 0xe0
- vxor $zero,$zero,$zero
- vor $xC2,$xC2,$t0 # 0xe1
- vsldoi $xC2,$xC2,$zero,15 # 0xe1...
- vsldoi $t1,$zero,$t0,1 # ...1
- vaddubm $xC2,$xC2,$xC2 # 0xc2...
- vspltisb $t2,7
- vor $xC2,$xC2,$t1 # 0xc2....01
- vspltb $t1,$H,0 # most significant byte
- vsl $H,$H,$t0 # H<<=1
- vsrab $t1,$t1,$t2 # broadcast carry bit
- vand $t1,$t1,$xC2
- vxor $IN,$H,$t1 # twisted H
-
- vsldoi $H,$IN,$IN,8 # twist even more ...
- vsldoi $xC2,$zero,$xC2,8 # 0xc2.0
- vsldoi $Hl,$zero,$H,8 # ... and split
- vsldoi $Hh,$H,$zero,8
-
- stvx_u $xC2,0,r3 # save pre-computed table
- stvx_u $Hl,r8,r3
- li r8,0x40
- stvx_u $H, r9,r3
- li r9,0x50
- stvx_u $Hh,r10,r3
- li r10,0x60
-
- vpmsumd $Xl,$IN,$Hl # H.lo·H.lo
- vpmsumd $Xm,$IN,$H # H.hi·H.lo+H.lo·H.hi
- vpmsumd $Xh,$IN,$Hh # H.hi·H.hi
-
- vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
-
- vsldoi $t0,$Xm,$zero,8
- vsldoi $t1,$zero,$Xm,8
- vxor $Xl,$Xl,$t0
- vxor $Xh,$Xh,$t1
-
- vsldoi $Xl,$Xl,$Xl,8
- vxor $Xl,$Xl,$t2
-
- vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
- vpmsumd $Xl,$Xl,$xC2
- vxor $t1,$t1,$Xh
- vxor $IN1,$Xl,$t1
-
- vsldoi $H2,$IN1,$IN1,8
- vsldoi $H2l,$zero,$H2,8
- vsldoi $H2h,$H2,$zero,8
-
- stvx_u $H2l,r8,r3 # save H^2
- li r8,0x70
- stvx_u $H2,r9,r3
- li r9,0x80
- stvx_u $H2h,r10,r3
- li r10,0x90
-___
-{
-my ($t4,$t5,$t6) = ($Hl,$H,$Hh);
-$code.=<<___;
- vpmsumd $Xl,$IN,$H2l # H.lo·H^2.lo
- vpmsumd $Xl1,$IN1,$H2l # H^2.lo·H^2.lo
- vpmsumd $Xm,$IN,$H2 # H.hi·H^2.lo+H.lo·H^2.hi
- vpmsumd $Xm1,$IN1,$H2 # H^2.hi·H^2.lo+H^2.lo·H^2.hi
- vpmsumd $Xh,$IN,$H2h # H.hi·H^2.hi
- vpmsumd $Xh1,$IN1,$H2h # H^2.hi·H^2.hi
-
- vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
- vpmsumd $t6,$Xl1,$xC2 # 1st reduction phase
-
- vsldoi $t0,$Xm,$zero,8
- vsldoi $t1,$zero,$Xm,8
- vsldoi $t4,$Xm1,$zero,8
- vsldoi $t5,$zero,$Xm1,8
- vxor $Xl,$Xl,$t0
- vxor $Xh,$Xh,$t1
- vxor $Xl1,$Xl1,$t4
- vxor $Xh1,$Xh1,$t5
-
- vsldoi $Xl,$Xl,$Xl,8
- vsldoi $Xl1,$Xl1,$Xl1,8
- vxor $Xl,$Xl,$t2
- vxor $Xl1,$Xl1,$t6
-
- vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
- vsldoi $t5,$Xl1,$Xl1,8 # 2nd reduction phase
- vpmsumd $Xl,$Xl,$xC2
- vpmsumd $Xl1,$Xl1,$xC2
- vxor $t1,$t1,$Xh
- vxor $t5,$t5,$Xh1
- vxor $Xl,$Xl,$t1
- vxor $Xl1,$Xl1,$t5
-
- vsldoi $H,$Xl,$Xl,8
- vsldoi $H2,$Xl1,$Xl1,8
- vsldoi $Hl,$zero,$H,8
- vsldoi $Hh,$H,$zero,8
- vsldoi $H2l,$zero,$H2,8
- vsldoi $H2h,$H2,$zero,8
-
- stvx_u $Hl,r8,r3 # save H^3
- li r8,0xa0
- stvx_u $H,r9,r3
- li r9,0xb0
- stvx_u $Hh,r10,r3
- li r10,0xc0
- stvx_u $H2l,r8,r3 # save H^4
- stvx_u $H2,r9,r3
- stvx_u $H2h,r10,r3
-
- mtspr 256,$vrsave
- blr
- .long 0
- .byte 0,12,0x14,0,0,0,2,0
- .long 0
-.size .gcm_init_p8,.-.gcm_init_p8
-___
-}
-$code.=<<___;
-.globl .gcm_gmult_p8
-.align 5
-.gcm_gmult_p8:
- lis r0,0xfff8
- li r8,0x10
- mfspr $vrsave,256
- li r9,0x20
- mtspr 256,r0
- li r10,0x30
- lvx_u $IN,0,$Xip # load Xi
-
- lvx_u $Hl,r8,$Htbl # load pre-computed table
- le?lvsl $lemask,r0,r0
- lvx_u $H, r9,$Htbl
- le?vspltisb $t0,0x07
- lvx_u $Hh,r10,$Htbl
- le?vxor $lemask,$lemask,$t0
- lvx_u $xC2,0,$Htbl
- le?vperm $IN,$IN,$IN,$lemask
- vxor $zero,$zero,$zero
-
- vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
- vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
- vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
-
- vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
-
- vsldoi $t0,$Xm,$zero,8
- vsldoi $t1,$zero,$Xm,8
- vxor $Xl,$Xl,$t0
- vxor $Xh,$Xh,$t1
-
- vsldoi $Xl,$Xl,$Xl,8
- vxor $Xl,$Xl,$t2
-
- vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
- vpmsumd $Xl,$Xl,$xC2
- vxor $t1,$t1,$Xh
- vxor $Xl,$Xl,$t1
-
- le?vperm $Xl,$Xl,$Xl,$lemask
- stvx_u $Xl,0,$Xip # write out Xi
-
- mtspr 256,$vrsave
- blr
- .long 0
- .byte 0,12,0x14,0,0,0,2,0
- .long 0
-.size .gcm_gmult_p8,.-.gcm_gmult_p8
-
-.globl .gcm_ghash_p8
-.align 5
-.gcm_ghash_p8:
- li r0,-4096
- li r8,0x10
- mfspr $vrsave,256
- li r9,0x20
- mtspr 256,r0
- li r10,0x30
- lvx_u $Xl,0,$Xip # load Xi
-
- lvx_u $Hl,r8,$Htbl # load pre-computed table
- li r8,0x40
- le?lvsl $lemask,r0,r0
- lvx_u $H, r9,$Htbl
- li r9,0x50
- le?vspltisb $t0,0x07
- lvx_u $Hh,r10,$Htbl
- li r10,0x60
- le?vxor $lemask,$lemask,$t0
- lvx_u $xC2,0,$Htbl
- le?vperm $Xl,$Xl,$Xl,$lemask
- vxor $zero,$zero,$zero
-
- ${UCMP}i $len,64
- bge Lgcm_ghash_p8_4x
-
- lvx_u $IN,0,$inp
- addi $inp,$inp,16
- subic. $len,$len,16
- le?vperm $IN,$IN,$IN,$lemask
- vxor $IN,$IN,$Xl
- beq Lshort
-
- lvx_u $H2l,r8,$Htbl # load H^2
- li r8,16
- lvx_u $H2, r9,$Htbl
- add r9,$inp,$len # end of input
- lvx_u $H2h,r10,$Htbl
- be?b Loop_2x
-
-.align 5
-Loop_2x:
- lvx_u $IN1,0,$inp
- le?vperm $IN1,$IN1,$IN1,$lemask
-
- subic $len,$len,32
- vpmsumd $Xl,$IN,$H2l # H^2.lo·Xi.lo
- vpmsumd $Xl1,$IN1,$Hl # H.lo·Xi+1.lo
- subfe r0,r0,r0 # borrow?-1:0
- vpmsumd $Xm,$IN,$H2 # H^2.hi·Xi.lo+H^2.lo·Xi.hi
- vpmsumd $Xm1,$IN1,$H # H.hi·Xi+1.lo+H.lo·Xi+1.hi
- and r0,r0,$len
- vpmsumd $Xh,$IN,$H2h # H^2.hi·Xi.hi
- vpmsumd $Xh1,$IN1,$Hh # H.hi·Xi+1.hi
- add $inp,$inp,r0
-
- vxor $Xl,$Xl,$Xl1
- vxor $Xm,$Xm,$Xm1
-
- vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
-
- vsldoi $t0,$Xm,$zero,8
- vsldoi $t1,$zero,$Xm,8
- vxor $Xh,$Xh,$Xh1
- vxor $Xl,$Xl,$t0
- vxor $Xh,$Xh,$t1
-
- vsldoi $Xl,$Xl,$Xl,8
- vxor $Xl,$Xl,$t2
- lvx_u $IN,r8,$inp
- addi $inp,$inp,32
-
- vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
- vpmsumd $Xl,$Xl,$xC2
- le?vperm $IN,$IN,$IN,$lemask
- vxor $t1,$t1,$Xh
- vxor $IN,$IN,$t1
- vxor $IN,$IN,$Xl
- $UCMP r9,$inp
- bgt Loop_2x # done yet?
-
- cmplwi $len,0
- bne Leven
-
-Lshort:
- vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
- vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
- vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
-
- vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
-
- vsldoi $t0,$Xm,$zero,8
- vsldoi $t1,$zero,$Xm,8
- vxor $Xl,$Xl,$t0
- vxor $Xh,$Xh,$t1
-
- vsldoi $Xl,$Xl,$Xl,8
- vxor $Xl,$Xl,$t2
-
- vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
- vpmsumd $Xl,$Xl,$xC2
- vxor $t1,$t1,$Xh
-
-Leven:
- vxor $Xl,$Xl,$t1
- le?vperm $Xl,$Xl,$Xl,$lemask
- stvx_u $Xl,0,$Xip # write out Xi
-
- mtspr 256,$vrsave
- blr
- .long 0
- .byte 0,12,0x14,0,0,0,4,0
- .long 0
-___
-{
-my ($Xl3,$Xm2,$IN2,$H3l,$H3,$H3h,
- $Xh3,$Xm3,$IN3,$H4l,$H4,$H4h) = map("v$_",(20..31));
-my $IN0=$IN;
-my ($H21l,$H21h,$loperm,$hiperm) = ($Hl,$Hh,$H2l,$H2h);
-
-$code.=<<___;
-.align 5
-.gcm_ghash_p8_4x:
-Lgcm_ghash_p8_4x:
- $STU $sp,-$FRAME($sp)
- li r10,`15+6*$SIZE_T`
- li r11,`31+6*$SIZE_T`
- stvx v20,r10,$sp
- addi r10,r10,32
- stvx v21,r11,$sp
- addi r11,r11,32
- stvx v22,r10,$sp
- addi r10,r10,32
- stvx v23,r11,$sp
- addi r11,r11,32
- stvx v24,r10,$sp
- addi r10,r10,32
- stvx v25,r11,$sp
- addi r11,r11,32
- stvx v26,r10,$sp
- addi r10,r10,32
- stvx v27,r11,$sp
- addi r11,r11,32
- stvx v28,r10,$sp
- addi r10,r10,32
- stvx v29,r11,$sp
- addi r11,r11,32
- stvx v30,r10,$sp
- li r10,0x60
- stvx v31,r11,$sp
- li r0,-1
- stw $vrsave,`$FRAME-4`($sp) # save vrsave
- mtspr 256,r0 # preserve all AltiVec registers
-
- lvsl $t0,0,r8 # 0x0001..0e0f
- #lvx_u $H2l,r8,$Htbl # load H^2
- li r8,0x70
- lvx_u $H2, r9,$Htbl
- li r9,0x80
- vspltisb $t1,8 # 0x0808..0808
- #lvx_u $H2h,r10,$Htbl
- li r10,0x90
- lvx_u $H3l,r8,$Htbl # load H^3
- li r8,0xa0
- lvx_u $H3, r9,$Htbl
- li r9,0xb0
- lvx_u $H3h,r10,$Htbl
- li r10,0xc0
- lvx_u $H4l,r8,$Htbl # load H^4
- li r8,0x10
- lvx_u $H4, r9,$Htbl
- li r9,0x20
- lvx_u $H4h,r10,$Htbl
- li r10,0x30
-
- vsldoi $t2,$zero,$t1,8 # 0x0000..0808
- vaddubm $hiperm,$t0,$t2 # 0x0001..1617
- vaddubm $loperm,$t1,$hiperm # 0x0809..1e1f
-
- $SHRI $len,$len,4 # this allows to use sign bit
- # as carry
- lvx_u $IN0,0,$inp # load input
- lvx_u $IN1,r8,$inp
- subic. $len,$len,8
- lvx_u $IN2,r9,$inp
- lvx_u $IN3,r10,$inp
- addi $inp,$inp,0x40
- le?vperm $IN0,$IN0,$IN0,$lemask
- le?vperm $IN1,$IN1,$IN1,$lemask
- le?vperm $IN2,$IN2,$IN2,$lemask
- le?vperm $IN3,$IN3,$IN3,$lemask
-
- vxor $Xh,$IN0,$Xl
-
- vpmsumd $Xl1,$IN1,$H3l
- vpmsumd $Xm1,$IN1,$H3
- vpmsumd $Xh1,$IN1,$H3h
-
- vperm $H21l,$H2,$H,$hiperm
- vperm $t0,$IN2,$IN3,$loperm
- vperm $H21h,$H2,$H,$loperm
- vperm $t1,$IN2,$IN3,$hiperm
- vpmsumd $Xm2,$IN2,$H2 # H^2.lo·Xi+2.hi+H^2.hi·Xi+2.lo
- vpmsumd $Xl3,$t0,$H21l # H^2.lo·Xi+2.lo+H.lo·Xi+3.lo
- vpmsumd $Xm3,$IN3,$H # H.hi·Xi+3.lo +H.lo·Xi+3.hi
- vpmsumd $Xh3,$t1,$H21h # H^2.hi·Xi+2.hi+H.hi·Xi+3.hi
-
- vxor $Xm2,$Xm2,$Xm1
- vxor $Xl3,$Xl3,$Xl1
- vxor $Xm3,$Xm3,$Xm2
- vxor $Xh3,$Xh3,$Xh1
-
- blt Ltail_4x
-
-Loop_4x:
- lvx_u $IN0,0,$inp
- lvx_u $IN1,r8,$inp
- subic. $len,$len,4
- lvx_u $IN2,r9,$inp
- lvx_u $IN3,r10,$inp
- addi $inp,$inp,0x40
- le?vperm $IN1,$IN1,$IN1,$lemask
- le?vperm $IN2,$IN2,$IN2,$lemask
- le?vperm $IN3,$IN3,$IN3,$lemask
- le?vperm $IN0,$IN0,$IN0,$lemask
-
- vpmsumd $Xl,$Xh,$H4l # H^4.lo·Xi.lo
- vpmsumd $Xm,$Xh,$H4 # H^4.hi·Xi.lo+H^4.lo·Xi.hi
- vpmsumd $Xh,$Xh,$H4h # H^4.hi·Xi.hi
- vpmsumd $Xl1,$IN1,$H3l
- vpmsumd $Xm1,$IN1,$H3
- vpmsumd $Xh1,$IN1,$H3h
-
- vxor $Xl,$Xl,$Xl3
- vxor $Xm,$Xm,$Xm3
- vxor $Xh,$Xh,$Xh3
- vperm $t0,$IN2,$IN3,$loperm
- vperm $t1,$IN2,$IN3,$hiperm
-
- vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
- vpmsumd $Xl3,$t0,$H21l # H.lo·Xi+3.lo +H^2.lo·Xi+2.lo
- vpmsumd $Xh3,$t1,$H21h # H.hi·Xi+3.hi +H^2.hi·Xi+2.hi
-
- vsldoi $t0,$Xm,$zero,8
- vsldoi $t1,$zero,$Xm,8
- vxor $Xl,$Xl,$t0
- vxor $Xh,$Xh,$t1
-
- vsldoi $Xl,$Xl,$Xl,8
- vxor $Xl,$Xl,$t2
-
- vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
- vpmsumd $Xm2,$IN2,$H2 # H^2.hi·Xi+2.lo+H^2.lo·Xi+2.hi
- vpmsumd $Xm3,$IN3,$H # H.hi·Xi+3.lo +H.lo·Xi+3.hi
- vpmsumd $Xl,$Xl,$xC2
-
- vxor $Xl3,$Xl3,$Xl1
- vxor $Xh3,$Xh3,$Xh1
- vxor $Xh,$Xh,$IN0
- vxor $Xm2,$Xm2,$Xm1
- vxor $Xh,$Xh,$t1
- vxor $Xm3,$Xm3,$Xm2
- vxor $Xh,$Xh,$Xl
- bge Loop_4x
-
-Ltail_4x:
- vpmsumd $Xl,$Xh,$H4l # H^4.lo·Xi.lo
- vpmsumd $Xm,$Xh,$H4 # H^4.hi·Xi.lo+H^4.lo·Xi.hi
- vpmsumd $Xh,$Xh,$H4h # H^4.hi·Xi.hi
-
- vxor $Xl,$Xl,$Xl3
- vxor $Xm,$Xm,$Xm3
-
- vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
-
- vsldoi $t0,$Xm,$zero,8
- vsldoi $t1,$zero,$Xm,8
- vxor $Xh,$Xh,$Xh3
- vxor $Xl,$Xl,$t0
- vxor $Xh,$Xh,$t1
-
- vsldoi $Xl,$Xl,$Xl,8
- vxor $Xl,$Xl,$t2
-
- vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
- vpmsumd $Xl,$Xl,$xC2
- vxor $t1,$t1,$Xh
- vxor $Xl,$Xl,$t1
-
- addic. $len,$len,4
- beq Ldone_4x
-
- lvx_u $IN0,0,$inp
- ${UCMP}i $len,2
- li $len,-4
- blt Lone
- lvx_u $IN1,r8,$inp
- beq Ltwo
-
-Lthree:
- lvx_u $IN2,r9,$inp
- le?vperm $IN0,$IN0,$IN0,$lemask
- le?vperm $IN1,$IN1,$IN1,$lemask
- le?vperm $IN2,$IN2,$IN2,$lemask
-
- vxor $Xh,$IN0,$Xl
- vmr $H4l,$H3l
- vmr $H4, $H3
- vmr $H4h,$H3h
-
- vperm $t0,$IN1,$IN2,$loperm
- vperm $t1,$IN1,$IN2,$hiperm
- vpmsumd $Xm2,$IN1,$H2 # H^2.lo·Xi+1.hi+H^2.hi·Xi+1.lo
- vpmsumd $Xm3,$IN2,$H # H.hi·Xi+2.lo +H.lo·Xi+2.hi
- vpmsumd $Xl3,$t0,$H21l # H^2.lo·Xi+1.lo+H.lo·Xi+2.lo
- vpmsumd $Xh3,$t1,$H21h # H^2.hi·Xi+1.hi+H.hi·Xi+2.hi
-
- vxor $Xm3,$Xm3,$Xm2
- b Ltail_4x
-
-.align 4
-Ltwo:
- le?vperm $IN0,$IN0,$IN0,$lemask
- le?vperm $IN1,$IN1,$IN1,$lemask
-
- vxor $Xh,$IN0,$Xl
- vperm $t0,$zero,$IN1,$loperm
- vperm $t1,$zero,$IN1,$hiperm
-
- vsldoi $H4l,$zero,$H2,8
- vmr $H4, $H2
- vsldoi $H4h,$H2,$zero,8
-
- vpmsumd $Xl3,$t0, $H21l # H.lo·Xi+1.lo
- vpmsumd $Xm3,$IN1,$H # H.hi·Xi+1.lo+H.lo·Xi+2.hi
- vpmsumd $Xh3,$t1, $H21h # H.hi·Xi+1.hi
-
- b Ltail_4x
-
-.align 4
-Lone:
- le?vperm $IN0,$IN0,$IN0,$lemask
-
- vsldoi $H4l,$zero,$H,8
- vmr $H4, $H
- vsldoi $H4h,$H,$zero,8
-
- vxor $Xh,$IN0,$Xl
- vxor $Xl3,$Xl3,$Xl3
- vxor $Xm3,$Xm3,$Xm3
- vxor $Xh3,$Xh3,$Xh3
-
- b Ltail_4x
-
-Ldone_4x:
- le?vperm $Xl,$Xl,$Xl,$lemask
- stvx_u $Xl,0,$Xip # write out Xi
-
- li r10,`15+6*$SIZE_T`
- li r11,`31+6*$SIZE_T`
- mtspr 256,$vrsave
- lvx v20,r10,$sp
- addi r10,r10,32
- lvx v21,r11,$sp
- addi r11,r11,32
- lvx v22,r10,$sp
- addi r10,r10,32
- lvx v23,r11,$sp
- addi r11,r11,32
- lvx v24,r10,$sp
- addi r10,r10,32
- lvx v25,r11,$sp
- addi r11,r11,32
- lvx v26,r10,$sp
- addi r10,r10,32
- lvx v27,r11,$sp
- addi r11,r11,32
- lvx v28,r10,$sp
- addi r10,r10,32
- lvx v29,r11,$sp
- addi r11,r11,32
- lvx v30,r10,$sp
- lvx v31,r11,$sp
- addi $sp,$sp,$FRAME
- blr
- .long 0
- .byte 0,12,0x04,0,0x80,0,4,0
- .long 0
-___
-}
-$code.=<<___;
-.size .gcm_ghash_p8,.-.gcm_ghash_p8
-
-.asciz "GHASH for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
-.align 2
-___
-
-foreach (split("\n",$code)) {
- s/\`([^\`]*)\`/eval $1/geo;
-
- if ($flavour =~ /le$/o) { # little-endian
- s/le\?//o or
- s/be\?/#be#/o;
- } else {
- s/le\?/#le#/o or
- s/be\?//o;
- }
- print $_,"\n";
-}
-
-close STDOUT or die "error closing STDOUT: $!"; # enforce flush
diff --git a/crypto/fipsmodule/modes/gcm.c b/crypto/fipsmodule/modes/gcm.c
index 11a0b20..f22fa9d 100644
--- a/crypto/fipsmodule/modes/gcm.c
+++ b/crypto/fipsmodule/modes/gcm.c
@@ -230,13 +230,6 @@
*out_hash = gcm_ghash_neon;
return;
}
-#elif defined(GHASH_ASM_PPC64LE)
- if (CRYPTO_is_PPC64LE_vcrypto_capable()) {
- gcm_init_p8(out_table, H);
- *out_mult = gcm_gmult_p8;
- *out_hash = gcm_ghash_p8;
- return;
- }
#endif
gcm_init_nohw(out_table, H);
diff --git a/crypto/fipsmodule/modes/gcm_test.cc b/crypto/fipsmodule/modes/gcm_test.cc
index 324d0e8..e7fcd5c 100644
--- a/crypto/fipsmodule/modes/gcm_test.cc
+++ b/crypto/fipsmodule/modes/gcm_test.cc
@@ -222,15 +222,5 @@
}
}
#endif
-
-#if defined(GHASH_ASM_PPC64LE)
- if (CRYPTO_is_PPC64LE_vcrypto_capable()) {
- CHECK_ABI(gcm_init_p8, Htable, kH);
- CHECK_ABI(gcm_gmult_p8, X, Htable);
- for (size_t blocks : kBlockCounts) {
- CHECK_ABI(gcm_ghash_p8, X, Htable, buf, 16 * blocks);
- }
- }
-#endif // GHASH_ASM_PPC64LE
}
#endif // SUPPORTS_ABI_TEST && !OPENSSL_NO_ASM
diff --git a/crypto/fipsmodule/modes/internal.h b/crypto/fipsmodule/modes/internal.h
index 8a0a75f..d77efca 100644
--- a/crypto/fipsmodule/modes/internal.h
+++ b/crypto/fipsmodule/modes/internal.h
@@ -308,13 +308,6 @@
void *Xi, uint8_t *ivec, const AES_KEY *key);
#endif
-#elif defined(OPENSSL_PPC64LE)
-#define GHASH_ASM_PPC64LE
-#define GCM_FUNCREF
-void gcm_init_p8(u128 Htable[16], const uint64_t Xi[2]);
-void gcm_gmult_p8(uint64_t Xi[2], const u128 Htable[16]);
-void gcm_ghash_p8(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
- size_t len);
#endif
#endif // OPENSSL_NO_ASM
diff --git a/crypto/fipsmodule/rand/rand.c b/crypto/fipsmodule/rand/rand.c
index cb1ee7d..41754c6 100644
--- a/crypto/fipsmodule/rand/rand.c
+++ b/crypto/fipsmodule/rand/rand.c
@@ -416,11 +416,6 @@
// Take a read lock around accesses to |state->drbg|. This is needed to
// avoid returning bad entropy if we race with
// |rand_thread_state_clear_all|.
- //
- // This lock must be taken after any calls to |CRYPTO_sysrand| to avoid a
- // bug on ppc64le. glibc may implement pthread locks by wrapping user code
- // in a hardware transaction, but, on some older versions of glibc and the
- // kernel, syscalls made with |syscall| did not abort the transaction.
CRYPTO_STATIC_MUTEX_lock_read(state_clear_all_lock_bss_get());
#endif
if (!CTR_DRBG_reseed(&state->drbg, seed, reseed_additional_data,
diff --git a/crypto/fipsmodule/sha/internal.h b/crypto/fipsmodule/sha/internal.h
index cc90914..605f166 100644
--- a/crypto/fipsmodule/sha/internal.h
+++ b/crypto/fipsmodule/sha/internal.h
@@ -22,23 +22,14 @@
#endif
-#if defined(OPENSSL_PPC64LE) || \
- (!defined(OPENSSL_NO_ASM) && \
- (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || \
- defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)))
-// POWER has an intrinsics-based implementation of SHA-1 and thus the functions
-// normally defined in assembly are available even with |OPENSSL_NO_ASM| in
-// this case.
-#define SHA1_ASM
-void sha1_block_data_order(uint32_t *state, const uint8_t *in,
- size_t num_blocks);
-#endif
-
#if !defined(OPENSSL_NO_ASM) && \
(defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || \
defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64))
+#define SHA1_ASM
#define SHA256_ASM
#define SHA512_ASM
+void sha1_block_data_order(uint32_t *state, const uint8_t *in,
+ size_t num_blocks);
void sha256_block_data_order(uint32_t *state, const uint8_t *in,
size_t num_blocks);
void sha512_block_data_order(uint64_t *state, const uint8_t *in,
diff --git a/crypto/fipsmodule/sha/sha1-altivec.c b/crypto/fipsmodule/sha/sha1-altivec.c
deleted file mode 100644
index 3152827..0000000
--- a/crypto/fipsmodule/sha/sha1-altivec.c
+++ /dev/null
@@ -1,361 +0,0 @@
-/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
- * All rights reserved.
- *
- * This package is an SSL implementation written
- * by Eric Young (eay@cryptsoft.com).
- * The implementation was written so as to conform with Netscapes SSL.
- *
- * This library is free for commercial and non-commercial use as long as
- * the following conditions are aheared to. The following conditions
- * apply to all code found in this distribution, be it the RC4, RSA,
- * lhash, DES, etc., code; not just the SSL code. The SSL documentation
- * included with this distribution is covered by the same copyright terms
- * except that the holder is Tim Hudson (tjh@cryptsoft.com).
- *
- * Copyright remains Eric Young's, and as such any Copyright notices in
- * the code are not to be removed.
- * If this package is used in a product, Eric Young should be given attribution
- * as the author of the parts of the library used.
- * This can be in the form of a textual message at program startup or
- * in documentation (online or textual) provided with the package.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * "This product includes cryptographic software written by
- * Eric Young (eay@cryptsoft.com)"
- * The word 'cryptographic' can be left out if the rouines from the library
- * being used are not cryptographic related :-).
- * 4. If you include any Windows specific code (or a derivative thereof) from
- * the apps directory (application code) you must include an acknowledgement:
- * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
- *
- * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * The licence and distribution terms for any publically available version or
- * derivative of this code cannot be changed. i.e. this code cannot simply be
- * copied and put under another distribution licence
- * [including the GNU Public Licence.] */
-
-// Altivec-optimized SHA1 in C. This is tested on ppc64le only.
-//
-// References:
-// https://software.intel.com/en-us/articles/improving-the-performance-of-the-secure-hash-algorithm-1
-// http://arctic.org/~dean/crypto/sha1.html
-//
-// This code used the generic SHA-1 from OpenSSL as a basis and AltiVec
-// optimisations were added on top.
-
-#include <openssl/sha.h>
-
-#if defined(OPENSSL_PPC64LE)
-
-#include <altivec.h>
-
-void sha1_block_data_order(uint32_t *state, const uint8_t *data, size_t num);
-
-static uint32_t rotate(uint32_t a, int n) { return (a << n) | (a >> (32 - n)); }
-
-typedef vector unsigned int vec_uint32_t;
-typedef vector unsigned char vec_uint8_t;
-
-// Vector constants
-static const vec_uint8_t k_swap_endianness = {3, 2, 1, 0, 7, 6, 5, 4,
- 11, 10, 9, 8, 15, 14, 13, 12};
-
-// Shift amounts for byte and bit shifts and rotations
-static const vec_uint8_t k_4_bytes = {32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32};
-static const vec_uint8_t k_12_bytes = {96, 96, 96, 96, 96, 96, 96, 96,
- 96, 96, 96, 96, 96, 96, 96, 96};
-
-#define K_00_19 0x5a827999UL
-#define K_20_39 0x6ed9eba1UL
-#define K_40_59 0x8f1bbcdcUL
-#define K_60_79 0xca62c1d6UL
-
-// Vector versions of the above.
-static const vec_uint32_t K_00_19_x_4 = {K_00_19, K_00_19, K_00_19, K_00_19};
-static const vec_uint32_t K_20_39_x_4 = {K_20_39, K_20_39, K_20_39, K_20_39};
-static const vec_uint32_t K_40_59_x_4 = {K_40_59, K_40_59, K_40_59, K_40_59};
-static const vec_uint32_t K_60_79_x_4 = {K_60_79, K_60_79, K_60_79, K_60_79};
-
-// vector message scheduling: compute message schedule for round i..i+3 where i
-// is divisible by 4. We return the schedule w[i..i+3] as a vector. In
-// addition, we also precompute sum w[i..+3] and an additive constant K. This
-// is done to offload some computation of f() in the integer execution units.
-//
-// Byte shifting code below may not be correct for big-endian systems.
-static vec_uint32_t sched_00_15(vec_uint32_t *pre_added, const void *data,
- vec_uint32_t k) {
- const vector unsigned char unaligned_data =
- vec_vsx_ld(0, (const unsigned char*) data);
- const vec_uint32_t v = (vec_uint32_t) unaligned_data;
- const vec_uint32_t w = vec_perm(v, v, k_swap_endianness);
- vec_st(w + k, 0, pre_added);
- return w;
-}
-
-// Compute w[i..i+3] using these steps for i in [16, 20, 24, 28]
-//
-// w'[i ] = (w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16]) <<< 1
-// w'[i+1] = (w[i-2] ^ w[i-7] ^ w[i-13] ^ w[i-15]) <<< 1
-// w'[i+2] = (w[i-1] ^ w[i-6] ^ w[i-12] ^ w[i-14]) <<< 1
-// w'[i+3] = ( 0 ^ w[i-5] ^ w[i-11] ^ w[i-13]) <<< 1
-//
-// w[ i] = w'[ i]
-// w[i+1] = w'[i+1]
-// w[i+2] = w'[i+2]
-// w[i+3] = w'[i+3] ^ (w'[i] <<< 1)
-static vec_uint32_t sched_16_31(vec_uint32_t *pre_added, vec_uint32_t minus_4,
- vec_uint32_t minus_8, vec_uint32_t minus_12,
- vec_uint32_t minus_16, vec_uint32_t k) {
- const vec_uint32_t minus_3 = vec_sro(minus_4, k_4_bytes);
- const vec_uint32_t minus_14 = vec_sld((minus_12), (minus_16), 8);
- const vec_uint32_t k_1_bit = vec_splat_u32(1);
- const vec_uint32_t w_prime =
- vec_rl(minus_3 ^ minus_8 ^ minus_14 ^ minus_16, k_1_bit);
- const vec_uint32_t w =
- w_prime ^ vec_rl(vec_slo(w_prime, k_12_bytes), k_1_bit);
- vec_st(w + k, 0, pre_added);
- return w;
-}
-
-// Compute w[i..i+3] using this relation for i in [32, 36, 40 ... 76]
-// w[i] = (w[i-6] ^ w[i-16] ^ w[i-28] ^ w[i-32]), 2) <<< 2
-static vec_uint32_t sched_32_79(vec_uint32_t *pre_added, vec_uint32_t minus_4,
- vec_uint32_t minus_8, vec_uint32_t minus_16,
- vec_uint32_t minus_28, vec_uint32_t minus_32,
- vec_uint32_t k) {
- const vec_uint32_t minus_6 = vec_sld(minus_4, minus_8, 8);
- const vec_uint32_t k_2_bits = vec_splat_u32(2);
- const vec_uint32_t w =
- vec_rl(minus_6 ^ minus_16 ^ minus_28 ^ minus_32, k_2_bits);
- vec_st(w + k, 0, pre_added);
- return w;
-}
-
-// As pointed out by Wei Dai <weidai@eskimo.com>, F() below can be simplified
-// to the code in F_00_19. Wei attributes these optimisations to Peter
-// Gutmann's SHS code, and he attributes it to Rich Schroeppel. #define
-// F(x,y,z) (((x) & (y)) | ((~(x)) & (z))) I've just become aware of another
-// tweak to be made, again from Wei Dai, in F_40_59, (x&a)|(y&a) -> (x|y)&a
-#define F_00_19(b, c, d) ((((c) ^ (d)) & (b)) ^ (d))
-#define F_20_39(b, c, d) ((b) ^ (c) ^ (d))
-#define F_40_59(b, c, d) (((b) & (c)) | (((b) | (c)) & (d)))
-#define F_60_79(b, c, d) F_20_39(b, c, d)
-
-// We pre-added the K constants during message scheduling.
-#define BODY_00_19(i, a, b, c, d, e, f) \
- do { \
- (f) = w[i] + (e) + rotate((a), 5) + F_00_19((b), (c), (d)); \
- (b) = rotate((b), 30); \
- } while (0)
-
-#define BODY_20_39(i, a, b, c, d, e, f) \
- do { \
- (f) = w[i] + (e) + rotate((a), 5) + F_20_39((b), (c), (d)); \
- (b) = rotate((b), 30); \
- } while (0)
-
-#define BODY_40_59(i, a, b, c, d, e, f) \
- do { \
- (f) = w[i] + (e) + rotate((a), 5) + F_40_59((b), (c), (d)); \
- (b) = rotate((b), 30); \
- } while (0)
-
-#define BODY_60_79(i, a, b, c, d, e, f) \
- do { \
- (f) = w[i] + (e) + rotate((a), 5) + F_60_79((b), (c), (d)); \
- (b) = rotate((b), 30); \
- } while (0)
-
-void sha1_block_data_order(uint32_t *state, const uint8_t *data, size_t num) {
- uint32_t A, B, C, D, E, T;
-
- A = state[0];
- B = state[1];
- C = state[2];
- D = state[3];
- E = state[4];
-
- for (;;) {
- vec_uint32_t vw[20];
- const uint32_t *w = (const uint32_t *)&vw;
-
- vec_uint32_t k = K_00_19_x_4;
- const vec_uint32_t w0 = sched_00_15(vw + 0, data + 0, k);
- BODY_00_19(0, A, B, C, D, E, T);
- BODY_00_19(1, T, A, B, C, D, E);
- BODY_00_19(2, E, T, A, B, C, D);
- BODY_00_19(3, D, E, T, A, B, C);
-
- const vec_uint32_t w4 = sched_00_15(vw + 1, data + 16, k);
- BODY_00_19(4, C, D, E, T, A, B);
- BODY_00_19(5, B, C, D, E, T, A);
- BODY_00_19(6, A, B, C, D, E, T);
- BODY_00_19(7, T, A, B, C, D, E);
-
- const vec_uint32_t w8 = sched_00_15(vw + 2, data + 32, k);
- BODY_00_19(8, E, T, A, B, C, D);
- BODY_00_19(9, D, E, T, A, B, C);
- BODY_00_19(10, C, D, E, T, A, B);
- BODY_00_19(11, B, C, D, E, T, A);
-
- const vec_uint32_t w12 = sched_00_15(vw + 3, data + 48, k);
- BODY_00_19(12, A, B, C, D, E, T);
- BODY_00_19(13, T, A, B, C, D, E);
- BODY_00_19(14, E, T, A, B, C, D);
- BODY_00_19(15, D, E, T, A, B, C);
-
- const vec_uint32_t w16 = sched_16_31(vw + 4, w12, w8, w4, w0, k);
- BODY_00_19(16, C, D, E, T, A, B);
- BODY_00_19(17, B, C, D, E, T, A);
- BODY_00_19(18, A, B, C, D, E, T);
- BODY_00_19(19, T, A, B, C, D, E);
-
- k = K_20_39_x_4;
- const vec_uint32_t w20 = sched_16_31(vw + 5, w16, w12, w8, w4, k);
- BODY_20_39(20, E, T, A, B, C, D);
- BODY_20_39(21, D, E, T, A, B, C);
- BODY_20_39(22, C, D, E, T, A, B);
- BODY_20_39(23, B, C, D, E, T, A);
-
- const vec_uint32_t w24 = sched_16_31(vw + 6, w20, w16, w12, w8, k);
- BODY_20_39(24, A, B, C, D, E, T);
- BODY_20_39(25, T, A, B, C, D, E);
- BODY_20_39(26, E, T, A, B, C, D);
- BODY_20_39(27, D, E, T, A, B, C);
-
- const vec_uint32_t w28 = sched_16_31(vw + 7, w24, w20, w16, w12, k);
- BODY_20_39(28, C, D, E, T, A, B);
- BODY_20_39(29, B, C, D, E, T, A);
- BODY_20_39(30, A, B, C, D, E, T);
- BODY_20_39(31, T, A, B, C, D, E);
-
- const vec_uint32_t w32 = sched_32_79(vw + 8, w28, w24, w16, w4, w0, k);
- BODY_20_39(32, E, T, A, B, C, D);
- BODY_20_39(33, D, E, T, A, B, C);
- BODY_20_39(34, C, D, E, T, A, B);
- BODY_20_39(35, B, C, D, E, T, A);
-
- const vec_uint32_t w36 = sched_32_79(vw + 9, w32, w28, w20, w8, w4, k);
- BODY_20_39(36, A, B, C, D, E, T);
- BODY_20_39(37, T, A, B, C, D, E);
- BODY_20_39(38, E, T, A, B, C, D);
- BODY_20_39(39, D, E, T, A, B, C);
-
- k = K_40_59_x_4;
- const vec_uint32_t w40 = sched_32_79(vw + 10, w36, w32, w24, w12, w8, k);
- BODY_40_59(40, C, D, E, T, A, B);
- BODY_40_59(41, B, C, D, E, T, A);
- BODY_40_59(42, A, B, C, D, E, T);
- BODY_40_59(43, T, A, B, C, D, E);
-
- const vec_uint32_t w44 = sched_32_79(vw + 11, w40, w36, w28, w16, w12, k);
- BODY_40_59(44, E, T, A, B, C, D);
- BODY_40_59(45, D, E, T, A, B, C);
- BODY_40_59(46, C, D, E, T, A, B);
- BODY_40_59(47, B, C, D, E, T, A);
-
- const vec_uint32_t w48 = sched_32_79(vw + 12, w44, w40, w32, w20, w16, k);
- BODY_40_59(48, A, B, C, D, E, T);
- BODY_40_59(49, T, A, B, C, D, E);
- BODY_40_59(50, E, T, A, B, C, D);
- BODY_40_59(51, D, E, T, A, B, C);
-
- const vec_uint32_t w52 = sched_32_79(vw + 13, w48, w44, w36, w24, w20, k);
- BODY_40_59(52, C, D, E, T, A, B);
- BODY_40_59(53, B, C, D, E, T, A);
- BODY_40_59(54, A, B, C, D, E, T);
- BODY_40_59(55, T, A, B, C, D, E);
-
- const vec_uint32_t w56 = sched_32_79(vw + 14, w52, w48, w40, w28, w24, k);
- BODY_40_59(56, E, T, A, B, C, D);
- BODY_40_59(57, D, E, T, A, B, C);
- BODY_40_59(58, C, D, E, T, A, B);
- BODY_40_59(59, B, C, D, E, T, A);
-
- k = K_60_79_x_4;
- const vec_uint32_t w60 = sched_32_79(vw + 15, w56, w52, w44, w32, w28, k);
- BODY_60_79(60, A, B, C, D, E, T);
- BODY_60_79(61, T, A, B, C, D, E);
- BODY_60_79(62, E, T, A, B, C, D);
- BODY_60_79(63, D, E, T, A, B, C);
-
- const vec_uint32_t w64 = sched_32_79(vw + 16, w60, w56, w48, w36, w32, k);
- BODY_60_79(64, C, D, E, T, A, B);
- BODY_60_79(65, B, C, D, E, T, A);
- BODY_60_79(66, A, B, C, D, E, T);
- BODY_60_79(67, T, A, B, C, D, E);
-
- const vec_uint32_t w68 = sched_32_79(vw + 17, w64, w60, w52, w40, w36, k);
- BODY_60_79(68, E, T, A, B, C, D);
- BODY_60_79(69, D, E, T, A, B, C);
- BODY_60_79(70, C, D, E, T, A, B);
- BODY_60_79(71, B, C, D, E, T, A);
-
- const vec_uint32_t w72 = sched_32_79(vw + 18, w68, w64, w56, w44, w40, k);
- BODY_60_79(72, A, B, C, D, E, T);
- BODY_60_79(73, T, A, B, C, D, E);
- BODY_60_79(74, E, T, A, B, C, D);
- BODY_60_79(75, D, E, T, A, B, C);
-
- // We don't use the last value
- (void)sched_32_79(vw + 19, w72, w68, w60, w48, w44, k);
- BODY_60_79(76, C, D, E, T, A, B);
- BODY_60_79(77, B, C, D, E, T, A);
- BODY_60_79(78, A, B, C, D, E, T);
- BODY_60_79(79, T, A, B, C, D, E);
-
- const uint32_t mask = 0xffffffffUL;
- state[0] = (state[0] + E) & mask;
- state[1] = (state[1] + T) & mask;
- state[2] = (state[2] + A) & mask;
- state[3] = (state[3] + B) & mask;
- state[4] = (state[4] + C) & mask;
-
- data += 64;
- if (--num == 0) {
- break;
- }
-
- A = state[0];
- B = state[1];
- C = state[2];
- D = state[3];
- E = state[4];
- }
-}
-
-#endif // OPENSSL_PPC64LE
-
-#undef K_00_19
-#undef K_20_39
-#undef K_40_59
-#undef K_60_79
-#undef F_00_19
-#undef F_20_39
-#undef F_40_59
-#undef F_60_79
-#undef BODY_00_19
-#undef BODY_20_39
-#undef BODY_40_59
-#undef BODY_60_79
diff --git a/crypto/internal.h b/crypto/internal.h
index 63e6a66..f9a243e 100644
--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -166,7 +166,7 @@
#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || defined(OPENSSL_ARM) || \
- defined(OPENSSL_AARCH64) || defined(OPENSSL_PPC64LE)
+ defined(OPENSSL_AARCH64)
// OPENSSL_cpuid_setup initializes the platform-specific feature cache.
void OPENSSL_cpuid_setup(void);
#endif
@@ -1251,16 +1251,6 @@
#endif // OPENSSL_ARM || OPENSSL_AARCH64
-#if defined(OPENSSL_PPC64LE)
-
-// CRYPTO_is_PPC64LE_vcrypto_capable returns true iff the current CPU supports
-// the Vector.AES category of instructions.
-int CRYPTO_is_PPC64LE_vcrypto_capable(void);
-
-extern unsigned long OPENSSL_ppc64le_hwcap2;
-
-#endif // OPENSSL_PPC64LE
-
#if defined(BORINGSSL_DISPATCH_TEST)
// Runtime CPU dispatch testing support
diff --git a/crypto/perlasm/ppc-xlate.pl b/crypto/perlasm/ppc-xlate.pl
deleted file mode 100644
index 1c51577..0000000
--- a/crypto/perlasm/ppc-xlate.pl
+++ /dev/null
@@ -1,320 +0,0 @@
-#! /usr/bin/env perl
-# Copyright 2006-2016 The OpenSSL Project Authors. All Rights Reserved.
-#
-# Licensed under the OpenSSL license (the "License"). You may not use
-# this file except in compliance with the License. You can obtain a copy
-# in the file LICENSE in the source distribution or at
-# https://www.openssl.org/source/license.html
-
-my $flavour = shift;
-my $output = shift;
-open STDOUT,">$output" || die "can't open $output: $!";
-
-my %GLOBALS;
-my %TYPES;
-my $dotinlocallabels=($flavour=~/linux/)?1:0;
-
-################################################################
-# directives which need special treatment on different platforms
-################################################################
-my $type = sub {
- my ($dir,$name,$type) = @_;
-
- $TYPES{$name} = $type;
- if ($flavour =~ /linux/) {
- $name =~ s|^\.||;
- ".type $name,$type";
- } else {
- "";
- }
-};
-my $globl = sub {
- my $junk = shift;
- my $name = shift;
- my $global = \$GLOBALS{$name};
- my $type = \$TYPES{$name};
- my $ret;
-
- $name =~ s|^\.||;
-
- SWITCH: for ($flavour) {
- /aix/ && do { if (!$$type) {
- $$type = "\@function";
- }
- if ($$type =~ /function/) {
- $name = ".$name";
- }
- last;
- };
- /osx/ && do { $name = "_$name";
- last;
- };
- /linux.*(32|64le)/
- && do { $ret .= ".globl $name";
- if (!$$type) {
- $ret .= "\n.type $name,\@function";
- $$type = "\@function";
- }
- last;
- };
- /linux.*64/ && do { $ret .= ".globl $name";
- if (!$$type) {
- $ret .= "\n.type $name,\@function";
- $$type = "\@function";
- }
- if ($$type =~ /function/) {
- $ret .= "\n.section \".opd\",\"aw\"";
- $ret .= "\n.align 3";
- $ret .= "\n$name:";
- $ret .= "\n.quad .$name,.TOC.\@tocbase,0";
- $ret .= "\n.previous";
- $name = ".$name";
- }
- last;
- };
- }
-
- $ret = ".globl $name" if (!$ret);
- $$global = $name;
- $ret;
-};
-my $text = sub {
- my $ret = ($flavour =~ /aix/) ? ".csect\t.text[PR],7" : ".text";
- $ret = ".abiversion 2\n".$ret if ($flavour =~ /linux.*64le/);
- $ret;
-};
-my $machine = sub {
- my $junk = shift;
- my $arch = shift;
- if ($flavour =~ /osx/)
- { $arch =~ s/\"//g;
- $arch = ($flavour=~/64/) ? "ppc970-64" : "ppc970" if ($arch eq "any");
- }
- ".machine $arch";
-};
-my $size = sub {
- if ($flavour =~ /linux/)
- { shift;
- my $name = shift;
- my $real = $GLOBALS{$name} ? \$GLOBALS{$name} : \$name;
- my $ret = ".size $$real,.-$$real";
- $name =~ s|^\.||;
- if ($$real ne $name) {
- $ret .= "\n.size $name,.-$$real";
- }
- $ret;
- }
- else
- { ""; }
-};
-my $asciz = sub {
- shift;
- my $line = join(",",@_);
- if ($line =~ /^"(.*)"$/)
- { ".byte " . join(",",unpack("C*",$1),0) . "\n.align 2"; }
- else
- { ""; }
-};
-my $quad = sub {
- shift;
- my @ret;
- my ($hi,$lo);
- for (@_) {
- if (/^0x([0-9a-f]*?)([0-9a-f]{1,8})$/io)
- { $hi=$1?"0x$1":"0"; $lo="0x$2"; }
- elsif (/^([0-9]+)$/o)
- { $hi=$1>>32; $lo=$1&0xffffffff; } # error-prone with 32-bit perl
- else
- { $hi=undef; $lo=$_; }
-
- if (defined($hi))
- { push(@ret,$flavour=~/le$/o?".long\t$lo,$hi":".long\t$hi,$lo"); }
- else
- { push(@ret,".quad $lo"); }
- }
- join("\n",@ret);
-};
-
-################################################################
-# simplified mnemonics not handled by at least one assembler
-################################################################
-my $cmplw = sub {
- my $f = shift;
- my $cr = 0; $cr = shift if ($#_>1);
- # Some out-of-date 32-bit GNU assembler just can't handle cmplw...
- ($flavour =~ /linux.*32/) ?
- " .long ".sprintf "0x%x",31<<26|$cr<<23|$_[0]<<16|$_[1]<<11|64 :
- " cmplw ".join(',',$cr,@_);
-};
-my $bdnz = sub {
- my $f = shift;
- my $bo = $f=~/[\+\-]/ ? 16+9 : 16; # optional "to be taken" hint
- " bc $bo,0,".shift;
-} if ($flavour!~/linux/);
-my $bltlr = sub {
- my $f = shift;
- my $bo = $f=~/\-/ ? 12+2 : 12; # optional "not to be taken" hint
- ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints
- " .long ".sprintf "0x%x",19<<26|$bo<<21|16<<1 :
- " bclr $bo,0";
-};
-my $bnelr = sub {
- my $f = shift;
- my $bo = $f=~/\-/ ? 4+2 : 4; # optional "not to be taken" hint
- ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints
- " .long ".sprintf "0x%x",19<<26|$bo<<21|2<<16|16<<1 :
- " bclr $bo,2";
-};
-my $beqlr = sub {
- my $f = shift;
- my $bo = $f=~/-/ ? 12+2 : 12; # optional "not to be taken" hint
- ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints
- " .long ".sprintf "0x%X",19<<26|$bo<<21|2<<16|16<<1 :
- " bclr $bo,2";
-};
-# GNU assembler can't handle extrdi rA,rS,16,48, or when sum of last two
-# arguments is 64, with "operand out of range" error.
-my $extrdi = sub {
- my ($f,$ra,$rs,$n,$b) = @_;
- $b = ($b+$n)&63; $n = 64-$n;
- " rldicl $ra,$rs,$b,$n";
-};
-my $vmr = sub {
- my ($f,$vx,$vy) = @_;
- " vor $vx,$vy,$vy";
-};
-
-# Some ABIs specify vrsave, special-purpose register #256, as reserved
-# for system use.
-my $no_vrsave = ($flavour =~ /aix|linux64le/);
-my $mtspr = sub {
- my ($f,$idx,$ra) = @_;
- if ($idx == 256 && $no_vrsave) {
- " or $ra,$ra,$ra";
- } else {
- " mtspr $idx,$ra";
- }
-};
-my $mfspr = sub {
- my ($f,$rd,$idx) = @_;
- if ($idx == 256 && $no_vrsave) {
- " li $rd,-1";
- } else {
- " mfspr $rd,$idx";
- }
-};
-
-# PowerISA 2.06 stuff
-sub vsxmem_op {
- my ($f, $vrt, $ra, $rb, $op) = @_;
- " .long ".sprintf "0x%X",(31<<26)|($vrt<<21)|($ra<<16)|($rb<<11)|($op*2+1);
-}
-# made-up unaligned memory reference AltiVec/VMX instructions
-my $lvx_u = sub { vsxmem_op(@_, 844); }; # lxvd2x
-my $stvx_u = sub { vsxmem_op(@_, 972); }; # stxvd2x
-my $lvdx_u = sub { vsxmem_op(@_, 588); }; # lxsdx
-my $stvdx_u = sub { vsxmem_op(@_, 716); }; # stxsdx
-my $lvx_4w = sub { vsxmem_op(@_, 780); }; # lxvw4x
-my $stvx_4w = sub { vsxmem_op(@_, 908); }; # stxvw4x
-
-# PowerISA 2.07 stuff
-sub vcrypto_op {
- my ($f, $vrt, $vra, $vrb, $op) = @_;
- " .long ".sprintf "0x%X",(4<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|$op;
-}
-my $vcipher = sub { vcrypto_op(@_, 1288); };
-my $vcipherlast = sub { vcrypto_op(@_, 1289); };
-my $vncipher = sub { vcrypto_op(@_, 1352); };
-my $vncipherlast= sub { vcrypto_op(@_, 1353); };
-my $vsbox = sub { vcrypto_op(@_, 0, 1480); };
-my $vshasigmad = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1730); };
-my $vshasigmaw = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1666); };
-my $vpmsumb = sub { vcrypto_op(@_, 1032); };
-my $vpmsumd = sub { vcrypto_op(@_, 1224); };
-my $vpmsubh = sub { vcrypto_op(@_, 1096); };
-my $vpmsumw = sub { vcrypto_op(@_, 1160); };
-my $vaddudm = sub { vcrypto_op(@_, 192); };
-
-my $mtsle = sub {
- my ($f, $arg) = @_;
- " .long ".sprintf "0x%X",(31<<26)|($arg<<21)|(147*2);
-};
-
-# PowerISA 3.0 stuff
-my $maddhdu = sub {
- my ($f, $rt, $ra, $rb, $rc) = @_;
- " .long ".sprintf "0x%X",(4<<26)|($rt<<21)|($ra<<16)|($rb<<11)|($rc<<6)|49;
-};
-my $maddld = sub {
- my ($f, $rt, $ra, $rb, $rc) = @_;
- " .long ".sprintf "0x%X",(4<<26)|($rt<<21)|($ra<<16)|($rb<<11)|($rc<<6)|51;
-};
-
-my $darn = sub {
- my ($f, $rt, $l) = @_;
- " .long ".sprintf "0x%X",(31<<26)|($rt<<21)|($l<<16)|(755<<1);
-};
-
-print <<___;
-// This file is generated from a similarly-named Perl script in the BoringSSL
-// source tree. Do not edit by hand.
-
-#if defined(__has_feature)
-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
-#define OPENSSL_NO_ASM
-#endif
-#endif
-
-#if !defined(OPENSSL_NO_ASM) && defined(__powerpc64__) && defined(__ELF__)
-___
-
-while($line=<>) {
-
- $line =~ s|[#!;].*$||; # get rid of asm-style comments...
- $line =~ s|/\*.*\*/||; # ... and C-style comments...
- $line =~ s|^\s+||; # ... and skip white spaces in beginning...
- $line =~ s|\s+$||; # ... and at the end
-
- {
- $line =~ s|\.L(\w+)|L$1|g; # common denominator for Locallabel
- $line =~ s|\bL(\w+)|\.L$1|g if ($dotinlocallabels);
- }
-
- {
- $line =~ s|(^[\.\w]+)\:\s*||;
- my $label = $1;
- if ($label) {
- my $xlated = ($GLOBALS{$label} or $label);
- print "$xlated:";
- if ($flavour =~ /linux.*64le/) {
- if ($TYPES{$label} =~ /function/) {
- printf "\n.localentry %s,0\n",$xlated;
- }
- }
- }
- }
-
- {
- $line =~ s|^\s*(\.?)(\w+)([\.\+\-]?)\s*||;
- my $c = $1; $c = "\t" if ($c eq "");
- my $mnemonic = $2;
- my $f = $3;
- my $opcode = eval("\$$mnemonic");
- $line =~ s/\b(c?[rf]|v|vs)([0-9]+)\b/$2/g if ($c ne "." and $flavour !~ /osx/);
- if (ref($opcode) eq 'CODE') { $line = &$opcode($f,split(',',$line)); }
- elsif ($mnemonic) { $line = $c.$mnemonic.$f."\t".$line; }
- }
-
- print $line if ($line);
- print "\n";
-}
-
-print <<___;
-#endif // !OPENSSL_NO_ASM && __powerpc64__ && __ELF__
-#if defined(__ELF__)
-// See https://www.airs.com/blog/archives/518.
-.section .note.GNU-stack,"",\%progbits
-#endif
-___
-
-close STDOUT or die "error closing STDOUT: $!";
diff --git a/crypto/test/abi_test.h b/crypto/test/abi_test.h
index 1ba82b1..24340c9 100644
--- a/crypto/test/abi_test.h
+++ b/crypto/test/abi_test.h
@@ -179,78 +179,7 @@
CALLER_STATE_REGISTER(uint64_t, x28) \
CALLER_STATE_REGISTER(uint64_t, x29)
-#elif defined(OPENSSL_PPC64LE)
-
-// CRReg only compares the CR2-CR4 bits of a CR register.
-struct CRReg {
- uint32_t masked() const { return value & 0x00fff000; }
- bool operator==(CRReg r) const { return masked() == r.masked(); }
- bool operator!=(CRReg r) const { return masked() != r.masked(); }
- uint32_t value;
-};
-
-// References:
-// ELFv2: http://openpowerfoundation.org/wp-content/uploads/resources/leabi/leabi-20170510.pdf
-//
-// Note vector and floating-point registers on POWER have two different names.
-// Originally, there were 32 floating-point registers and 32 vector registers,
-// labelled f0-f31 and v0-v31 respectively. Later, VSX (Vector Scalar Extension)
-// unified them into 64 registers vs0-vs63. f0-f31 map to the lower halves of
-// vs0-vs31. v0-v31 map to vs32-vs63. The ABI was defined in terms of pre-VSX
-// names, so we use those names here. In particular, f14-f31 are
-// callee-saved, but the upper halves of vs14-vs31 are not.
-#define LOOP_CALLER_STATE_REGISTERS() \
- CALLER_STATE_REGISTER(Reg128, v20) \
- CALLER_STATE_REGISTER(Reg128, v21) \
- CALLER_STATE_REGISTER(Reg128, v22) \
- CALLER_STATE_REGISTER(Reg128, v23) \
- CALLER_STATE_REGISTER(Reg128, v24) \
- CALLER_STATE_REGISTER(Reg128, v25) \
- CALLER_STATE_REGISTER(Reg128, v26) \
- CALLER_STATE_REGISTER(Reg128, v27) \
- CALLER_STATE_REGISTER(Reg128, v28) \
- CALLER_STATE_REGISTER(Reg128, v29) \
- CALLER_STATE_REGISTER(Reg128, v30) \
- CALLER_STATE_REGISTER(Reg128, v31) \
- CALLER_STATE_REGISTER(uint64_t, r14) \
- CALLER_STATE_REGISTER(uint64_t, r15) \
- CALLER_STATE_REGISTER(uint64_t, r16) \
- CALLER_STATE_REGISTER(uint64_t, r17) \
- CALLER_STATE_REGISTER(uint64_t, r18) \
- CALLER_STATE_REGISTER(uint64_t, r19) \
- CALLER_STATE_REGISTER(uint64_t, r20) \
- CALLER_STATE_REGISTER(uint64_t, r21) \
- CALLER_STATE_REGISTER(uint64_t, r22) \
- CALLER_STATE_REGISTER(uint64_t, r23) \
- CALLER_STATE_REGISTER(uint64_t, r24) \
- CALLER_STATE_REGISTER(uint64_t, r25) \
- CALLER_STATE_REGISTER(uint64_t, r26) \
- CALLER_STATE_REGISTER(uint64_t, r27) \
- CALLER_STATE_REGISTER(uint64_t, r28) \
- CALLER_STATE_REGISTER(uint64_t, r29) \
- CALLER_STATE_REGISTER(uint64_t, r30) \
- CALLER_STATE_REGISTER(uint64_t, r31) \
- CALLER_STATE_REGISTER(uint64_t, f14) \
- CALLER_STATE_REGISTER(uint64_t, f15) \
- CALLER_STATE_REGISTER(uint64_t, f16) \
- CALLER_STATE_REGISTER(uint64_t, f17) \
- CALLER_STATE_REGISTER(uint64_t, f18) \
- CALLER_STATE_REGISTER(uint64_t, f19) \
- CALLER_STATE_REGISTER(uint64_t, f20) \
- CALLER_STATE_REGISTER(uint64_t, f21) \
- CALLER_STATE_REGISTER(uint64_t, f22) \
- CALLER_STATE_REGISTER(uint64_t, f23) \
- CALLER_STATE_REGISTER(uint64_t, f24) \
- CALLER_STATE_REGISTER(uint64_t, f25) \
- CALLER_STATE_REGISTER(uint64_t, f26) \
- CALLER_STATE_REGISTER(uint64_t, f27) \
- CALLER_STATE_REGISTER(uint64_t, f28) \
- CALLER_STATE_REGISTER(uint64_t, f29) \
- CALLER_STATE_REGISTER(uint64_t, f30) \
- CALLER_STATE_REGISTER(uint64_t, f31) \
- CALLER_STATE_REGISTER(CRReg, cr)
-
-#endif // X86_64 || X86 || ARM || AARCH64 || PPC64LE
+#endif // X86_64 || X86 || ARM || AARCH64
// Enable ABI testing if all of the following are true.
//
@@ -302,12 +231,6 @@
// on 32-bit architectures for simplicity.
static_assert(sizeof(T) == 4, "parameter types must be word-sized");
return (crypto_word_t)t;
-#elif defined(OPENSSL_PPC64LE)
- // ELFv2, section 2.2.2.3 says the parameter save area sign- or zero-extends
- // parameters passed in memory. Section 2.2.3 is unclear on how to handle
- // register parameters, but section 2.2.2.3 additionally says that the memory
- // copy of a parameter is identical to the register one.
- return (crypto_word_t)t;
#elif defined(OPENSSL_X86_64) || defined(OPENSSL_AARCH64)
// AAPCS64, section 5.4.2, clauses C.7 and C.14 says any remaining bits in
// aarch are unspecified. iOS64 contradicts this and says the callee extends
@@ -362,9 +285,9 @@
template <typename R, typename... Args>
inline crypto_word_t CheckImpl(Result *out, bool unwind, R (*func)(Args...),
typename DeductionGuard<Args>::Type... args) {
- // We only support up to 8 arguments, so all arguments on aarch64 and ppc64le
- // are passed in registers. This is simpler and avoids the iOS discrepancy
- // around packing small arguments on the stack. (See the iOS64 reference.)
+ // We only support up to 8 arguments, so all arguments on aarch64 are passed
+ // in registers. This is simpler and avoids the iOS discrepancy around packing
+ // small arguments on the stack. (See the iOS64 reference.)
static_assert(sizeof...(args) <= 8,
"too many arguments for abi_test_trampoline");
diff --git a/crypto/test/asm/trampoline-ppc.pl b/crypto/test/asm/trampoline-ppc.pl
deleted file mode 100755
index b29c361..0000000
--- a/crypto/test/asm/trampoline-ppc.pl
+++ /dev/null
@@ -1,262 +0,0 @@
-#!/usr/bin/env perl
-# Copyright (c) 2019, Google Inc.
-#
-# Permission to use, copy, modify, and/or distribute this software for any
-# purpose with or without fee is hereby granted, provided that the above
-# copyright notice and this permission notice appear in all copies.
-#
-# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
-# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
-# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
-# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-
-# This file defines helper functions for crypto/test/abi_test.h on ppc64le. See
-# that header for details on how to use this.
-#
-# For convenience, this file is linked into libcrypto, where consuming builds
-# already support architecture-specific sources. The static linker should drop
-# this code in non-test binaries. This includes a shared library build of
-# libcrypto, provided --gc-sections or equivalent is used.
-#
-# References:
-#
-# ELFv2: http://openpowerfoundation.org/wp-content/uploads/resources/leabi/leabi-20170510.pdf
-
-use strict;
-
-my $flavour = shift;
-my $output = shift;
-if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
-
-$0 =~ m/(.*[\/\\])[^\/\\]+$/;
-my $dir = $1;
-my $xlate;
-( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
-( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
-die "can't locate ppc-xlate.pl";
-
-open OUT, "| \"$^X\" \"$xlate\" $flavour \"$output\"";
-*STDOUT = *OUT;
-
-unless ($flavour =~ /linux.*64le/) {
- die "This file only supports the ELFv2 ABI, used by ppc64le";
-}
-
-my $code = "";
-
-sub load_or_store_regs {
- # $op is "l" or "st".
- my ($op, $base_reg, $base_offset) = @_;
- # Vector registers.
- foreach (20..31) {
- my $offset = $base_offset + ($_ - 20) * 16;
- # Vector registers only support indexed register addressing.
- $code .= "\tli\tr11, $offset\n";
- $code .= "\t${op}vx\tv$_, r11, $base_reg\n";
- }
- # Save general registers.
- foreach (14..31) {
- my $offset = $base_offset + 192 + ($_ - 14) * 8;
- $code .= "\t${op}d\tr$_, $offset($base_reg)\n";
- }
- # Save floating point registers.
- foreach (14..31) {
- my $offset = $base_offset + 336 + ($_ - 14) * 8;
- $code .= "\t${op}fd\tf$_, $offset($base_reg)\n";
- }
-}
-
-sub load_regs {
- my ($base_reg, $base_offset) = @_;
- load_or_store_regs("l", $base_reg, $base_offset);
-}
-
-sub store_regs {
- my ($base_reg, $base_offset) = @_;
- load_or_store_regs("st", $base_reg, $base_offset);
-}
-
-my ($func, $state, $argv, $argc) = ("r3", "r4", "r5", "r6");
-$code .= <<____;
-.machine "any"
-.text
-
-# abi_test_trampoline loads callee-saved registers from |state|, calls |func|
-# with |argv|, then saves the callee-saved registers into |state|. It returns
-# the result of |func|. The |unwind| argument is unused.
-# uint64_t abi_test_trampoline(void (*func)(...), CallerState *state,
-# const uint64_t *argv, size_t argc,
-# uint64_t unwind);
-.globl abi_test_trampoline
-.align 5
-abi_test_trampoline:
- # LR is saved into the caller's stack frame.
- mflr r0
- std r0, 16(r1)
-
- # Allocate 66*8 = 528 bytes of stack frame. From the top of the stack
- # to the bottom, the stack frame is:
- #
- # 0(r1) - Back chain pointer
- # 8(r1) - CR save area
- # 16(r1) - LR save area (for |func|)
- # 24(r1) - TOC pointer save area
- # 32(r1) - Saved copy of |state|
- # 40(r1) - Padding
- # 48(r1) - Vector register save area (v20-v31, 12 registers)
- # 240(r1) - General register save area (r14-r31, 18 registers)
- # 384(r1) - Floating point register save area (f14-f31, 18 registers)
- #
- # Note the layouts of the register save areas and CallerState match.
- #
- # In the ELFv2 ABI, the parameter save area is optional if the function
- # is non-variadic and all parameters fit in registers. We only support
- # such functions, so we omit it to test that |func| does not rely on it.
- stdu r1, -528(r1)
-
- mfcr r0
- std r0, 8(r1) # Save CR
- std r2, 24(r1) # Save TOC
- std $state, 32(r1) # Save |state|
-____
-# Save registers to the stack.
-store_regs("r1", 48);
-# Load registers from the caller.
-load_regs($state, 0);
-$code .= <<____;
- # Load CR from |state|.
- ld r0, 480($state)
- mtcr r0
-
- # Move parameters into temporary registers so they are not clobbered.
- addi r11, $argv, -8 # Adjust for ldu below
- mr r12, $func
-
- # Load parameters into registers.
- cmpdi $argc, 0
- beq .Largs_done
- mtctr $argc
- ldu r3, 8(r11)
- bdz .Largs_done
- ldu r4, 8(r11)
- bdz .Largs_done
- ldu r5, 8(r11)
- bdz .Largs_done
- ldu r6, 8(r11)
- bdz .Largs_done
- ldu r7, 8(r11)
- bdz .Largs_done
- ldu r8, 8(r11)
- bdz .Largs_done
- ldu r9, 8(r11)
- bdz .Largs_done
- ldu r10, 8(r11)
-
-.Largs_done:
- li r2, 0 # Clear TOC to test |func|'s global entry point
- mtctr r12
- bctrl
- ld r2, 24(r1) # Restore TOC
-
- ld $state, 32(r1) # Reload |state|
-____
-# Output resulting registers to the caller.
-store_regs($state, 0);
-# Restore registers from the stack.
-load_regs("r1", 48);
-$code .= <<____;
- mfcr r0
- std r0, 480($state) # Output CR to caller
- ld r0, 8(r1)
- mtcrf 0b00111000, r0 # Restore CR2-CR4
- addi r1, r1, 528
- ld r0, 16(r1) # Restore LR
- mtlr r0
- blr
-.size abi_test_trampoline,.-abi_test_trampoline
-____
-
-# abi_test_clobber_* clobbers the corresponding register. These are used to test
-# the ABI-testing framework.
-foreach (0..31) {
- # r1 is the stack pointer. r13 is the thread pointer.
- next if ($_ == 1 || $_ == 13);
- $code .= <<____;
-.globl abi_test_clobber_r$_
-.align 5
-abi_test_clobber_r$_:
- li r$_, 0
- blr
-.size abi_test_clobber_r$_,.-abi_test_clobber_r$_
-____
-}
-
-foreach (0..31) {
- $code .= <<____;
-.globl abi_test_clobber_f$_
-.align 4
-abi_test_clobber_f$_:
- li r0, 0
- # Use the red zone.
- std r0, -8(r1)
- lfd f$_, -8(r1)
- blr
-.size abi_test_clobber_f$_,.-abi_test_clobber_f$_
-____
-}
-
-foreach (0..31) {
- $code .= <<____;
-.globl abi_test_clobber_v$_
-.align 4
-abi_test_clobber_v$_:
- vxor v$_, v$_, v$_
- blr
-.size abi_test_clobber_v$_,.-abi_test_clobber_v$_
-____
-}
-
-foreach (0..7) {
- # PPC orders CR fields in big-endian, so the mask is reversed from what one
- # would expect.
- my $mask = 1 << (7 - $_);
- $code .= <<____;
-.globl abi_test_clobber_cr$_
-.align 4
-abi_test_clobber_cr$_:
- # Flip the bits on cr$_ rather than setting to zero. With a four-bit
- # register, zeroing it will do nothing 1 in 16 times.
- mfcr r0
- not r0, r0
- mtcrf $mask, r0
- blr
-.size abi_test_clobber_cr$_,.-abi_test_clobber_cr$_
-____
-}
-
-$code .= <<____;
-.globl abi_test_clobber_ctr
-.align 4
-abi_test_clobber_ctr:
- li r0, 0
- mtctr r0
- blr
-.size abi_test_clobber_ctr,.-abi_test_clobber_ctr
-
-.globl abi_test_clobber_lr
-.align 4
-abi_test_clobber_lr:
- mflr r0
- mtctr r0
- li r0, 0
- mtlr r0
- bctr
-.size abi_test_clobber_lr,.-abi_test_clobber_lr
-
-____
-
-print $code;
-close STDOUT or die "error closing STDOUT: $!";
diff --git a/util/BUILD.toplevel b/util/BUILD.toplevel
index e0d3148..c314389 100644
--- a/util/BUILD.toplevel
+++ b/util/BUILD.toplevel
@@ -24,7 +24,6 @@
"crypto_sources_apple_x86_64",
"crypto_sources_linux_aarch64",
"crypto_sources_linux_arm",
- "crypto_sources_linux_ppc64le",
"crypto_sources_linux_x86",
"crypto_sources_linux_x86_64",
"fips_fragments",
@@ -65,14 +64,6 @@
]
]
-config_setting(
- name = "linux_ppc64le",
- constraint_values = [
- "@platforms//os:linux",
- "@platforms//cpu:ppc",
- ],
-)
-
posix_copts = [
# Assembler option --noexecstack adds .note.GNU-stack to each object to
# ensure that binaries can be built with non-executable stack.
@@ -110,7 +101,6 @@
# These selects must be kept in sync.
crypto_sources_asm = select({
- ":linux_ppc64le": crypto_sources_linux_ppc64le,
":linux_armv7": crypto_sources_linux_arm,
":linux_arm64": crypto_sources_linux_aarch64,
":linux_x86_32": crypto_sources_linux_x86,
@@ -138,7 +128,6 @@
"//conditions:default": [],
})
boringssl_copts += select({
- ":linux_ppc64le": [],
":linux_armv7": [],
":linux_arm64": [],
":linux_x86_32": [],
diff --git a/util/fipstools/delocate/delocate.peg b/util/fipstools/delocate/delocate.peg
index 0ffecea..6ec5f7a 100644
--- a/util/fipstools/delocate/delocate.peg
+++ b/util/fipstools/delocate/delocate.peg
@@ -12,7 +12,7 @@
# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
-# This is a rough parser for x86-64 and ppc64le assembly designed to work with
+# This is a rough parser for x86-64 and aarch64 assembly designed to work with
# https://github.com/pointlander/peg. delocate.go has a go:generate line for
# rebuilding delocate.peg.go from this file.
diff --git a/util/generate_build_files.py b/util/generate_build_files.py
index 4a93a7f..c319a55 100644
--- a/util/generate_build_files.py
+++ b/util/generate_build_files.py
@@ -35,7 +35,6 @@
('apple', 'x86_64', 'macosx', [], 'S'),
('linux', 'arm', 'linux32', [], 'S'),
('linux', 'aarch64', 'linux64', [], 'S'),
- ('linux', 'ppc64le', 'linux64le', [], 'S'),
('linux', 'x86', 'elf', ['-fPIC', '-DOPENSSL_IA32_SSE2'], 'S'),
('linux', 'x86_64', 'elf', [], 'S'),
('win', 'x86', 'win32n', ['-DOPENSSL_IA32_SSE2'], 'asm'),
@@ -142,7 +141,7 @@
if asm_outputs:
blueprint.write(' target: {\n')
for ((osname, arch), asm_files) in asm_outputs:
- if osname != 'linux' or arch == 'ppc64le':
+ if osname != 'linux':
continue
if arch == 'aarch64':
arch = 'arm64'
@@ -480,8 +479,6 @@
elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "mips")
# Just to avoid the “unknown processor” error.
set(ARCH "generic")
-elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "ppc64le")
- set(ARCH "ppc64le")
else()
message(FATAL_ERROR "Unknown processor:" ${CMAKE_SYSTEM_PROCESSOR})
endif()